# Example #1
def worker(args, dev_id, start_idx, end_idx, result_queue):
    """Evaluate the validation slice [start_idx, end_idx) on GPU ``dev_id``.

    Results are reported back to the parent process through ``result_queue``.
    """
    torch.cuda.set_device(dev_id)

    # Assemble the segmentation model (encoder + softmax decoder).
    model_builder = ModelBuilder()
    encoder = model_builder.build_encoder(
        arch=args.arch_encoder,
        fc_dim=args.fc_dim,
        weights=args.weights_encoder)
    decoder = model_builder.build_decoder(
        arch=args.arch_decoder,
        fc_dim=args.fc_dim,
        nr_classes=args.nr_classes,
        weights=args.weights_decoder,
        use_softmax=True)
    seg_module = SegmentationModule(encoder, decoder)
    seg_module.cuda()

    # Validation data restricted to this worker's index range.
    val_set = ValDataset(
        broden_dataset.record_list['validation'], args,
        max_sample=args.num_val, start_idx=start_idx,
        end_idx=end_idx)
    val_loader = torchdata.DataLoader(
        val_set,
        batch_size=args.batch_size,
        shuffle=False,
        collate_fn=user_scattered_collate,
        num_workers=2)

    # Run evaluation on this shard.
    evaluate(seg_module, val_loader, args, dev_id, result_queue)
# Example #2
def main(args):
    """Run single-image segmentation inference on GPU ``args.gpu_id``."""
    torch.cuda.set_device(args.gpu_id)

    # Input pipeline: wrap the single test image in a loader.
    sample_list = [{'fpath_img': args.test_img}]
    test_set = TestDataset(
        sample_list, args, max_sample=args.num_val)
    test_loader = torchdata.DataLoader(
        test_set,
        batch_size=args.batch_size,
        shuffle=False,
        collate_fn=user_scattered_collate,
        num_workers=5,
        drop_last=True)

    # Model: encoder + softmax decoder assembled into one module.
    model_builder = ModelBuilder()
    encoder = model_builder.build_encoder(
        arch=args.arch_encoder,
        fc_dim=args.fc_dim,
        weights=args.weights_encoder)
    decoder = model_builder.build_decoder(
        arch=args.arch_decoder,
        fc_dim=args.fc_dim,
        nr_classes=args.nr_classes,
        weights=args.weights_decoder,
        use_softmax=True)
    seg_module = SegmentationModule(encoder, decoder)
    seg_module.cuda()

    # Inference.
    test(seg_module, test_loader, args)

    print('Inference done!')
# Example #3
def main(args):
    """Train a segmentation model end to end.

    Builds the encoder/decoder from ``args``, wraps them for multi-GPU
    training when requested, then runs the epoch loop, checkpointing
    after every epoch.
    """
    # Network Builders
    builder = ModelBuilder()
    net_encoder = builder.build_encoder(
        arch=args.arch_encoder,
        fc_dim=args.fc_dim,
        weights=args.weights_encoder)
    net_decoder = builder.build_decoder(
        arch=args.arch_decoder,
        fc_dim=args.fc_dim,
        nr_classes=args.nr_classes,
        weights=args.weights_decoder)

    # TODO(LYC):: move criterion outside model.
    # crit = nn.NLLLoss(ignore_index=-1)

    # Deep-supervision decoders take an extra loss-scale argument.
    if args.arch_decoder.endswith('deepsup'):
        segmentation_module = SegmentationModule(
            net_encoder, net_decoder, args.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(
            net_encoder, net_decoder)

    print('1 Epoch = {} iters'.format(args.epoch_iters))

    # create loader iterator
    iterator_train = create_multi_source_train_data_loader(args=args)

    # load nets into gpu
    if args.num_gpus > 1:
        # Scatter-style data parallelism; batches are pre-split per GPU.
        segmentation_module = UserScatteredDataParallel(
            segmentation_module,
            device_ids=range(args.num_gpus))
        # For sync bn
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # Set up optimizers (one per sub-network, built by the project helper).
    nets = (net_encoder, net_decoder)
    optimizers = create_optimizers(nets, args)

    # Main loop: history accumulates per-iteration training metrics.
    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}

    for epoch in range(args.start_epoch, args.num_epoch + 1):
        train(segmentation_module, iterator_train, optimizers, history, epoch, args)

        # checkpointing
        checkpoint(nets, history, args, epoch)

    print('Training Done!')
def main(args):
    """Train or evaluate the audio-visual (sound/frame/avol) model.

    Seeds all RNGs for reproducibility, builds the three sub-networks,
    optionally resumes from a checkpoint, then either runs a single
    evaluation pass (``args.mode == 'eval'``) or the full training loop.
    """
    # Network Builders
    # Fixed seeds across torch / CUDA / numpy / random for reproducibility.
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)
    np.random.seed(0)
    random.seed(0)
    builder = ModelBuilder()
    net_sound = builder.build_sound(
        arch=args.arch_sound,
        input_channel=1,
        output_channel=args.num_channels,
        fc_dim=args.num_channels,
        weights=args.weights_sound)
    net_frame = builder.build_frame(
        arch=args.arch_frame,
        fc_dim=args.num_channels,
        pool_type=args.img_pool,
        weights=args.weights_frame)
    # NOTE(review): build_avol receives args.weights_frame — looks like it
    # should be a dedicated args.weights_avol; confirm against the arg parser.
    net_avol = builder.build_avol(
        arch=args.arch_avol,
        fc_dim=args.num_channels,
        weights=args.weights_frame)

    # Localization loss (BCE) and separation loss (project-defined).
    crit_loc = nn.BCELoss()
    crit_sep = builder.build_criterion(arch=args.loss)

    # Dataset and Loader
    dataset_train = MUSICMixDataset(
        args.list_train, args, split='train')
    dataset_val = MUSICMixDataset(
        args.list_val, args, max_sample=args.num_val, split='val')

    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=int(args.workers),
        drop_last=True)
    loader_val = torch.utils.data.DataLoader(
        dataset_val,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=int(args.workers),
        drop_last=False)
    args.epoch_iters = len(dataset_train) // args.batch_size
    print('1 Epoch = {} iters'.format(args.epoch_iters))

    # Set up optimizer
    optimizer = create_optimizer(net_sound, net_frame, net_avol, args)

    # History of peroformance
    history = {
        'train': {'epoch': [], 'err': [], 'err_loc': [], 'err_sep': [], 'acc': []},
        'val': {'epoch': [], 'err': [],  'err_loc': [], 'err_sep': [], 'acc': [], 'sdr': [], 'sir': [], 'sar': []}}


    # Training loop
    # Load from pretrained models: eval mode restores weights only; train
    # mode restores weights, optimizer state, start epoch and history.
    start_epoch = 1
    model_name = args.ckpt + '/checkpoint.pth'
    if os.path.exists(model_name):
        if args.mode == 'eval':
            net_sound, net_frame, net_avol = load_checkpoint_from_train(net_sound, net_frame, net_avol, model_name)
        elif args.mode == 'train':
            model_name = args.ckpt + '/checkpoint_latest.pth'
            net_sound, net_frame, net_avol, optimizer, start_epoch, history = load_checkpoint(net_sound, net_frame, net_avol, optimizer, history, model_name)
            print("Loading from previous checkpoint.")

    else:
        # No checkpoint: optionally warm-start from a separate
        # appearance+sound pretrained model.
        if args.mode == 'train' and start_epoch==1 and os.path.exists(args.weights_model):
            net_sound, net_frame = load_sep(net_sound, net_frame, args.weights_model)
            print("Loading from appearance + sound checkpoint.")

    # Wrap networks for multi-GPU execution.
    # NOTE(review): .to(args.device) right after .cuda() is presumably
    # redundant (or intended for a non-default device) — confirm.
    netWrapper1 = NetWrapper1(net_sound)
    netWrapper1 = torch.nn.DataParallel(netWrapper1, device_ids=range(args.num_gpus)).cuda()
    netWrapper1.to(args.device)

    netWrapper2 = NetWrapper2(net_frame)
    netWrapper2 = torch.nn.DataParallel(netWrapper2, device_ids=range(args.num_gpus)).cuda()
    netWrapper2.to(args.device)

    netWrapper3 = NetWrapper3(net_avol)
    netWrapper3 = torch.nn.DataParallel(netWrapper3, device_ids=range(args.num_gpus)).cuda()
    netWrapper3.to(args.device)


    # Eval mode: single evaluation pass, then exit.
    #evaluate(crit_loc, crit_sep, netWrapper1, netWrapper2, netWrapper3, loader_val, history, 0, args)
    if args.mode == 'eval':
        evaluate(crit_loc, crit_sep, netWrapper1, netWrapper2, netWrapper3, loader_val, history, 0, args)
        print('Evaluation Done!')
        return


    for epoch in range(start_epoch, args.num_epoch + 1):
        train(crit_loc, crit_sep, netWrapper1, netWrapper2, netWrapper3, loader_train, optimizer, history, epoch, args)

        # drop learning rate
        if epoch in args.lr_steps:
            adjust_learning_rate(optimizer, args)

        ## Evaluation and visualization every args.eval_epoch epochs,
        ## followed by a checkpoint.
        if epoch % args.eval_epoch == 0:
            evaluate(crit_loc, crit_sep, netWrapper1, netWrapper2, netWrapper3, loader_val, history, epoch, args)
            # checkpointing
            checkpoint(net_sound, net_frame, net_avol, optimizer, history, epoch, args)

    print('Training Done!')
# Example #5
    cfg.merge_from_file(model['config'])
    cfg.DATASET['root_dataset'] = './.data/vision/ade20k'
    cfg.DATASET['list_train'] = "./.data/vision/ade20k/training.odgt"
    cfg.DATASET['list_val'] = "./.data/vision/ade20k/validation.odgt"

    BATCH_SIZE = cfg.VAL.batch_size
    BATCH_SIZE = 32

    if not os.path.isdir(os.path.join(cfg.DIR, "result")):
        os.makedirs(os.path.join(cfg.DIR, "result"))

    torch.cuda.set_device(gpu)

    # Network Builders
    net_encoder = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        num_class=cfg.DATASET.num_class,
        weights=cfg.MODEL.weights_decoder,
        use_softmax=True)

    crit = nn.NLLLoss(ignore_index=-1)

    segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)

    # Dataset and Loader
    dataset_val = ValDataset(cfg.DATASET.root_dataset, cfg.DATASET.list_val,
                             cfg.DATASET)
# Example #6
def main(args):
    """Build the network, optionally resume from a checkpoint, and train.

    Side effects: prints an architecture/parameter summary, mutates
    ``args.start_epoch`` when resuming, and periodically saves checkpoints
    via ``save_checkpoint``.
    """
    # import network architecture
    builder = ModelBuilder()
    model = builder.build_net(arch=args.id,
                              num_input=args.num_input,
                              num_classes=args.num_classes,
                              num_branches=args.num_branches,
                              padding_list=args.padding_list,
                              dilation_list=args.dilation_list)
    # NOTE(review): GPU ids are hard-coded; consider range(args.num_gpus).
    device_ids = [0, 2]
    model = torch.nn.DataParallel(model, device_ids=device_ids).cuda()
    cudnn.benchmark = True

    # collect the number of parameters in the network
    # (counts every parameter, frozen ones included)
    print("------------------------------------------")
    print("Network Architecture of Model %s:" % (args.id))
    num_para = sum(param.numel() for _, param in model.named_parameters())

    print(model)
    print("Number of trainable parameters %d in Model %s" %
          (num_para, args.id))
    print("------------------------------------------")

    # set the optimizer and loss
    optimizer = optim.RMSprop(model.parameters(),
                              args.lr,
                              alpha=args.alpha,
                              eps=args.eps,
                              weight_decay=args.weight_decay,
                              momentum=args.momentum)
    criterion = nn.CrossEntropyLoss()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> Loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            # Bug fix: the resumed epoch used to be stored in a local
            # variable the training loop never read; propagate it to
            # args.start_epoch so training actually resumes there.
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['opt_dict'])
            print("=> Loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))
        else:
            print("=> No checkpoint found at '{}'".format(args.resume))

    # loading data
    tf = TrainDataset(train_dir, args)
    train_loader = DataLoader(tf,
                              batch_size=args.batch_size,
                              shuffle=args.shuffle,
                              num_workers=args.num_workers,
                              pin_memory=True)

    print("Start training ...")
    for epoch in tqdm(range(args.start_epoch + 1, args.num_epochs + 1)):
        train(train_loader, model, criterion, optimizer, epoch, args)

        # save models only past args.particular_epoch, every
        # args.save_epochs_steps epochs
        if epoch > args.particular_epoch:
            if epoch % args.save_epochs_steps == 0:
                save_checkpoint(
                    {
                        'epoch': epoch,
                        'state_dict': model.state_dict(),
                        'opt_dict': optimizer.state_dict()
                    }, epoch, args)

    print("Training Done")
# Example #7
def main(cfg, gpus):
    """Few-shot segmentation evaluation over multiple runs.

    Builds an objectness network plus a binary (2-class) decoder, runs
    ``cfg.VAL.n_runs`` evaluation runs over VOC or COCO few-shot episodes,
    and prints per-class / mean IoU statistics (standard and binary).
    """
    torch.cuda.set_device(gpus[0])

    # Network Builders
    net_objectness = ModelBuilder.build_objectness(
        arch=cfg.MODEL.arch_objectness,
        weights=cfg.MODEL.weights_enc_query,
        fix_encoder=cfg.TRAIN.fix_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        input_dim=cfg.MODEL.decoder_dim,
        fc_dim=cfg.MODEL.fc_dim,
        ppm_dim=cfg.MODEL.ppm_dim,
        num_class=2,
        weights=cfg.MODEL.weights_decoder,
        dropout_rate=cfg.MODEL.dropout_rate,
        use_dropout=cfg.MODEL.use_dropout,
        use_softmax=True)

    # NOTE(review): crit is constructed but never used below — confirm.
    crit = nn.NLLLoss(ignore_index=255)

    # Inference only: both nets frozen in eval mode on the GPU.
    net_objectness.cuda()
    net_objectness.eval()

    net_decoder.cuda()
    net_decoder.eval()

    print('###### Prepare data ######')
    # Pick the dataset-specific episode factory; the "objectness" variants
    # ignore class identity.
    data_name = cfg.DATASET.name
    if data_name == 'VOC':
        if cfg.VAL.test_with_classes:
            from dataloaders.customized import voc_fewshot
        else:
            from dataloaders.customized_objectness import voc_fewshot
        make_data = voc_fewshot
        max_label = 20
    elif data_name == 'COCO':
        if cfg.VAL.test_with_classes:
            from dataloaders.customized import coco_fewshot
        else:
            from dataloaders.customized_objectness import coco_fewshot
        make_data = coco_fewshot
        max_label = 80
        split = cfg.DATASET.data_split + '2014'
        annFile = f'{cfg.DATASET.data_dir}/annotations/instances_{split}.json'
        cocoapi = COCO(annFile)
    else:
        raise ValueError('Wrong config for dataset!')
    # Evaluate on the classes NOT in the training fold.
    labels = CLASS_LABELS[data_name]['all'] - CLASS_LABELS[data_name][
        cfg.TASK.fold_idx]
    #labels = CLASS_LABELS[data_name][cfg.TASK.fold_idx]
    #transforms = [Resize_test(size=cfg.DATASET.input_size)]
    val_transforms = [
        transforms.ToNumpy(),
        transforms.Resize_pad(size=cfg.DATASET.input_size[0])
    ]

    # ImageNet mean/std scaled to 0-255 pixel range.
    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]
    '''val_transforms = [
        transforms.ToNumpy(),
        #transforms.RandScale([0.9, 1.1]),
        #transforms.RandRotate([-10, 10], padding=mean, ignore_label=0),
        #transforms.RandomGaussianBlur(),
        #transforms.RandomHorizontalFlip(),
        transforms.Crop([cfg.DATASET.input_size[0], cfg.DATASET.input_size[1]], crop_type='rand', padding=mean, ignore_label=0)]'''

    val_transforms = Compose(val_transforms)

    print('###### Testing begins ######')
    metric = Metric(max_label=max_label, n_runs=cfg.VAL.n_runs)
    with torch.no_grad():
        for run in range(cfg.VAL.n_runs):
            print(f'### Run {run + 1} ###')
            # Different seed per run so episodes differ across runs.
            set_seed(cfg.VAL.seed + run)

            print(f'### Load data ###')
            dataset = make_data(
                base_dir=cfg.DATASET.data_dir,
                split=cfg.DATASET.data_split,
                transforms=val_transforms,
                to_tensor=transforms.ToTensorNormalize_noresize(),
                labels=labels,
                max_iters=cfg.VAL.n_iters * cfg.VAL.n_batch,
                n_ways=cfg.TASK.n_ways,
                n_shots=cfg.TASK.n_shots,
                n_queries=cfg.TASK.n_queries,
                permute=cfg.VAL.permute_labels,
            )
            if data_name == 'COCO':
                coco_cls_ids = dataset.datasets[0].dataset.coco.getCatIds()
            testloader = DataLoader(dataset,
                                    batch_size=cfg.VAL.n_batch,
                                    shuffle=False,
                                    num_workers=1,
                                    pin_memory=True,
                                    drop_last=False)
            print(f"Total # of Data: {len(dataset)}")

            count = 0

            for sample_batched in tqdm.tqdm(testloader):
                feed_dict = data_preprocess(sample_batched, cfg)
                # Map COCO category ids to contiguous 1-based label ids.
                if data_name == 'COCO':
                    label_ids = [
                        coco_cls_ids.index(x) + 1
                        for x in sample_batched['class_ids']
                    ]
                else:
                    label_ids = list(sample_batched['class_ids'])

                # Forward pass: objectness features -> binary decoder.
                feat = net_objectness(feed_dict['img_data'],
                                      return_feature_maps=True)
                query_pred = net_decoder(feat, segSize=(473, 473))

                metric.record(np.array(query_pred.argmax(dim=1)[0].cpu()),
                              np.array(feed_dict['seg_label'][0].cpu()),
                              labels=label_ids,
                              n_run=run)

                if cfg.VAL.visualize:
                    #print(as_numpy(feed_dict['seg_label'][0].cpu()).shape)
                    #print(as_numpy(np.array(query_pred.argmax(dim=1)[0].cpu())).shape)
                    #print(feed_dict['img_data'].cpu().shape)
                    query_name = sample_batched['query_ids'][0][0]
                    support_name = sample_batched['support_ids'][0][0][0]
                    if data_name == 'VOC':
                        img = imread(
                            os.path.join(cfg.DATASET.data_dir, 'JPEGImages',
                                         query_name + '.jpg'))
                    else:
                        query_name = int(query_name)
                        img_meta = cocoapi.loadImgs(query_name)[0]
                        img = imread(
                            os.path.join(cfg.DATASET.data_dir, split,
                                         img_meta['file_name']))
                    #img = imresize(img, cfg.DATASET.input_size)
                    visualize_result(
                        (img, as_numpy(feed_dict['seg_label'][0].cpu()),
                         '%05d' % (count)),
                        as_numpy(np.array(query_pred.argmax(dim=1)[0].cpu())),
                        os.path.join(cfg.DIR, 'result'))
                count += 1

            # Per-run IoU summary.
            classIoU, meanIoU = metric.get_mIoU(labels=sorted(labels),
                                                n_run=run)
            classIoU_binary, meanIoU_binary = metric.get_mIoU_binary(n_run=run)
            '''_run.log_scalar('classIoU', classIoU.tolist())
            _run.log_scalar('meanIoU', meanIoU.tolist())
            _run.log_scalar('classIoU_binary', classIoU_binary.tolist())
            _run.log_scalar('meanIoU_binary', meanIoU_binary.tolist())
            _log.info(f'classIoU: {classIoU}')
            _log.info(f'meanIoU: {meanIoU}')
            _log.info(f'classIoU_binary: {classIoU_binary}')
            _log.info(f'meanIoU_binary: {meanIoU_binary}')'''

    # Aggregate statistics (mean and std over all runs).
    classIoU, classIoU_std, meanIoU, meanIoU_std = metric.get_mIoU(
        labels=sorted(labels))
    classIoU_binary, classIoU_std_binary, meanIoU_binary, meanIoU_std_binary = metric.get_mIoU_binary(
    )

    print('----- Final Result -----')
    print('final_classIoU', classIoU.tolist())
    print('final_classIoU_std', classIoU_std.tolist())
    print('final_meanIoU', meanIoU.tolist())
    print('final_meanIoU_std', meanIoU_std.tolist())
    print('final_classIoU_binary', classIoU_binary.tolist())
    print('final_classIoU_std_binary', classIoU_std_binary.tolist())
    print('final_meanIoU_binary', meanIoU_binary.tolist())
    print('final_meanIoU_std_binary', meanIoU_std_binary.tolist())
    print(f'classIoU mean: {classIoU}')
    print(f'classIoU std: {classIoU_std}')
    print(f'meanIoU mean: {meanIoU}')
    print(f'meanIoU std: {meanIoU_std}')
    print(f'classIoU_binary mean: {classIoU_binary}')
    print(f'classIoU_binary std: {classIoU_std_binary}')
    print(f'meanIoU_binary mean: {meanIoU_binary}')
    print(f'meanIoU_binary std: {meanIoU_std_binary}')
# Example #8
def main(args):
    """Train the U-Net segmentation model and checkpoint the best epochs.

    Tracks per-class and mean IoU on the validation split; a checkpoint is
    written on fixed epochs (every 50th and the last one) and whenever a
    validation metric improves after a 15-epoch warm-up.
    """
    # Network Builders
    builder = ModelBuilder()

    unet = builder.build_unet(num_class=args.num_class,
                              arch=args.unet_arch,
                              weights=args.weights_unet)

    print("Froze the following layers: ")
    for name, p in unet.named_parameters():
        if not p.requires_grad:
            print(name)
    print()

    crit = DualLoss(mode="train")

    segmentation_module = SegmentationModule(crit, unet)

    # Augmentations: train gets flips/rotation, val only a center crop.
    train_augs = Compose([
        PaddingCenterCrop(256),
        RandomHorizontallyFlip(),
        RandomVerticallyFlip(),
        RandomRotate(180)
    ])
    test_augs = Compose([PaddingCenterCrop(256)])

    # Dataset and Loader: 3D volumes sliced into 2D via load2D.
    dataset_train = SideWalkData(  # Loads 3D volumes
        root=args.data_root,
        split='train',
        k_split=args.k_split,
        augmentations=train_augs)
    ac17_train = load2D(
        dataset_train, split='train',
        deform=True)  #Dataloader for 2D slices. Requires 3D loader.

    loader_train = data.DataLoader(ac17_train,
                                   batch_size=args.batch_size_per_gpu,
                                   shuffle=True,
                                   num_workers=int(args.workers),
                                   drop_last=True,
                                   pin_memory=True)

    dataset_val = SideWalkData(root=args.data_root,
                               split='val',
                               k_split=args.k_split,
                               augmentations=test_augs)

    ac17_val = load2D(dataset_val, split='val', deform=False)

    loader_val = data.DataLoader(ac17_val,
                                 batch_size=1,
                                 shuffle=False,
                                 collate_fn=user_scattered_collate,
                                 num_workers=5,
                                 drop_last=True)

    # load nets into gpu
    if len(args.gpus) > 1:
        segmentation_module = UserScatteredDataParallel(segmentation_module,
                                                        device_ids=args.gpus)
        # For sync bn
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # Set up optimizers
    # NOTE(review): net_encoder/net_decoder are not defined anywhere in this
    # function — the non-unet branch would raise NameError. Confirm callers
    # always pass args.unet == True, or define the encoder/decoder here.
    nets = (net_encoder, net_decoder, crit) if args.unet == False else (unet,
                                                                        crit)
    optimizers = create_optimizers(nets, args)

    # Main loop: track training metrics and the best validation IoUs seen
    # (per class 1-3 and their mean), with the epoch each was achieved.
    history = {'train': {'epoch': [], 'loss': [], 'acc': [], 'jaccard': []}}
    best_val = {
        'epoch_1': 0,
        'mIoU_1': 0,
        'epoch_2': 0,
        'mIoU_2': 0,
        'epoch_3': 0,
        'mIoU_3': 0,
        'epoch': 0,
        'mIoU': 0
    }

    for epoch in range(args.start_epoch, args.num_epoch + 1):
        train(segmentation_module, loader_train, optimizers, history, epoch,
              args)
        iou, loss = eval(loader_val, segmentation_module, args, crit)

        # Decide whether this epoch earned a checkpoint: low loss or a new
        # best in any per-class IoU or the mean IoU.
        ckpted = loss < 0.215
        if iou[0] > best_val['mIoU_1']:
            best_val['epoch_1'] = epoch
            best_val['mIoU_1'] = iou[0]
            ckpted = True

        if iou[1] > best_val['mIoU_2']:
            best_val['epoch_2'] = epoch
            best_val['mIoU_2'] = iou[1]
            ckpted = True

        if iou[2] > best_val['mIoU_3']:
            best_val['epoch_3'] = epoch
            best_val['mIoU_3'] = iou[2]
            ckpted = True

        if (iou[0] + iou[1] + iou[2]) / 3 > best_val['mIoU']:
            best_val['epoch'] = epoch
            best_val['mIoU'] = (iou[0] + iou[1] + iou[2]) / 3
            ckpted = True

        # Checkpoint on fixed epochs unconditionally; otherwise only when a
        # metric improved and the 15-epoch warm-up has passed.  (This
        # replaces the original continue-chain, which also contained an
        # unreachable print() after the final continue.)
        if epoch % 50 == 0 or epoch == args.num_epoch:
            checkpoint(nets, history, args, epoch)
        elif ckpted and epoch >= 15:
            checkpoint(nets, history, args, epoch)

    print('Training Done!')
# Example #9
def main(args):
    """Segment every frame of a hard-coded input video and export a video.

    Each frame is round-tripped through ./test_video_input/frame.png and
    ./test_video_output/frame.png because TestDataset/test() work on image
    files on disk.
    """
    torch.cuda.set_device(args.gpu)

    # Network Builders
    builder = ModelBuilder()
    net_encoder = builder.build_encoder(arch=args.arch_encoder,
                                        fc_dim=args.fc_dim,
                                        weights=args.weights_encoder)
    net_decoder = builder.build_decoder(arch=args.arch_decoder,
                                        fc_dim=args.fc_dim,
                                        num_class=args.num_class,
                                        weights=args.weights_decoder,
                                        use_softmax=True)

    crit = nn.NLLLoss(ignore_index=-1)

    segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)
    # Loop-invariant: move the model to the GPU once, not once per frame.
    segmentation_module.cuda()

    # Dataset and Loader
    video_path = "./test_video_input/test_1.mp4"
    vidcap = cv2.VideoCapture(video_path)
    video_fps = math.ceil(vidcap.get(cv2.CAP_PROP_FPS))
    length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_array = []

    for _ in tqdm(range(length - 1)):
        ret, frame = vidcap.read()
        if not ret:
            # Bug fix: the read flag was ignored; a failed read would have
            # passed frame=None to cv2.imwrite and crashed.
            break
        cv2.imwrite("./test_video_input/frame.png", frame)
        list_test = [{'fpath_img': './test_video_input/frame.png'}]
        dataset_test = TestDataset(list_test, args, max_sample=args.num_val)
        loader_test = torchdata.DataLoader(dataset_test,
                                           batch_size=args.batch_size,
                                           shuffle=False,
                                           collate_fn=user_scattered_collate,
                                           num_workers=5,
                                           drop_last=True)

        # Segment the single frame, then collect the rendered output image.
        test(segmentation_module, loader_test, args)
        img = cv2.imread("./test_video_output/frame.png")
        frame_array.append(img)
    vidcap.release()

    if not frame_array:
        # Bug fix: `size` was undefined when no frame was processed, which
        # made the VideoWriter call raise NameError.
        print('No frames processed; skipping video export.')
        return

    height, width, layers = frame_array[0].shape
    size = (width, height)
    out = cv2.VideoWriter("./test_video_output/test_1_sgd_100.mp4",
                          cv2.VideoWriter_fourcc(*'DIVX'), video_fps, size)

    for frame_img in frame_array:
        # writing to a image array
        out.write(frame_img)
    out.release()
# Example #10
		pred,ind = torch.max(pred, dim=1)
		ind = as_numpy((ind.squeeze()).cpu())

		seg[:,:,0] = ind
		im = bridge.cv2_to_imgmsg(seg, "mono8")
		im.header = data.header
		pub.publish(im)
		stop = timeit.default_timer()
		print(stop-start)


# Pre-processing transform (BGR mean subtraction, unit std) and GPU device.
img_transform = transforms.Normalize(mean=[102.9801, 115.9465, 122.7717], std=[1., 1., 1.])
# Bug fix: torch.cuda.set_device() returns None, so the previous
# `device = torch.cuda.set_device(0)` left `device` bound to None.
torch.cuda.set_device(0)
device = torch.device('cuda:0')

# Build the encoder/decoder pair from the pretrained checkpoints.
builder = ModelBuilder()
net_encoder = builder.build_encoder(
    arch='resnet18dilated',
    fc_dim=512,
    weights="ckpt/baseline-resnet18dilated-c1_deepsup/encoder_epoch_2.pth")

net_decoder = builder.build_decoder(
    arch='c1_deepsup',
    fc_dim=512,
    num_class=12,
    weights="ckpt/baseline-resnet18dilated-c1_deepsup/decoder_epoch_2.pth",
    use_softmax=True)

crit = nn.NLLLoss(ignore_index=-1)
segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)
# Run the whole model in half precision for faster inference.
segmentation_module.half()
# Example #11
def main(cfg, gpus):
    """Train a video segmentation model selected by ``args.method``.

    Dispatches over several architectures (td4_psp, NetWarp, ETC, warp
    variants, OCR variants), optionally resumes from ./resume checkpoints,
    and runs the epoch loop with periodic checkpoints and validation.

    NOTE(review): this function reads the module-level ``args`` everywhere
    and never uses the ``gpus`` parameter — confirm the intended contract.
    """
    # Network Builders

    label_num_ = args.num_class

    if args.method == 'tdnet':
        # td4_psp ships its own OHEM loss and pretrained initialization.
        n_img_per_gpu = int(args.batchsize / args.gpu_num)
        n_min = n_img_per_gpu * args.cropsize * args.cropsize // 16
        loss_fn = OhemCELoss2D(thresh=0.7, n_min=n_min, ignore_index=255)
        segmentation_module = td4_psp(args=args,
                                      backbone='resnet18',
                                      loss_fn=loss_fn)
        segmentation_module.pretrained_init()

    else:
        # All other methods share an encoder/decoder pair plus NLL loss.
        net_encoder = ModelBuilder.build_encoder(
            arch=cfg.MODEL.arch_encoder.lower(),
            fc_dim=cfg.MODEL.fc_dim,
            weights=cfg.MODEL.weights_encoder,
            args=args)
        net_decoder = ModelBuilder.build_decoder(
            arch=cfg.MODEL.arch_decoder.lower(),
            fc_dim=cfg.MODEL.fc_dim,
            num_class=label_num_,
            weights=cfg.MODEL.weights_decoder)

        crit = nn.NLLLoss(ignore_index=255)

        if args.method == 'netwarp':
            segmentation_module = NetWarp(net_encoder, net_decoder, crit, args,
                                          cfg.TRAIN.deep_sup_scale)
        elif args.method == 'ETC':
            segmentation_module = ETC(net_encoder, net_decoder, crit, args,
                                      cfg.TRAIN.deep_sup_scale)
        elif args.method == 'nonlocal3d':
            segmentation_module = Non_local3d(args, net_encoder, crit)

        elif args.method == 'our_warp':

            # Deep supervision only when a positive scale is configured.
            if args.deepsup_scale > 0.:
                segmentation_module = ClipWarpNet(net_encoder, net_decoder,
                                                  crit, args,
                                                  args.deepsup_scale)
            else:

                segmentation_module = ClipWarpNet(net_encoder, net_decoder,
                                                  crit, args)
        elif args.method == 'propnet':
            segmentation_module = PropNet(net_encoder,
                                          net_decoder,
                                          crit,
                                          args,
                                          deep_sup_scale=args.deepsup_scale)
        elif args.method == 'our_warp_merge':
            segmentation_module = OurWarpMerge(net_encoder,
                                               net_decoder,
                                               crit,
                                               args,
                                               deep_sup_scale=0.4)

        elif args.method == 'clip_psp':
            segmentation_module = Clip_PSP(net_encoder,
                                           crit,
                                           args,
                                           deep_sup_scale=0.4)
        elif args.method == 'clip_ocr':
            segmentation_module = ClipOCRNet(net_encoder,
                                             crit,
                                             args,
                                             deep_sup_scale=0.4)
        elif args.method == 'netwarp_ocr':
            segmentation_module = NetWarp_ocr(net_encoder,
                                              crit,
                                              args,
                                              deep_sup_scale=0.4)
        elif args.method == 'etc_ocr':
            segmentation_module = ETC_ocr(net_encoder,
                                          crit,
                                          args,
                                          deep_sup_scale=0.4)
        else:
            raise (NotImplementedError)

    # Dataset and Loader: the clip_* methods need longer clips.
    if args.method == 'clip_psp' or args.method == 'clip_ocr':
        dataset_train = BaseDataset_longclip(args, 'train')
    else:
        dataset_train = BaseDataset_clip(args, 'train')

    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=args.batchsize,  # we have modified data_parallel
        shuffle=True,  # we do not use this param
        num_workers=args.workers,
        drop_last=True,
        pin_memory=False)
    print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters))

    # load nets into gpu

    segmentation_module.cuda(args.start_gpu)
    optimizer = create_optimizers(segmentation_module, cfg, args)
    # Resume model + optimizer state from ./resume when requested.
    if args.resume_epoch != 0:
        to_load = torch.load(
            os.path.join('./resume',
                         'model_epoch_{}.pth'.format(args.resume_epoch)),
            map_location=torch.device("cuda:" + str(args.start_gpu)))
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in to_load.items():
            name = k[7:]  # strip the `module.` prefix (first 7 chars) that DataParallel adds
            new_state_dict[name] = v  # re-key the weights without the prefix
        cfg.TRAIN.start_epoch = args.resume_epoch
        segmentation_module.load_state_dict(new_state_dict)
        optimizer.load_state_dict(
            torch.load(
                os.path.join('./resume',
                             'opt_epoch_{}.pth'.format(args.resume_epoch)),
                map_location=torch.device("cuda:" + str(args.start_gpu))))
        print('resume from epoch {}'.format(args.resume_epoch))

    # Multi-GPU wrapping on the device range starting at args.start_gpu.
    if args.gpu_num > 1:
        train_gpu_ = list(range(args.gpu_num))
        train_gpu_ = [int(gpu_ + args.start_gpu) for gpu_ in train_gpu_]
        print(train_gpu_)
        segmentation_module = torch.nn.DataParallel(segmentation_module,
                                                    device_ids=train_gpu_)
        patch_replication_callback(segmentation_module)

#    print(segmentation_module)
# Set up optimizers

# Main loop
    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}

    #if len(args.resume_dir)>0:
    #    resume_epoch = args.resume_dir.split('.')[]

    for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch):
        print('Epoch {}'.format(epoch))
        #checkpoint(optimizer,segmentation_module, history, args, epoch+1)
        train(segmentation_module, loader_train, optimizer, history, epoch + 1,
              cfg, args)

        ###################        # checkpointing every 20 epochs, with
        # optional validation
        if (epoch + 1) % 20 == 0:
            checkpoint(optimizer, segmentation_module, history, args,
                       epoch + 1)
            if args.validation:
                test(segmentation_module, args)


#
    print('Training Done!')
# Example #12
def main():
    """Create the model and run domain-adaptation training.

    Loads the YAML config into the global ``args``, builds an
    encoder/decoder segmentation model, then alternates a supervised step
    on the source stream (GTA5) with a pseudo-label / class-balance step on
    the target stream (Cityscapes). Every ``args.save_pred_every``-th
    logged iteration it evaluates mIoU on the validation split and
    snapshots the best checkpoint.

    Relies on module-level globals: ``args`` plus the project helpers
    imported at file top (ModelBuilder, SegmentationModule, loaders, ...).
    """
    with open(args.config) as f:
        # PyYAML >= 5 requires an explicit Loader; FullLoader matches the
        # behaviour of the old default for trusted config files.
        config = yaml.load(f, Loader=yaml.FullLoader)
    for k, v in config['common'].items():
        setattr(args, k, v)
    mkdirs(osp.join("logs/" + args.exp_name))

    logger = create_logger('global_logger', "logs/" + args.exp_name + '/log.txt')
    logger.info('{}'.format(args))

    for key, val in vars(args).items():
        logger.info("{:16} {}".format(key, val))
    logger.info("random_scale {}".format(args.random_scale))
    logger.info("is_training {}".format(args.is_training))

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    h, w = map(int, args.input_size_target.split(','))
    input_size_target = (h, w)

    cudnn.enabled = True
    args.snapshot_dir = args.snapshot_dir + args.exp_name
    tb_logger = SummaryWriter("logs/" + args.exp_name)

    # Spatial class prior: per-pixel frequency of the 19 classes, normalized
    # to a distribution at every location, shaped (1, 19, 512, 1024).
    local_array = np.load("local.npy")
    local_array = local_array[:, :, :19]
    local_array = local_array / local_array.sum(2).reshape(512, 1024, 1)
    local_array = local_array.transpose(2, 0, 1)
    local_array = torch.from_numpy(local_array)
    local_array = local_array.view(1, 19, 512, 1024)

    h, w = map(int, args.input_size_test.split(','))
    input_size_test = (h, w)
    h, w = map(int, args.com_size.split(','))
    com_size = (h, w)

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    normalize_module = transforms_seg.Normalize(mean=mean, std=std)
    test_normalize = transforms.Normalize(mean=mean, std=std)

    test_transform = transforms.Compose([
        transforms.Resize((input_size_test[1], input_size_test[0])),
        transforms.ToTensor(),
        test_normalize])

    # Validation loader over the target domain.
    valloader = data.DataLoader(
        cityscapesDataSet(args.data_dir_target,
                          args.data_list_target_val,
                          crop_size=input_size_test,
                          set='train',
                          transform=test_transform),
        num_workers=args.num_workers,
        batch_size=1, shuffle=False, pin_memory=True)
    with open('./dataset/cityscapes_list/info.json', 'r') as fp:
        info = json.load(fp)
    # np.int / np.str were removed in NumPy >= 1.24; builtins are equivalent.
    mapping = np.array(info['label2train'], dtype=int)
    # Close the list file instead of leaking the handle.
    with open(args.label_path_list_val, 'r') as fp:
        gt_imgs_val = fp.read().splitlines()
    gt_imgs_val = [osp.join(args.data_dir_target_val, x) for x in gt_imgs_val]

    name_classes = np.array(info['label'], dtype=str)
    interp_val = nn.Upsample(size=(com_size[1], com_size[0]),
                             mode='bilinear', align_corners=True)

    # Build model.
    builder = ModelBuilder()
    net_encoder = builder.build_encoder(
        arch=args.arch_encoder,
        fc_dim=args.fc_dim,
        weights=args.weights_encoder)
    net_decoder = builder.build_decoder(
        arch=args.arch_decoder,
        fc_dim=args.fc_dim,
        num_class=args.num_classes,
        weights=args.weights_decoder,
        use_aux=True)

    # Global class-frequency prior used by the equalise (balance) loss.
    weighted_softmax = pd.read_csv("weighted_loss.txt", header=None)
    weighted_softmax = torch.from_numpy(weighted_softmax.values)
    weighted_softmax = weighted_softmax / torch.sum(weighted_softmax)
    weighted_softmax = weighted_softmax.cuda().float()

    model = SegmentationModule(net_encoder, net_decoder, args.use_aux)

    if args.num_gpus > 1:
        model = torch.nn.DataParallel(model)
        patch_replication_callback(model)  # sync BN across replicas
    model.cuda()

    nets = (net_encoder, net_decoder, None, None)
    optimizers = create_optimizer(nets, args)
    cudnn.enabled = True
    cudnn.benchmark = True
    model.train()

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    source_transform = transforms_seg.Compose([
        transforms_seg.Resize([input_size[1], input_size[0]]),
        transforms_seg.ToTensor(),
        normalize_module])

    target_transform = transforms_seg.Compose([
        transforms_seg.Resize([input_size_target[1], input_size_target[0]]),
        transforms_seg.ToTensor(),
        normalize_module])

    trainloader = data.DataLoader(
        GTA5DataSet(args.data_dir, args.data_list,
                    max_iters=args.num_steps * args.iter_size * args.batch_size,
                    crop_size=input_size, transform=source_transform),
        batch_size=args.batch_size, shuffle=True, num_workers=5,
        pin_memory=True)
    trainloader_iter = enumerate(trainloader)

    targetloader = data.DataLoader(
        fake_cityscapesDataSet(args.data_dir_target, args.data_list_target,
                               max_iters=args.num_steps * args.iter_size * args.batch_size,
                               crop_size=input_size_target,
                               set=args.set,
                               transform=target_transform),
        batch_size=args.batch_size, shuffle=True, num_workers=5,
        pin_memory=True)
    targetloader_iter = enumerate(targetloader)

    # Per-pixel (unreduced) CE so confidence/validity masks can be applied.
    # reduce=False is deprecated; reduction='none' is the modern equivalent.
    criterion_seg = torch.nn.CrossEntropyLoss(ignore_index=255,
                                              reduction='none')

    optimizer_encoder, optimizer_decoder, optimizer_disc, optimizer_reconst = optimizers
    batch_time = AverageMeter(10)
    loss_seg_value1 = AverageMeter(10)
    best_test_mIoUs = 0
    loss_seg_value2 = AverageMeter(10)
    loss_reconst_source_value = AverageMeter(10)
    loss_reconst_target_value = AverageMeter(10)
    loss_balance_value = AverageMeter(10)
    loss_eq_att_value = AverageMeter(10)
    loss_pseudo_value = AverageMeter(10)
    bounding_num = AverageMeter(10)
    pseudo_num = AverageMeter(10)
    loss_bbx_att_value = AverageMeter(10)

    for i_iter in range(args.num_steps):
        end = time.time()

        # ---- supervised step on the source (GTA5) stream ----
        _, batch = next(trainloader_iter)
        images, labels, _ = batch
        # `async` became a reserved keyword in Python 3.7; `non_blocking`
        # is the replacement argument with identical semantics.
        images = Variable(images).cuda(non_blocking=True)
        labels = Variable(labels).cuda(non_blocking=True)
        results = model(images, labels)
        loss_seg2 = torch.mean(results[-2])
        loss_seg1 = torch.mean(results[-1])
        loss = args.lambda_trade_off * (loss_seg2 + args.lambda_seg * loss_seg1)
        loss_seg_value2.update(loss_seg2.data.cpu().numpy())
        optimizer_encoder.zero_grad()
        optimizer_decoder.zero_grad()
        loss.backward()
        optimizer_encoder.step()
        optimizer_decoder.step()

        # ---- pseudo-label step on the target (Cityscapes) stream ----
        _, batch = next(targetloader_iter)
        images, fake_labels, _ = batch
        images = Variable(images).cuda(non_blocking=True)
        fake_labels = Variable(fake_labels, requires_grad=False).cuda()
        results = model(images, None)
        target_seg = results[0]
        # Hard pseudo-labels from the model's own (softmax) confidence.
        conf_tea, pseudo_label = torch.max(
            nn.functional.softmax(target_seg, dim=1), dim=1)
        pseudo_label = pseudo_label.detach()
        loss_pseudo = criterion_seg(target_seg, pseudo_label)
        fake_mask = (fake_labels != 255).float().detach()
        conf_mask = torch.gt(conf_tea, args.conf_threshold).float().detach()
        # Keep only confident, valid pixels; zeroed entries are dropped.
        loss_pseudo = loss_pseudo * conf_mask * fake_mask
        loss_pseudo = loss_pseudo.view(-1)
        loss_pseudo = loss_pseudo[loss_pseudo != 0]

        # Class-balance loss against the global frequency prior.
        predict_class_mean = torch.mean(
            nn.functional.softmax(target_seg, dim=1), dim=0).mean(1).mean(1)
        equalise_cls_loss = robust_binary_crossentropy(predict_class_mean,
                                                       weighted_softmax)
        equalise_cls_loss = torch.mean(equalise_cls_loss)

        # Bounding-box (pooled) attention losses at several scales.
        loss_bbx_att = []
        loss_eq_att = []
        for box_idx, box_size in enumerate(args.box_size):
            pooling = torch.nn.AvgPool2d(box_size)
            pooling_result_i = pooling(target_seg)
            local_i = pooling(local_array).float().cuda()
            pooling_conf_mask, pooling_pseudo = torch.max(
                nn.functional.softmax(pooling_result_i, dim=1), dim=1)
            pooling_conf_mask = torch.gt(
                pooling_conf_mask, args.conf_threshold).float().detach()
            fake_mask_i = pooling(fake_labels.unsqueeze(1).float())
            fake_mask_i = fake_mask_i.squeeze(1)
            fake_mask_i = (fake_mask_i != 255).float().detach()
            loss_bbx_att_i = criterion_seg(pooling_result_i, pooling_pseudo)
            loss_bbx_att_i = loss_bbx_att_i * pooling_conf_mask * fake_mask_i
            loss_bbx_att_i = loss_bbx_att_i.view(-1)
            loss_bbx_att_i = loss_bbx_att_i[loss_bbx_att_i != 0]
            loss_bbx_att.append(loss_bbx_att_i)

            pooling_result_i = pooling_result_i.mean(0).unsqueeze(0)
            equalise_cls_loss_i = robust_binary_crossentropy(
                nn.functional.softmax(pooling_result_i, dim=1), local_i)
            equalise_cls_loss_i = equalise_cls_loss_i.mean(1)
            equalise_cls_loss_i = equalise_cls_loss_i * pooling_conf_mask * fake_mask_i
            equalise_cls_loss_i = equalise_cls_loss_i.view(-1)
            equalise_cls_loss_i = equalise_cls_loss_i[equalise_cls_loss_i != 0]
            loss_eq_att.append(equalise_cls_loss_i)

        if len(args.box_size) > 0:
            if args.merge_1x1:
                loss_bbx_att.append(loss_pseudo)
            loss_bbx_att = torch.cat(loss_bbx_att, dim=0)
            bounding_num.update(loss_bbx_att.size(0) /
                                float(560 * 480 * args.batch_size))
            loss_bbx_att = torch.mean(loss_bbx_att)

            loss_eq_att = torch.cat(loss_eq_att, dim=0)
            loss_eq_att = torch.mean(loss_eq_att)
            loss_eq_att_value.update(loss_eq_att.item())
        else:
            loss_bbx_att = torch.mean(loss_pseudo)
            loss_eq_att = 0

        pseudo_num.update(loss_pseudo.size(0) /
                          float(560 * 480 * args.batch_size))
        loss_pseudo = torch.mean(loss_pseudo)
        # BUG FIX: the original added the pseudo term to the already
        # backprop-ed source loss and then overwrote `loss` with the balance
        # term, silently discarding the pseudo contribution. Start the
        # target-step loss fresh, then accumulate.
        loss = args.lambda_balance * equalise_cls_loss
        if not args.merge_1x1:
            loss += args.lambda_pseudo * loss_pseudo
        if not isinstance(loss_bbx_att, list):  # always a tensor after the merge above
            loss += args.lambda_pseudo * loss_bbx_att
        loss += args.lambda_eq * loss_eq_att
        loss_pseudo_value.update(loss_pseudo.item())
        loss_balance_value.update(equalise_cls_loss.item())

        optimizer_encoder.zero_grad()
        optimizer_decoder.zero_grad()
        loss.backward()
        optimizer_encoder.step()
        optimizer_decoder.step()

        batch_time.update(time.time() - end)

        remain_iter = args.num_steps - i_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m), int(t_s))

        if i_iter == args.decrease_lr:
            adjust_learning_rate(optimizer_encoder, i_iter, args.lr_encoder, args)
            adjust_learning_rate(optimizer_decoder, i_iter, args.lr_decoder, args)
        if i_iter % args.print_freq == 0:
            lr_encoder = optimizer_encoder.param_groups[0]['lr']
            lr_decoder = optimizer_decoder.param_groups[0]['lr']
            logger.info('exp = {}'.format(args.snapshot_dir))
            logger.info('Iter = [{0}/{1}]\t'
                        'Time = {batch_time.avg:.3f}\t'
                        'loss_seg1 = {loss_seg1.avg:4f}\t'
                        'loss_seg2 = {loss_seg2.avg:.4f}\t'
                        'loss_reconst_source = {loss_reconst_source.avg:.4f}\t'
                        'loss_bbx_att = {loss_bbx_att.avg:.4f}\t'
                        'loss_reconst_target = {loss_reconst_target.avg:.4f}\t'
                        'loss_pseudo = {loss_pseudo.avg:.4f}\t'
                        'loss_eq_att = {loss_eq_att.avg:.4f}\t'
                        'loss_balance = {loss_balance.avg:.4f}\t'
                        'bounding_num = {bounding_num.avg:.4f}\t'
                        'pseudo_num = {pseudo_num.avg:4f}\t'
                        'lr_encoder = {lr_encoder:.8f} lr_decoder = {lr_decoder:.8f}'.format(
                         i_iter, args.num_steps, batch_time=batch_time,
                         loss_seg1=loss_seg_value1, loss_seg2=loss_seg_value2,
                         loss_pseudo=loss_pseudo_value,
                         loss_bbx_att = loss_bbx_att_value,
                         bounding_num = bounding_num,
                         loss_eq_att = loss_eq_att_value,
                         pseudo_num = pseudo_num,
                         loss_reconst_source=loss_reconst_source_value,
                         loss_balance=loss_balance_value,
                         loss_reconst_target=loss_reconst_target_value,
                         lr_encoder=lr_encoder,
                         lr_decoder=lr_decoder))

            logger.info("remain_time: {}".format(remain_time))
            if tb_logger is not None:
                tb_logger.add_scalar('loss_seg_value1', loss_seg_value1.avg, i_iter)
                tb_logger.add_scalar('loss_seg_value2', loss_seg_value2.avg, i_iter)
                tb_logger.add_scalar('bounding_num', bounding_num.avg, i_iter)
                tb_logger.add_scalar('pseudo_num', pseudo_num.avg, i_iter)
                tb_logger.add_scalar('loss_pseudo', loss_pseudo_value.avg, i_iter)
                tb_logger.add_scalar('lr', lr_encoder, i_iter)
                tb_logger.add_scalar('loss_balance', loss_balance_value.avg, i_iter)
            # NOTE(review): because this is nested under the print_freq check,
            # evaluation only runs when i_iter is a multiple of BOTH
            # print_freq and save_pred_every — confirm that is intended.
            if i_iter % args.save_pred_every == 0 and i_iter != 0:
                logger.info('taking snapshot ...')
                model.eval()
                hist = np.zeros((19, 19))
                for index, batch in tqdm(enumerate(valloader)):
                    with torch.no_grad():
                        image, name = batch
                        results = model(Variable(image).cuda(), None)
                        output2 = results[0]
                        pred = interp_val(output2)
                        del output2
                        pred = pred.cpu().data[0].numpy()
                        pred = pred.transpose(1, 2, 0)
                        pred = np.asarray(np.argmax(pred, axis=2), dtype=np.uint8)
                        label = np.array(Image.open(gt_imgs_val[index]))
                        label = label_mapping(label, mapping)
                        hist += fast_hist(label.flatten(), pred.flatten(), 19)
                mIoUs = per_class_iu(hist)
                for ind_class in range(args.num_classes):
                    logger.info('===>' + name_classes[ind_class] + ':\t' +
                                str(round(mIoUs[ind_class] * 100, 2)))
                    tb_logger.add_scalar(name_classes[ind_class] + '_mIoU',
                                         mIoUs[ind_class], i_iter)

                mIoUs = round(np.nanmean(mIoUs) * 100, 2)
                is_best_test = False
                logger.info(mIoUs)
                tb_logger.add_scalar('test mIoU', mIoUs, i_iter)
                if mIoUs > best_test_mIoUs:
                    best_test_mIoUs = mIoUs
                    is_best_test = True
                logger.info("best test mIoU {}".format(best_test_mIoUs))
                net_encoder, net_decoder, net_disc, net_reconst = nets
                save_checkpoint(net_encoder, 'encoder', i_iter, args, is_best_test)
                save_checkpoint(net_decoder, 'decoder', i_iter, args, is_best_test)
            model.train()
# Example #13
def main(args):
    """Fine-tune an ADE-pretrained segmentation model on a 12-class task.

    The encoder is frozen; the 150-class decoder heads are swapped for
    fresh 12-class convolutions before training.
    """
    # Build encoder/decoder from pretrained weights.
    model_builder = ModelBuilder()
    encoder = model_builder.build_encoder(arch=args.arch_encoder,
                                          fc_dim=args.fc_dim,
                                          weights=args.weights_encoder)
    decoder = model_builder.build_decoder(arch=args.arch_decoder,
                                          fc_dim=args.fc_dim,
                                          num_class=150,
                                          weights=args.weights_decoder)

    criterion = nn.NLLLoss(ignore_index=-1)

    # Deep-supervision decoders take an extra loss-scale argument.
    if args.arch_decoder.endswith('deepsup'):
        segmentation_module = SegmentationModule(encoder, decoder, criterion,
                                                 args.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(encoder, decoder, criterion)

    # Freeze the encoder: only decoder parameters will receive gradients.
    for param in segmentation_module.encoder.parameters():
        param.requires_grad = False
    # Replace the final (and deep-supervision) heads with 12-class ones.
    segmentation_module.decoder.conv_last = nn.Conv2d(args.fc_dim // 4, 12, 1,
                                                      1, 0)
    segmentation_module.decoder.conv_last_deepsup = nn.Conv2d(
        args.fc_dim // 4, 12, 1, 1, 0)

    # Dataset and loader.
    dataset_train = TrainDataset(args.list_train,
                                 args,
                                 batch_per_gpu=args.batch_size_per_gpu)
    loader_train = torchdata.DataLoader(
        dataset_train,
        batch_size=len(args.gpus),  # we have modified data_parallel
        shuffle=False,  # we do not use this param
        collate_fn=user_scattered_collate,
        num_workers=int(args.workers),
        drop_last=True,
        pin_memory=True)

    print('1 Epoch = {} iters'.format(args.epoch_iters))

    iterator_train = iter(loader_train)

    # Move onto GPU(s); sync-BN patch only matters for multi-GPU.
    if len(args.gpus) > 1:
        segmentation_module = UserScatteredDataParallel(segmentation_module,
                                                        device_ids=args.gpus)
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # Optimizers over the (shared) underlying modules.
    nets = (encoder, decoder, criterion)
    optimizers = create_optimizers(nets, args)

    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}

    for epoch in range(args.start_epoch, args.num_epoch + 1):
        train(segmentation_module, iterator_train, optimizers, history, epoch,
              args)
        checkpoint(nets, history, args, epoch)

    print('Training Done!')
# Example #14
def main(args):
    """Semi-supervised CityScapes training with a novel-view warp branch.

    One decoder predicts segmentation; a second predicts planes consumed by
    the homography-warp module. Trains on a small labelled split plus the
    full unlabelled split, evaluating periodically.
    """
    # Networks.
    model_builder = ModelBuilder()
    encoder = model_builder.build_encoder(weights=args.weights_encoder)
    seg_decoder = model_builder.build_decoder(weights=args.weights_decoder,
                                              use_softmax=False)
    plane_decoder = model_builder.build_decoder(arch='c1',
                                                num_class=args.num_class,
                                                num_plane=args.num_plane,
                                                use_softmax=False,
                                                weights=args.weights_plane_net)

    # Warp application module.
    warp = NovelViewHomography()

    # Segmentation loss (optionally class-weighted) and L2 reconstruction loss.
    if args.weighted_class:
        seg_crit = nn.NLLLoss(ignore_index=-1, weight=args.class_weight)
    else:
        seg_crit = nn.NLLLoss(ignore_index=-1)
    reconst_crit = nn.MSELoss()

    # Datasets: labelled subset, full unlabelled set, and validation split.
    dataset_train_sup = CityScapes('train', root=args.root_cityscapes,
                                   cropSize=args.imgSize,
                                   max_sample=args.num_sup, is_train=1)
    dataset_train_unsup = CityScapes('train', root=args.root_cityscapes,
                                     cropSize=args.imgSize,
                                     max_sample=-1, is_train=1)
    dataset_val = CityScapes('val', root=args.root_cityscapes,
                             cropSize=args.imgSize,
                             max_sample=args.num_val, is_train=0)

    def build_loader(dataset, batch_size, shuffle):
        # All three loaders share worker count and drop_last behaviour.
        return torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=int(args.workers),
            drop_last=True)

    loader_train_sup = build_loader(dataset_train_sup, args.batch_size, True)
    loader_train_unsup = build_loader(dataset_train_unsup, args.batch_size, True)
    loader_val = build_loader(dataset_val, args.batch_size_eval, False)

    args.epoch_iters = int((args.gamma * len(dataset_train_sup)
                            + len(dataset_train_unsup)) / args.batch_size)
    print('1 Epoch = {} iters'.format(args.epoch_iters))

    # Spread each network across the requested GPUs.
    if args.num_gpus > 1:
        encoder = nn.DataParallel(encoder, device_ids=range(args.num_gpus))
        seg_decoder = nn.DataParallel(seg_decoder,
                                      device_ids=range(args.num_gpus))
        plane_decoder = nn.DataParallel(plane_decoder,
                                        device_ids=range(args.num_gpus))

    nets = (encoder, seg_decoder, plane_decoder, warp, seg_crit, reconst_crit)
    for net in nets:
        net.cuda()

    optimizers = create_optimizers(nets, args)

    # Per-split training history.
    history = {'train': {'epoch': [], 'err': [], 'acc': [], 'mIoU': []},
               'val': {'epoch': [], 'err': [], 'acc': [], 'mIoU': []}}

    # Initial evaluation before any training.
    evaluate(nets, loader_val, history, 0, args)
    for epoch in range(1, args.num_epoch + 1):
        train(nets, loader_train_sup, loader_train_unsup, optimizers,
              history, epoch, args)

        # Evaluation and visualization.
        if epoch % args.eval_epoch == 0:
            evaluate(nets, loader_val, history, epoch, args)

        checkpoint(nets, history, args)

        adjust_learning_rate(optimizers, epoch, args)

    print('Training Done!')
# Example #15
def main(cfg, gpus):
    """Train the segmentation model described by *cfg* on the GPUs in *gpus*."""
    # Network builders.
    encoder = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_encoder)
    decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        num_class=cfg.DATASET.num_class,
        weights=cfg.MODEL.weights_decoder)

    criterion = nn.NLLLoss(ignore_index=-1)

    # Deep-supervision decoders take an extra loss-scale argument.
    if cfg.MODEL.arch_decoder.endswith('deepsup'):
        segmentation_module = SegmentationModule(
            encoder, decoder, criterion, cfg.TRAIN.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(encoder, decoder, criterion)

    # Dataset and loader.
    dataset_train = TrainDataset(cfg.DATASET.root_dataset,
                                 cfg.DATASET.list_train,
                                 cfg.DATASET,
                                 batch_per_gpu=cfg.TRAIN.batch_size_per_gpu)
    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=len(gpus),  # we have modified data_parallel
        shuffle=False,  # we do not use this param
        collate_fn=user_scattered_collate,
        num_workers=cfg.TRAIN.workers,
        drop_last=True,
        pin_memory=True)
    print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters))

    iterator_train = iter(loader_train)

    # Move onto GPU(s); the replication patch enables sync BN.
    if len(gpus) > 1:
        segmentation_module = UserScatteredDataParallel(
            segmentation_module, device_ids=gpus)
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # Optimizers over the underlying modules.
    nets = (encoder, decoder, criterion)
    optimizers = create_optimizers(nets, cfg)

    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}

    for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch):
        train(segmentation_module, iterator_train, optimizers, history,
              epoch + 1, cfg)
        checkpoint(nets, history, cfg, epoch + 1)

    print('Training Done!')
def load_model(data_gen: AudioGenerator, model_builder: ModelBuilder):
    """Build the acoustic model for this generator's input width and restore
    its pretrained weights ('results/<Spec|MFCC> <model name>.h5')."""
    net = model_builder.model(input_shape=(None, data_gen.input_dim),
                              output_dim=29)
    feature_tag = "Spec " if data_gen.spectrogram else "MFCC "
    net.load_weights('results/' + feature_tag + net.name + '.h5')
    return net
def main(cfg, gpus):
    """Train the segmentation model described by *cfg* on the GPUs in *gpus*.

    Optionally evaluates every ``cfg.TRAIN.eval_step`` epochs, tracking
    ``(iou + acc) / 2`` and checkpointing the best-scoring weights, and
    always checkpoints at the end of each epoch.
    """
    torch.backends.cudnn.enabled = False
    # Network builders
    net_encoder = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        num_class=cfg.DATASET.num_class,
        weights=cfg.MODEL.weights_decoder)

    # OCR decoders use plain cross-entropy; others use NLL on log-softmax.
    if cfg.MODEL.arch_decoder == 'ocr':
        print('Using cross entropy loss')
        crit = CrossEntropy(ignore_label=-1)
    else:
        crit = nn.NLLLoss(ignore_index=-1)

    # Deep-supervision decoders take an extra loss-scale argument.
    if cfg.MODEL.arch_decoder.endswith('deepsup'):
        segmentation_module = SegmentationModule(net_encoder, net_decoder,
                                                 crit,
                                                 cfg.TRAIN.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(net_encoder, net_decoder,
                                                 crit)

    # Dataset and Loader
    dataset_train = TrainDataset(cfg.DATASET.root_dataset,
                                 cfg.DATASET.list_train,
                                 cfg.DATASET,
                                 batch_per_gpu=cfg.TRAIN.batch_size_per_gpu)

    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=len(gpus),
        shuffle=False,  # parameter is not used
        collate_fn=user_scattered_collate,
        num_workers=cfg.TRAIN.workers,
        drop_last=True,
        pin_memory=True)
    # create loader iterator
    iterator_train = iter(loader_train)
    print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters))

    if cfg.TRAIN.eval:
        # Dataset and Loader for validation data
        dataset_val = ValDataset(cfg.DATASET.root_dataset,
                                 cfg.DATASET.list_val, cfg.DATASET)
        loader_val = torch.utils.data.DataLoader(
            dataset_val,
            batch_size=cfg.VAL.batch_size,
            shuffle=False,
            collate_fn=user_scattered_collate,
            num_workers=5,
            drop_last=True)
        iterator_val = iter(loader_val)

    # load nets into gpu
    if len(gpus) > 1:
        segmentation_module = UserScatteredDataParallel(segmentation_module,
                                                        device_ids=gpus)
        # For sync bn
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # Set up optimizers
    nets = (net_encoder, net_decoder, crit)
    optimizers = create_optimizers(nets, cfg)

    # Main loop
    history = {
        'train': {
            'epoch': [],
            'loss': [],
            'acc': [],
            'last_score': 0,
            'best_score': cfg.TRAIN.best_score
        }
    }
    for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch):
        train(segmentation_module, iterator_train, optimizers, history,
              epoch + 1, cfg)
        # Calculate the segmentation score every eval_step epochs.
        # BUG FIX: range() accepts no keyword arguments, so the original
        # `range(..., step=cfg.TRAIN.eval_step)` raised TypeError the first
        # time evaluation was enabled; the step must be positional.
        if cfg.TRAIN.eval and epoch in range(cfg.TRAIN.start_epoch,
                                             cfg.TRAIN.num_epoch,
                                             cfg.TRAIN.eval_step):
            iou, acc = evaluate(segmentation_module, iterator_val, cfg, gpus)
            history['train']['last_score'] = (iou + acc) / 2
            if history['train']['last_score'] > history['train']['best_score']:
                history['train']['best_score'] = history['train']['last_score']
                checkpoint(nets, history, cfg, 'best_score')
        # checkpointing
        checkpoint(nets, history, cfg, epoch + 1)
    print('Training Done!')
# Example #18
def main(cfg, gpus):
    """Train the few-shot attention segmentation model.

    Builds the query/memory encoders, attention heads, projection and
    decoder (optionally attaching a frozen objectness branch), loads the
    episodic training data for the selected fold, and runs the
    iteration-based training loop with periodic checkpointing and
    multi-run validation; the best validation mIoU is saved as 'best'.
    """
    # Network Builders
    torch.cuda.set_device(gpus[0])
    print('###### Create model ######')
    net_enc_query = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_enc_query,
        fix_encoder=cfg.TRAIN.fix_encoder)
    net_enc_memory = ModelBuilder.build_encoder_memory_separate(
        arch=cfg.MODEL.arch_memory_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_enc_memory,
        num_class=cfg.TASK.n_ways + 1,
        RGB_mask_combine_val=cfg.DATASET.RGB_mask_combine_val,
        segm_downsampling_rate=cfg.DATASET.segm_downsampling_rate)
    net_att_query = ModelBuilder.build_attention(
        arch=cfg.MODEL.arch_attention,
        input_dim=cfg.MODEL.encoder_dim,
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_att_query)
    net_att_memory = ModelBuilder.build_attention(
        arch=cfg.MODEL.arch_attention,
        input_dim=cfg.MODEL.fc_dim,
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_att_memory)
    net_projection = ModelBuilder.build_projection(
        arch=cfg.MODEL.arch_projection,
        input_dim=cfg.MODEL.encoder_dim,
        fc_dim=cfg.MODEL.projection_dim,
        weights=cfg.MODEL.weights_projection)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        input_dim=cfg.MODEL.decoder_dim,
        fc_dim=cfg.MODEL.decoder_fc_dim,
        ppm_dim=cfg.MODEL.ppm_dim,
        num_class=cfg.TASK.n_ways + 1,
        weights=cfg.MODEL.weights_decoder,
        dropout_rate=cfg.MODEL.dropout_rate,
        use_dropout=cfg.MODEL.use_dropout)

    # Optional objectness branch; only built when both weight paths are
    # configured, and kept frozen (no gradients) during training.
    if cfg.MODEL.weights_objectness and cfg.MODEL.weights_objectness_decoder:
        net_objectness = ModelBuilder.build_objectness(
            arch='hrnetv2',
            weights=cfg.MODEL.weights_objectness,
            fix_encoder=True)
        net_objectness_decoder = ModelBuilder.build_decoder(
            arch='c1_nodropout',
            input_dim=720,
            fc_dim=720,
            ppm_dim=256,
            num_class=2,
            weights=cfg.MODEL.weights_objectness_decoder,
            use_dropout=False)
        for param in net_objectness.parameters():
            param.requires_grad = False
        for param in net_objectness_decoder.parameters():
            param.requires_grad = False
    else:
        net_objectness = None
        net_objectness_decoder = None

    # 255 is the ignore label in the episodic masks.
    crit = nn.NLLLoss(ignore_index=255)

    segmentation_module = SegmentationAttentionSeparateModule(
        net_enc_query,
        net_enc_memory,
        net_att_query,
        net_att_memory,
        net_decoder,
        net_projection,
        net_objectness,
        net_objectness_decoder,
        crit,
        zero_memory=cfg.MODEL.zero_memory,
        random_memory_bias=cfg.MODEL.random_memory_bias,
        random_memory_nobias=cfg.MODEL.random_memory_nobias,
        random_scale=cfg.MODEL.random_scale,
        zero_qval=cfg.MODEL.zero_qval,
        normalize_key=cfg.MODEL.normalize_key,
        p_scalar=cfg.MODEL.p_scalar,
        memory_feature_aggregation=cfg.MODEL.memory_feature_aggregation,
        memory_noLabel=cfg.MODEL.memory_noLabel,
        mask_feat_downsample_rate=cfg.MODEL.mask_feat_downsample_rate,
        att_mat_downsample_rate=cfg.MODEL.att_mat_downsample_rate,
        objectness_feat_downsample_rate=(
            cfg.MODEL.objectness_feat_downsample_rate),
        segm_downsampling_rate=cfg.DATASET.segm_downsampling_rate,
        mask_foreground=cfg.MODEL.mask_foreground,
        global_pool_read=cfg.MODEL.global_pool_read,
        average_memory_voting=cfg.MODEL.average_memory_voting,
        average_memory_voting_nonorm=cfg.MODEL.average_memory_voting_nonorm,
        mask_memory_RGB=cfg.MODEL.mask_memory_RGB,
        linear_classifier_support=cfg.MODEL.linear_classifier_support,
        decay_lamb=cfg.MODEL.decay_lamb,
        linear_classifier_support_only=(
            cfg.MODEL.linear_classifier_support_only),
        qread_only=cfg.MODEL.qread_only,
        feature_as_key=cfg.MODEL.feature_as_key,
        objectness_multiply=cfg.MODEL.objectness_multiply)

    print('###### Load data ######')
    data_name = cfg.DATASET.name
    if data_name == 'VOC':
        from dataloaders.customized_objectness_debug import voc_fewshot
        make_data = voc_fewshot
        max_label = 20
    elif data_name == 'COCO':
        from dataloaders.customized_objectness_debug import coco_fewshot
        make_data = coco_fewshot
        max_label = 80
    else:
        raise ValueError('Wrong config for dataset!')
    # Train on the selected fold's classes; validate on the held-out ones.
    labels = CLASS_LABELS[data_name][cfg.TASK.fold_idx]
    labels_val = CLASS_LABELS[data_name]['all'] - CLASS_LABELS[data_name][
        cfg.TASK.fold_idx]
    if cfg.DATASET.exclude_labels:
        exclude_labels = labels_val
    else:
        exclude_labels = []
    transforms = Compose([Resize(size=cfg.DATASET.input_size), RandomMirror()])
    dataset = make_data(base_dir=cfg.DATASET.data_dir,
                        split=cfg.DATASET.data_split,
                        transforms=transforms,
                        to_tensor=ToTensorNormalize(),
                        labels=labels,
                        max_iters=cfg.TRAIN.n_iters * cfg.TRAIN.n_batch,
                        n_ways=cfg.TASK.n_ways,
                        n_shots=cfg.TASK.n_shots,
                        n_queries=cfg.TASK.n_queries,
                        permute=cfg.TRAIN.permute_labels,
                        exclude_labels=exclude_labels,
                        use_ignore=cfg.use_ignore)
    trainloader = DataLoader(dataset,
                             batch_size=cfg.TRAIN.n_batch,
                             shuffle=True,
                             num_workers=4,
                             pin_memory=True,
                             drop_last=True)

    segmentation_module.cuda()

    # Set up optimizers
    nets = (net_enc_query, net_enc_memory, net_att_query, net_att_memory,
            net_decoder, net_projection, crit)
    optimizers = create_optimizers(nets, cfg)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    ave_total_loss = AverageMeter()
    ave_acc = AverageMeter()

    history = {'train': {'iter': [], 'loss': [], 'acc': []}}

    # Train mode (BN optionally frozen); objectness branch always in eval.
    segmentation_module.train(not cfg.TRAIN.fix_bn)
    if net_objectness and net_objectness_decoder:
        net_objectness.eval()
        net_objectness_decoder.eval()

    best_iou = 0
    # main loop
    tic = time.time()

    print('###### Training ######')
    for i_iter, sample_batched in enumerate(trainloader):
        # Prepare input
        feed_dict = data_preprocess(sample_batched, cfg)

        data_time.update(time.time() - tic)
        segmentation_module.zero_grad()

        # adjust learning rate
        adjust_learning_rate(optimizers, i_iter, cfg)

        # forward pass
        loss, acc = segmentation_module(feed_dict)
        loss = loss.mean()
        acc = acc.mean()

        # Backward
        loss.backward()
        for optimizer in optimizers:
            if optimizer:
                optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        # update average loss and acc
        ave_total_loss.update(loss.data.item())
        ave_acc.update(acc.data.item() * 100)

        # calculate accuracy, and display
        if i_iter % cfg.TRAIN.disp_iter == 0:
            print('Iter: [{}][{}/{}], Time: {:.2f}, Data: {:.2f}, '
                  'lr_encoder: {:.6f}, lr_decoder: {:.6f}, '
                  'Accuracy: {:4.2f}, Loss: {:.6f}'.format(
                      i_iter, i_iter, cfg.TRAIN.n_iters, batch_time.average(),
                      data_time.average(), cfg.TRAIN.running_lr_encoder,
                      cfg.TRAIN.running_lr_decoder, ave_acc.average(),
                      ave_total_loss.average()))

            history['train']['iter'].append(i_iter)
            history['train']['loss'].append(loss.data.item())
            history['train']['acc'].append(acc.data.item())

        if (i_iter + 1) % cfg.TRAIN.save_freq == 0:
            checkpoint(nets, history, cfg, i_iter + 1)

        if (i_iter + 1) % cfg.TRAIN.eval_freq == 0:
            # Periodic validation: switch to eval mode and softmax output,
            # run cfg.VAL.n_runs episodic passes over the held-out classes.
            metric = Metric(max_label=max_label, n_runs=cfg.VAL.n_runs)
            with torch.no_grad():
                print('----Evaluation----')
                segmentation_module.eval()
                net_decoder.use_softmax = True
                for run in range(cfg.VAL.n_runs):
                    print(f'### Run {run + 1} ###')
                    set_seed(cfg.VAL.seed + run)

                    print('### Load validation data ###')
                    dataset_val = make_data(base_dir=cfg.DATASET.data_dir,
                                            split=cfg.DATASET.data_split,
                                            transforms=transforms,
                                            to_tensor=ToTensorNormalize(),
                                            labels=labels_val,
                                            max_iters=cfg.VAL.n_iters *
                                            cfg.VAL.n_batch,
                                            n_ways=cfg.TASK.n_ways,
                                            n_shots=cfg.TASK.n_shots,
                                            n_queries=cfg.TASK.n_queries,
                                            permute=cfg.VAL.permute_labels,
                                            exclude_labels=[])
                    if data_name == 'COCO':
                        coco_cls_ids = dataset_val.datasets[
                            0].dataset.coco.getCatIds()
                    testloader = DataLoader(dataset_val,
                                            batch_size=cfg.VAL.n_batch,
                                            shuffle=False,
                                            num_workers=1,
                                            pin_memory=True,
                                            drop_last=False)
                    # Report the size of the validation set being evaluated
                    # (previously printed len(dataset), the training set).
                    print(f"Total # of validation Data: {len(dataset_val)}")

                    for sample_batched in testloader:
                        feed_dict = data_preprocess(sample_batched,
                                                    cfg,
                                                    is_val=True)
                        if data_name == 'COCO':
                            # Map COCO category ids to contiguous labels.
                            label_ids = [
                                coco_cls_ids.index(x) + 1
                                for x in sample_batched['class_ids']
                            ]
                        else:
                            label_ids = list(sample_batched['class_ids'])

                        query_pred = segmentation_module(
                            feed_dict, segSize=cfg.DATASET.input_size)
                        metric.record(
                            np.array(query_pred.argmax(dim=1)[0].cpu()),
                            np.array(feed_dict['seg_label'][0].cpu()),
                            labels=label_ids,
                            n_run=run)

                    # Per-run IoU (aggregated across runs after the loop).
                    classIoU, meanIoU = metric.get_mIoU(
                        labels=sorted(labels_val), n_run=run)
                    classIoU_binary, meanIoU_binary = metric.get_mIoU_binary(
                        n_run=run)

            classIoU, classIoU_std, meanIoU, meanIoU_std = metric.get_mIoU(
                labels=sorted(labels_val))
            classIoU_binary, classIoU_std_binary, meanIoU_binary, meanIoU_std_binary = metric.get_mIoU_binary(
            )

            print('----- Evaluation Result -----')
            print(f'best meanIoU mean: {best_iou}')
            print(f'meanIoU mean: {meanIoU}')
            print(f'meanIoU std: {meanIoU_std}')
            print(f'meanIoU_binary mean: {meanIoU_binary}')
            print(f'meanIoU_binary std: {meanIoU_std_binary}')

            checkpoint(nets, history, cfg, 'latest')

            if meanIoU > best_iou:
                best_iou = meanIoU
                checkpoint(nets, history, cfg, 'best')
            # Restore training mode and log-prob (non-softmax) decoder output.
            segmentation_module.train(not cfg.TRAIN.fix_bn)
            if net_objectness and net_objectness_decoder:
                net_objectness.eval()
                net_objectness_decoder.eval()
            net_decoder.use_softmax = False

    print('Training Done!')
Пример #19
0
def main(cfg, gpus):
    """Evaluate the few-shot attention segmentation model.

    Builds the full model in inference mode (decoder softmax enabled),
    loads the episodic test data for the held-out fold of VOC/COCO, and
    runs ``cfg.VAL.n_runs`` evaluation passes, recording per-class and
    binary IoU, with optional multi-scale testing, attention-voting
    evaluation, debug dumps and result visualization.
    """
    torch.cuda.set_device(gpus[0])

    # Network Builders
    net_enc_query = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_enc_query,
        fix_encoder=cfg.TRAIN.fix_encoder)
    net_enc_memory = ModelBuilder.build_encoder_memory_separate(
        arch=cfg.MODEL.arch_memory_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_enc_memory,
        num_class=cfg.TASK.n_ways + 1,
        RGB_mask_combine_val=cfg.DATASET.RGB_mask_combine_val,
        segm_downsampling_rate=cfg.DATASET.segm_downsampling_rate)
    net_att_query = ModelBuilder.build_attention(
        arch=cfg.MODEL.arch_attention,
        input_dim=cfg.MODEL.encoder_dim,
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_att_query)
    net_att_memory = ModelBuilder.build_attention(
        arch=cfg.MODEL.arch_attention,
        input_dim=cfg.MODEL.fc_dim,
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_att_memory)
    net_projection = ModelBuilder.build_projection(
        arch=cfg.MODEL.arch_projection,
        input_dim=cfg.MODEL.encoder_dim,
        fc_dim=cfg.MODEL.projection_dim,
        weights=cfg.MODEL.weights_projection)
    # use_softmax=True: decoder emits probabilities for inference.
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        input_dim=cfg.MODEL.decoder_dim,
        fc_dim=cfg.MODEL.decoder_fc_dim,
        ppm_dim=cfg.MODEL.ppm_dim,
        num_class=cfg.TASK.n_ways + 1,
        weights=cfg.MODEL.weights_decoder,
        dropout_rate=cfg.MODEL.dropout_rate,
        use_dropout=cfg.MODEL.use_dropout,
        use_softmax=True)
    # Optional frozen objectness branch; built only when both weight
    # paths are configured.
    if cfg.MODEL.weights_objectness and cfg.MODEL.weights_objectness_decoder:
        '''net_objectness = ModelBuilder.build_objectness(
            arch='resnet50_deeplab',
            weights=cfg.MODEL.weights_objectness,
            fix_encoder=True)
        net_objectness_decoder = ModelBuilder.build_decoder(
            arch='aspp_few_shot',
            input_dim=2048,
            fc_dim=256,
            ppm_dim=256,
            num_class=2,
            weights=cfg.MODEL.weights_objectness_decoder,
            dropout_rate=0.5,
            use_dropout=True)'''
        net_objectness = ModelBuilder.build_objectness(
            arch=cfg.MODEL.arch_objectness,
            weights=cfg.MODEL.weights_objectness,
            fix_encoder=True)
        net_objectness_decoder = ModelBuilder.build_decoder(
            arch='c1_nodropout',
            input_dim=cfg.MODEL.decoder_objectness_dim,
            fc_dim=cfg.MODEL.decoder_objectness_dim,
            ppm_dim=256,
            num_class=2,
            weights=cfg.MODEL.weights_objectness_decoder,
            use_dropout=False)
        for param in net_objectness.parameters():
            param.requires_grad = False
        for param in net_objectness_decoder.parameters():
            param.requires_grad = False
    else:
        net_objectness = None
        net_objectness_decoder = None

    # 255 is the ignore label in the episodic masks.
    crit = nn.NLLLoss(ignore_index=255)

    # debug=True makes the module return intermediate attention tensors
    # (qread, qval, p, ...) used by the voting/debug paths below.
    segmentation_module = SegmentationAttentionSeparateModule(
        net_enc_query,
        net_enc_memory,
        net_att_query,
        net_att_memory,
        net_decoder,
        net_projection,
        net_objectness,
        net_objectness_decoder,
        crit,
        zero_memory=cfg.MODEL.zero_memory,
        zero_qval=cfg.MODEL.zero_qval,
        normalize_key=cfg.MODEL.normalize_key,
        p_scalar=cfg.MODEL.p_scalar,
        memory_feature_aggregation=cfg.MODEL.memory_feature_aggregation,
        memory_noLabel=cfg.MODEL.memory_noLabel,
        debug=cfg.is_debug or cfg.eval_att_voting,
        mask_feat_downsample_rate=cfg.MODEL.mask_feat_downsample_rate,
        att_mat_downsample_rate=cfg.MODEL.att_mat_downsample_rate,
        objectness_feat_downsample_rate=cfg.MODEL.
        objectness_feat_downsample_rate,
        segm_downsampling_rate=cfg.DATASET.segm_downsampling_rate,
        mask_foreground=cfg.MODEL.mask_foreground,
        global_pool_read=cfg.MODEL.global_pool_read,
        average_memory_voting=cfg.MODEL.average_memory_voting,
        average_memory_voting_nonorm=cfg.MODEL.average_memory_voting_nonorm,
        mask_memory_RGB=cfg.MODEL.mask_memory_RGB,
        linear_classifier_support=cfg.MODEL.linear_classifier_support,
        decay_lamb=cfg.MODEL.decay_lamb,
        linear_classifier_support_only=cfg.MODEL.
        linear_classifier_support_only,
        qread_only=cfg.MODEL.qread_only,
        feature_as_key=cfg.MODEL.feature_as_key,
        objectness_multiply=cfg.MODEL.objectness_multiply)

    segmentation_module = nn.DataParallel(segmentation_module, device_ids=gpus)
    segmentation_module.cuda()
    segmentation_module.eval()

    print('###### Prepare data ######')
    data_name = cfg.DATASET.name
    if data_name == 'VOC':
        from dataloaders.customized import voc_fewshot
        make_data = voc_fewshot
        max_label = 20
    elif data_name == 'COCO':
        from dataloaders.customized import coco_fewshot
        make_data = coco_fewshot
        max_label = 80
        # COCO API instance is used later only for visualization lookups.
        split = cfg.DATASET.data_split + '2014'
        annFile = f'{cfg.DATASET.data_dir}/annotations/instances_{split}.json'
        cocoapi = COCO(annFile)
    else:
        raise ValueError('Wrong config for dataset!')
    # Test on the classes held out from the training fold.
    labels = CLASS_LABELS[data_name]['all'] - CLASS_LABELS[data_name][
        cfg.TASK.fold_idx]
    transforms = [Resize_test(size=cfg.DATASET.input_size)]
    transforms = Compose(transforms)

    print('###### Testing begins ######')
    metric = Metric(max_label=max_label, n_runs=cfg.VAL.n_runs)
    with torch.no_grad():
        for run in range(cfg.VAL.n_runs):
            print(f'### Run {run + 1} ###')
            # Seed per run so each pass samples a different episode set.
            set_seed(cfg.VAL.seed + run)

            print(f'### Load data ###')
            dataset = make_data(base_dir=cfg.DATASET.data_dir,
                                split=cfg.DATASET.data_split,
                                transforms=transforms,
                                to_tensor=ToTensorNormalize(),
                                labels=labels,
                                max_iters=cfg.VAL.n_iters * cfg.VAL.n_batch,
                                n_ways=cfg.TASK.n_ways,
                                n_shots=cfg.TASK.n_shots,
                                n_queries=cfg.TASK.n_queries,
                                permute=cfg.VAL.permute_labels,
                                exclude_labels=[])
            if data_name == 'COCO':
                coco_cls_ids = dataset.datasets[0].dataset.coco.getCatIds()
            testloader = DataLoader(dataset,
                                    batch_size=cfg.VAL.n_batch,
                                    shuffle=False,
                                    num_workers=1,
                                    pin_memory=True,
                                    drop_last=False)
            print(f"Total # of Data: {len(dataset)}")

            count = 0

            # Predictions are averaged over these input resolutions.
            if cfg.multi_scale_test:
                scales = [224, 328, 424]
            else:
                scales = [328]

            for sample_batched in tqdm.tqdm(testloader):
                feed_dict = data_preprocess(sample_batched, cfg)
                if data_name == 'COCO':
                    # Map COCO category ids to contiguous labels.
                    label_ids = [
                        coco_cls_ids.index(x) + 1
                        for x in sample_batched['class_ids']
                    ]
                else:
                    label_ids = list(sample_batched['class_ids'])

                # NOTE(review): each scale iteration overwrites
                # feed_dict['img_data'], so later scales re-interpolate the
                # previous scale's output rather than the original image —
                # confirm this chaining is intended.
                for q, scale in enumerate(scales):
                    if len(scales) > 1:
                        feed_dict['img_data'] = nn.functional.interpolate(
                            feed_dict['img_data'].cuda(),
                            size=(scale, scale),
                            mode='bilinear')
                    if cfg.eval_att_voting or cfg.is_debug:
                        # Debug forward returns intermediate tensors along
                        # with the prediction.
                        query_pred, qread, qval, qk_b, mk_b, mv_b, p, feature_enc, feature_memory = segmentation_module(
                            feed_dict,
                            segSize=(feed_dict['seg_label_noresize'].shape[1],
                                     feed_dict['seg_label_noresize'].shape[2]))
                        if cfg.eval_att_voting:
                            # Replace the decoder prediction with a mask
                            # obtained by propagating the support masks
                            # through the attention matrix p.
                            height, width = qread.shape[-2], qread.shape[-1]
                            assert p.shape[0] == height * width
                            img_refs_mask_resize = nn.functional.interpolate(
                                feed_dict['img_refs_mask'][0].cuda(),
                                size=(height, width),
                                mode='nearest')
                            img_refs_mask_resize_flat = img_refs_mask_resize[:, 0, :, :].view(
                                img_refs_mask_resize.shape[0], -1)
                            mask_voting_flat = torch.mm(
                                img_refs_mask_resize_flat, p)
                            mask_voting = mask_voting_flat.view(
                                mask_voting_flat.shape[0], height, width)
                            mask_voting = torch.unsqueeze(mask_voting, 0)
                            query_pred = nn.functional.interpolate(
                                mask_voting[:, 0:-1],
                                size=cfg.DATASET.input_size,
                                mode='bilinear',
                                align_corners=False)
                            if cfg.is_debug:
                                np.save(
                                    'debug/img_refs_mask-%04d-%s-%s.npy' %
                                    (count, sample_batched['query_ids'][0][0],
                                     sample_batched['support_ids'][0][0][0]),
                                    img_refs_mask_resize.detach().cpu().float(
                                    ).numpy())
                                np.save(
                                    'debug/query_pred-%04d-%s-%s.npy' %
                                    (count, sample_batched['query_ids'][0][0],
                                     sample_batched['support_ids'][0][0][0]),
                                    query_pred.detach().cpu().float().numpy())
                        if cfg.is_debug:
                            # Dump attention read/value tensors for offline
                            # inspection (expects a 'debug/' directory).
                            np.save(
                                'debug/qread-%04d-%s-%s.npy' %
                                (count, sample_batched['query_ids'][0][0],
                                 sample_batched['support_ids'][0][0][0]),
                                qread.detach().cpu().float().numpy())
                            np.save(
                                'debug/qval-%04d-%s-%s.npy' %
                                (count, sample_batched['query_ids'][0][0],
                                 sample_batched['support_ids'][0][0][0]),
                                qval.detach().cpu().float().numpy())
                            #np.save('debug/qk_b-%s-%s.npy'%(sample_batched['query_ids'][0][0], sample_batched['support_ids'][0][0][0]), qk_b.detach().cpu().float().numpy())
                            #np.save('debug/mk_b-%s-%s.npy'%(sample_batched['query_ids'][0][0], sample_batched['support_ids'][0][0][0]), mk_b.detach().cpu().float().numpy())
                            #np.save('debug/mv_b-%s-%s.npy'%(sample_batched['query_ids'][0][0], sample_batched['support_ids'][0][0][0]), mv_b.detach().cpu().float().numpy())
                            #np.save('debug/p-%04d-%s-%s.npy'%(count, sample_batched['query_ids'][0][0], sample_batched['support_ids'][0][0][0]), p.detach().cpu().float().numpy())
                            #np.save('debug/feature_enc-%s-%s.npy'%(sample_batched['query_ids'][0][0], sample_batched['support_ids'][0][0][0]), feature_enc[-1].detach().cpu().float().numpy())
                            #np.save('debug/feature_memory-%s-%s.npy'%(sample_batched['query_ids'][0][0], sample_batched['support_ids'][0][0][0]), feature_memory[-1].detach().cpu().float().numpy())
                    else:
                        # Predict at the original (un-resized) label size.
                        #query_pred = segmentation_module(feed_dict, segSize=cfg.DATASET.input_size)
                        query_pred = segmentation_module(
                            feed_dict,
                            segSize=(feed_dict['seg_label_noresize'].shape[1],
                                     feed_dict['seg_label_noresize'].shape[2]))
                    # Uniform average of the per-scale predictions.
                    if q == 0:
                        query_pred_final = query_pred / len(scales)
                    else:
                        query_pred_final += query_pred / len(scales)
                query_pred = query_pred_final
                metric.record(np.array(query_pred.argmax(dim=1)[0].cpu()),
                              np.array(
                                  feed_dict['seg_label_noresize'][0].cpu()),
                              labels=label_ids,
                              n_run=run)

                if cfg.VAL.visualize:
                    # Load the raw query image to overlay the prediction.
                    query_name = sample_batched['query_ids'][0][0]
                    support_name = sample_batched['support_ids'][0][0][0]
                    if data_name == 'VOC':
                        img = imread(
                            os.path.join(cfg.DATASET.data_dir, 'JPEGImages',
                                         query_name + '.jpg'))
                    else:
                        query_name = int(query_name)
                        img_meta = cocoapi.loadImgs(query_name)[0]
                        img = imread(
                            os.path.join(cfg.DATASET.data_dir, split,
                                         img_meta['file_name']))
                    #img = imresize(img, cfg.DATASET.input_size)
                    visualize_result(
                        (img, as_numpy(
                            feed_dict['seg_label_noresize'][0].cpu()), '%05d' %
                         (count)),
                        as_numpy(np.array(query_pred.argmax(dim=1)[0].cpu())),
                        os.path.join(cfg.DIR, 'result'))
                count += 1

            # Per-run IoU (aggregated across all runs after the loop).
            classIoU, meanIoU = metric.get_mIoU(labels=sorted(labels),
                                                n_run=run)
            classIoU_binary, meanIoU_binary = metric.get_mIoU_binary(n_run=run)
            '''_run.log_scalar('classIoU', classIoU.tolist())
            _run.log_scalar('meanIoU', meanIoU.tolist())
            _run.log_scalar('classIoU_binary', classIoU_binary.tolist())
            _run.log_scalar('meanIoU_binary', meanIoU_binary.tolist())
            _log.info(f'classIoU: {classIoU}')
            _log.info(f'meanIoU: {meanIoU}')
            _log.info(f'classIoU_binary: {classIoU_binary}')
            _log.info(f'meanIoU_binary: {meanIoU_binary}')'''

    # Aggregate mean and std over all runs.
    classIoU, classIoU_std, meanIoU, meanIoU_std = metric.get_mIoU(
        labels=sorted(labels))
    classIoU_binary, classIoU_std_binary, meanIoU_binary, meanIoU_std_binary = metric.get_mIoU_binary(
    )

    print('----- Final Result -----')
    print('final_classIoU', classIoU.tolist())
    print('final_classIoU_std', classIoU_std.tolist())
    print('final_meanIoU', meanIoU.tolist())
    print('final_meanIoU_std', meanIoU_std.tolist())
    print('final_classIoU_binary', classIoU_binary.tolist())
    print('final_classIoU_std_binary', classIoU_std_binary.tolist())
    print('final_meanIoU_binary', meanIoU_binary.tolist())
    print('final_meanIoU_std_binary', meanIoU_std_binary.tolist())
    print(f'classIoU mean: {classIoU}')
    print(f'classIoU std: {classIoU_std}')
    print(f'meanIoU mean: {meanIoU}')
    print(f'meanIoU std: {meanIoU_std}')
    print(f'classIoU_binary mean: {classIoU_binary}')
    print(f'classIoU_binary std: {classIoU_std_binary}')
    print(f'meanIoU_binary mean: {meanIoU_binary}')
    print(f'meanIoU_binary std: {meanIoU_std_binary}')
Пример #20
0
# CLI: input video, output file, and the warping-field output path.
input_file = sys.argv[1]
out_file = sys.argv[2]
out_warping_field_path = sys.argv[3]

# Processing constants.
SEG = 300
rho, nframe, Nkeep = 0.1, 20, 5
batchsize, margin = 8, 64

# Pretrained checkpoint locations.
_enc_weights = 'baseline-resnet50dilated-ppm_deepsup/encoder_epoch_20.pth'
_dec_weights = 'baseline-resnet50dilated-ppm_deepsup/decoder_epoch_20.pth'

# Assemble the 150-class segmentation network and move it to the GPU in
# inference mode.
builder = ModelBuilder()
net_encoder = builder.build_encoder(
    arch='resnet50dilated',
    fc_dim=2048,
    weights=_enc_weights)
net_decoder = builder.build_decoder(
    arch='ppm_deepsup',
    fc_dim=2048,
    num_class=150,
    weights=_dec_weights,
    use_softmax=True)
crit = torch.nn.NLLLoss(ignore_index=-1)
segmentation_module = SegmentationModule(net_encoder, net_decoder, crit).cuda()
segmentation_module.eval()

# Mean subtraction only (std fixed at 1) — input preprocessing.
normalize = transforms.Normalize(mean=[102.9801, 115.9465, 122.7717],
                                 std=[1., 1., 1.])
Пример #21
0
def main(args):
    """Build the Minus/Plus sound-separation networks, evaluate once, and
    (unless running in eval mode) train with periodic evaluation,
    checkpointing and learning-rate decay.
    """
    # Network builders.
    model_builder = ModelBuilder()
    net_sound_M = model_builder.build_sound(arch=args.arch_sound,
                                            fc_dim=args.num_channels,
                                            weights=args.weights_sound_M)
    net_frame_M = model_builder.build_frame(arch=args.arch_frame,
                                            fc_dim=args.num_channels,
                                            pool_type=args.img_pool,
                                            weights=args.weights_frame_M)
    net_sound_P = model_builder.build_sound(input_nc=2,
                                            arch=args.arch_sound,
                                            fc_dim=1,
                                            weights=args.weights_sound_P)

    nets = (net_sound_M, net_frame_M, net_sound_P)
    crit = model_builder.build_criterion(arch=args.loss)

    # Wrap the networks for multi-GPU execution; forward_mode selects the
    # training stage ('Minus', 'Plus', or 'Minus_Plus').
    wrapper = NetWrapper(nets, crit, mode=args.forward_mode)
    wrapper = torch.nn.DataParallel(wrapper, device_ids=range(args.num_gpus))
    wrapper.to(args.device)

    # Datasets and loaders.
    dataset_train = MUSICMixDataset(args.list_train, args, split='train')
    dataset_val = MUSICMixDataset(args.list_val,
                                  args,
                                  max_sample=args.num_val,
                                  split='val')
    train_loader = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=int(args.workers),
                                               drop_last=True)
    val_loader = torch.utils.data.DataLoader(dataset_val,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=2,
                                             drop_last=False)
    args.epoch_iters = len(dataset_train) // args.batch_size
    print('1 Epoch = {} iters'.format(args.epoch_iters))

    # Optimizer and trainer.
    optimizer = MP_Trainer.create_optimizer(nets, args)
    trainer = MP_Trainer(wrapper, optimizer, args)

    # Always evaluate once before any training.
    trainer.evaluate(val_loader)
    if trainer.mode == 'eval':
        print('Evaluation Done!')
        return

    # Training loop.
    for epoch in range(1, args.num_epoch + 1):
        trainer.epoch = epoch
        trainer.train(train_loader)

        # Periodic evaluation followed by a checkpoint.
        if epoch % args.eval_epoch == 0:
            trainer.evaluate(val_loader)
            trainer.checkpoint()

        # Decay the learning rate at the configured epochs.
        if epoch in args.lr_steps:
            trainer.adjust_learning_rate()

    print('Training Done!')
    trainer.writer.close()
Пример #22
0
def main(cfg, gpus):
    """Train an encoder/decoder segmentation model with apex AMP and DDP.

    Order matters here: the model is moved to GPU, optionally converted to
    synced BN, passed through ``amp.initialize``, and only then wrapped in
    ``DistributedDataParallel`` (as apex requires).

    NOTE(review): the ``gpus`` argument is never read inside this function —
    presumably device selection happens in the launcher; confirm intent.
    """
    # Network Builders
    net_encoder = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        num_class=cfg.DATASET.num_class,
        weights=cfg.MODEL.weights_decoder)

    # ignore_index=-1: pixels labeled -1 contribute nothing to the loss.
    crit = nn.NLLLoss(ignore_index=-1)

    # Deep-supervision decoders take an extra auxiliary-loss scale.
    if cfg.MODEL.arch_decoder.endswith('deepsup'):
        segmentation_module = SegmentationModule(net_encoder, net_decoder,
                                                 crit,
                                                 cfg.TRAIN.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(net_encoder, net_decoder,
                                                 crit)

    # Dataset and Loader
    dataset_train = TrainDataset(cfg.DATASET.root_dataset,
                                 cfg.DATASET.list_train,
                                 cfg.DATASET,
                                 batch_per_gpu=cfg.TRAIN.batch_size_per_gpu)

    train_sampler = torch.utils.data.distributed.DistributedSampler(
        dataset_train)

    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=cfg.TRAIN.batch_size_per_gpu,
        shuffle=(train_sampler is None),  # always False: a sampler is given
        collate_fn=user_scattered_collate,
        num_workers=cfg.TRAIN.workers,
        drop_last=True,
        pin_memory=True,
        sampler=train_sampler)
    print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters))

    # create loader iterator; train() draws batches from it across epochs
    iterator_train = iter(loader_train)

    # Model must be on GPU before syncbn conversion and amp.initialize.
    segmentation_module.cuda()

    if cfg.sync_bn:
        print("using apex synced BN")
        segmentation_module = apex.parallel.convert_syncbn_model(
            segmentation_module)

    # Set up optimizers
    nets = (net_encoder, net_decoder, crit)
    optimizers = create_optimizers(nets, cfg)

    # O1 mixed precision: ops auto-cast, master weights kept in fp32.
    segmentation_module, optimizers = amp.initialize(segmentation_module,
                                                     optimizers,
                                                     opt_level="O1")

    if cfg.distributed:
        # FOR DISTRIBUTED:  After amp.initialize, wrap the model with
        # apex.parallel.DistributedDataParallel.
        segmentation_module = DistributedDataParallel(segmentation_module)

    # Main loop
    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}

    for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch):
        train(segmentation_module, iterator_train, optimizers, history,
              epoch + 1, cfg)

        # checkpointing (apex-aware variant; saves the amp-wrapped module)
        checkpoint_apex(segmentation_module, history, cfg, epoch + 1)

    print('Training Done!')
Пример #23
0
def main(args):
    """Train either an encoder/decoder pair or a U-Net for segmentation.

    Builds the requested network(s), wraps them with an NLL criterion in a
    SegmentationModule, streams augmented batches from TrainDataset, and
    runs the epoch loop with per-epoch checkpointing.
    """
    # Network Builders
    builder = ModelBuilder()
    net_encoder = None
    net_decoder = None
    unet = None

    # PEP8: compare booleans with `not x`, never `x == False`.
    if not args.unet:
        net_encoder = builder.build_encoder(arch=args.arch_encoder,
                                            fc_dim=args.fc_dim,
                                            weights=args.weights_encoder)
        net_decoder = builder.build_decoder(arch=args.arch_decoder,
                                            fc_dim=args.fc_dim,
                                            num_class=args.num_class,
                                            weights=args.weights_decoder)
    else:
        unet = builder.build_unet(num_class=args.num_class,
                                  arch=args.unet_arch,
                                  weights=args.weights_unet)

        print("Froze the following layers: ")
        for name, p in unet.named_parameters():
            if not p.requires_grad:
                print(name)

    # Expects log-probabilities from the network.
    crit = nn.NLLLoss()

    if args.arch_decoder.endswith('deepsup') and not args.unet:
        # Deep supervision adds an auxiliary loss scaled by deep_sup_scale.
        segmentation_module = SegmentationModule(net_encoder, net_decoder,
                                                 crit, args.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(net_encoder,
                                                 net_decoder,
                                                 crit,
                                                 is_unet=args.unet,
                                                 unet=unet)

    # Joint geometric + photometric augmentations for image/label pairs.
    train_augs = Compose([
        RandomSized(224),
        RandomHorizontallyFlip(),
        RandomVerticallyFlip(),
        RandomRotate(180),
        AdjustContrast(cf=0.25),
        AdjustBrightness(bf=0.25)
    ])

    # Dataset and Loader
    dataset_train = TrainDataset(args.list_train,
                                 args,
                                 batch_per_gpu=args.batch_size_per_gpu,
                                 augmentations=train_augs)

    loader_train = data.DataLoader(
        dataset_train,
        batch_size=len(args.gpus),  # we have modified data_parallel
        shuffle=False,  # we do not use this param
        num_workers=int(args.workers),
        drop_last=True,
        pin_memory=False)

    print('1 Epoch = {} iters'.format(args.epoch_iters))
    # create loader iterator
    iterator_train = iter(loader_train)

    # load nets into gpu
    if len(args.gpus) > 1:
        segmentation_module = UserScatteredDataParallel(segmentation_module,
                                                        device_ids=args.gpus)
        # For sync bn
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # Set up optimizers: only the networks actually built get optimized.
    nets = (net_encoder, net_decoder, crit) if not args.unet else (unet, crit)
    optimizers = create_optimizers(nets, args)

    # Main loop
    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}

    for epoch in range(args.start_epoch, args.num_epoch + 1):
        train(segmentation_module, iterator_train, optimizers, history, epoch,
              args)
        # checkpointing
        checkpoint(nets, history, args, epoch)

    print('Training Done!')
Пример #24
0
def main(args):
    """Train encoder/decoder nets with periodic evaluation, checkpointing,
    and stepwise learning-rate decay.
    """
    # Network Builders
    builder = ModelBuilder()
    net_encoder = builder.build_encoder(arch=args.arch_encoder,
                                        fc_dim=args.fc_dim,
                                        weights=args.weights_encoder)
    net_decoder = builder.build_decoder(arch=args.arch_decoder,
                                        fc_dim=args.fc_dim,
                                        segSize=args.segSize,
                                        weights=args.weights_decoder)

    # nn.NLLLoss2d was deprecated and removed in recent PyTorch;
    # nn.NLLLoss handles 4-D (N, C, H, W) input identically.
    crit = nn.NLLLoss(ignore_index=-1)

    # Dataset and Loader
    dataset_train = Dataset(args.list_train, args, flip=args.flip, is_train=1)
    dataset_val = Dataset(args.list_val,
                          args,
                          flip=args.flip,
                          max_sample=args.num_val,
                          is_train=0)
    loader_train = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=int(args.workers),
                                               drop_last=True)
    loader_val = torch.utils.data.DataLoader(dataset_val,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=2,
                                             drop_last=True)
    # Floor division instead of int(a / b): same result, no float detour.
    args.epoch_iters = len(dataset_train) // args.batch_size
    print('1 Epoch = {} iters'.format(args.epoch_iters))

    # load nets into gpu
    if args.num_gpus > 1:
        net_encoder = nn.DataParallel(net_encoder,
                                      device_ids=range(args.num_gpus))
        net_decoder = nn.DataParallel(net_decoder,
                                      device_ids=range(args.num_gpus))
    nets = (net_encoder, net_decoder, crit)
    for net in nets:
        net.cuda()

    # Set up optimizers
    optimizers = create_optimizers(nets, args)

    # Main loop: per-iteration error/accuracy history for both splits.
    history = {
        split: {
            'iter': [],
            'err': [],
            'acc': []
        }
        for split in ('train', 'val')
    }
    # initial eval before any training
    evaluate(nets, loader_val, history, 0, args)
    for epoch in range(1, args.num_epoch + 1):
        train(nets, loader_train, optimizers, history, epoch, args)

        # Evaluation and visualization
        if epoch % args.eval_epoch == 0:
            evaluate(nets, loader_val, history, epoch, args)

        # checkpointing
        if epoch % args.ckpt_epoch == 0:
            checkpoint(nets, history, epoch, args)

        # adjust learning rate
        if epoch % args.lr_step == 0:
            adjust_learning_rate(optimizers, args)

    print('Training Done!')
Пример #25
0

def overlay(img, pred_color, blend_factor=0.3):
    """Alpha-blend a color-coded prediction onto an image, edges drawn red.

    Args:
        img: HxWx3 image array.
        pred_color: HxWx3 color-coded prediction (uint8, as cv2.Canny needs).
        blend_factor: weight of ``pred_color`` in the blend (0..1).

    Returns:
        HxWx3 float array: the blend, with dilated prediction edges painted
        (0, 0, 255) — red in OpenCV's BGR channel order.
    """
    # Edge map of the prediction, dilated so the contour stays visible.
    edges = cv2.Canny(pred_color, 20, 40)
    edges = cv2.dilate(edges, np.ones((5, 5), np.uint8), iterations=1)
    # Alpha blend (result is float).
    out = (1 - blend_factor) * img + blend_factor * pred_color
    # One vectorized boolean-mask assignment replaces the original
    # per-channel Python loop; broadcasting paints all edge pixels at once.
    out[edges == 255] = (0, 0, 255)
    return out

# Network Builders
# Build a pretrained encoder/decoder pair in inference mode
# (use_softmax=True) and prepare to run it on every image in
# args.test_folder.
builder = ModelBuilder()
net_encoder = builder.build_encoder(arch=args.arch_encoder, fc_dim=args.fc_dim, weights=args.weights_encoder)
net_decoder = builder.build_decoder(arch=args.arch_decoder, fc_dim=args.fc_dim, num_class=args.num_class, weights=args.weights_decoder, use_softmax=True)
crit = nn.NLLLoss(ignore_index=-1)
# Per input image: a .pgm label-map path and a "vis_"-prefixed overlay path.
# NOTE(review): os.listdir is called three times and its ordering is relied
# on to stay consistent across the calls — confirm, or list once and reuse.
input_fns = [os.path.join(args.test_folder,f) for f in os.listdir(args.test_folder)]
output_fns = [os.path.join(args.result,f[0:-3]+"pgm") for f in os.listdir(args.test_folder)]
output_vis_fns = [os.path.join(args.result,"vis_" + f) for f in os.listdir(args.test_folder)]
segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)
segmentation_module.cuda()
segmentation_module.eval()
# Class color palette loaded from a MATLAB file (150 entries).
colors = loadmat('data/color150.mat')['colors']

# Mean subtraction only (std of 1) — presumably Caffe-style BGR
# preprocessing; verify against the encoder's training pipeline.
transform = transforms.Compose([transforms.Normalize(mean=[102.9801, 115.9465, 122.7717], std=[1., 1., 1.])])
feed_dict = {}
# NOTE(review): the loop body appears truncated here — only the filename
# print is visible; per-image inference presumably follows.
for f,of,ovf in zip(input_fns,output_fns,output_vis_fns):
    print("Input: " + f)
Пример #26
0
def main():
    """Train an ImageNet-style classifier using DALI input pipelines.

    Reads the model spec for ``args.hardware`` from architectures.json,
    trains with label smoothing and a cosine LR schedule (optionally
    distributed via apex DDP), validates each epoch, and writes per-epoch
    top-1/top-5 metrics plus the best checkpoint under ``args.rootdir``.
    """
    global best_prec1, args

    args.gpu = 0
    args.world_size = 1

    if args.distributed:
        # One process per GPU: pick the rank-local device before NCCL init.
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    args.total_batch_size = args.world_size * args.batch_size

    # create model
    if not os.path.isfile("architectures.json"):
        print("missing architectures.json!")
        return

    # Context manager closes the file (original leaked the handle).
    with open('architectures.json', 'r') as f:
        modelstr = json.load(f)[args.hardware]
    print('current hardware:', args.hardware, 'model:', modelstr)
    mconfig = NetworkConfig(args.num_classes).build_modelconfig(
        modelstr.split(','))
    model = ModelBuilder(mconfig, args.num_classes)
    model = init_weights(model)  # init weights with xavier
    params = split_weights(model)  # apply no weight decay to selected params
    model = model.cuda()

    if args.distributed:
        # shared param/delay all reduce turns off bucketing in DDP, for lower
        # latency runs this can improve perf; for the older version of APEX
        # please use shared_param, for newer one it is delay_allreduce
        model = DDP(model, delay_allreduce=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    criterion_smooth = CrossEntropyLabelSmooth(args.num_classes, 0.1)
    criterion_smooth = criterion_smooth.cuda()

    optimizer = torch.optim.SGD(params, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)

    # Cosine schedule over (epochs - 5) * 1251 optimizer steps.
    # NOTE(review): 1251 looks like a hard-coded iters-per-epoch — confirm
    # it matches the dataset size / batch size actually in use.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs - 5) * 1251)
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(
                args.resume,
                map_location=lambda storage, loc: storage.cuda(args.gpu))
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Data loading code
    traindir = os.path.join(args.dataset, 'train')
    valdir = os.path.join(args.dataset, 'val')

    crop_size = 224
    val_size = 256

    # Each DALI pipeline is sized to this rank's shard of the dataset.
    pipe = HybridTrainPipe(batch_size=args.batch_size,
                           num_threads=args.workers,
                           device_id=args.local_rank,
                           data_dir=traindir,
                           crop=crop_size,
                           dali_cpu=args.dali_cpu)
    pipe.build()
    train_loader = DALIClassificationIterator(
        pipe, size=int(pipe.epoch_size("Reader") / args.world_size))

    pipe = HybridValPipe(batch_size=args.batch_size,
                         num_threads=args.workers,
                         device_id=args.local_rank,
                         data_dir=valdir,
                         crop=crop_size,
                         size=val_size)
    pipe.build()
    val_loader = DALIClassificationIterator(
        pipe, size=int(pipe.epoch_size("Reader") / args.world_size))

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    total_time = AverageMeter()
    dicts = {}
    iter_per_epoch = int(train_loader._size / args.batch_size)
    print('iter_per_epoch', iter_per_epoch)

    for epoch in range(args.start_epoch, args.epochs):
        # Train with the label-smoothed criterion; validate with plain CE.
        avg_train_time = train(train_loader, scheduler, model,
                               criterion_smooth, optimizer, epoch)
        total_time.update(avg_train_time)
        if args.prof:
            break
        # evaluate on validation set
        [prec1, prec5] = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint (rank 0 only)
        if args.local_rank == 0:
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)

            dicts[epoch] = {}
            dicts[epoch]['top1'] = prec1
            dicts[epoch]['top5'] = prec5
            # Rewrite the full metrics file each epoch; the with-block closes
            # the handle (the original leaked one file object per epoch).
            with open(args.rootdir + "/" + args.filename + ".json", 'w') as fw:
                fw.write(json.dumps(dicts, indent=4) + '\n')
            save_checkpoint(
                model.state_dict(), is_best,
                filename=args.rootdir + "/" + args.filename + ".pth.tar")

            if epoch == args.epochs - 1:
                print('##Top-1 {0}\n'
                      '##Top-5 {1}\n'
                      '##Perf  {2}'.format(
                          prec1, prec5,
                          args.total_batch_size / total_time.avg))

        # reset DALI iterators so the next epoch re-reads from the start
        train_loader.reset()
        val_loader.reset()
Пример #27
0
def main():
    """Create the model and start the training.

    Domain-adaptation segmentation trainer: merges a YAML config into
    ``args``, builds GTA5 (source) / Cityscapes (target) loaders, trains an
    encoder/decoder with a supervised segmentation loss, and periodically
    evaluates mIoU on the validation split, checkpointing encoder/decoder.
    """
    # Merge the 'common' section of the YAML config into the args namespace.
    # yaml.load without a Loader is deprecated/unsafe; safe_load parses the
    # same plain-YAML config without arbitrary object construction.
    with open(args.config) as f:
        config = yaml.safe_load(f)
    for k, v in config['common'].items():
        setattr(args, k, v)
    mkdirs(osp.join("logs/" + args.exp_name))

    logger = create_logger('global_logger',
                           "logs/" + args.exp_name + '/log.txt')
    logger.info('{}'.format(args))
    ##############################

    for key, val in vars(args).items():
        logger.info("{:16} {}".format(key, val))
    logger.info("random_scale {}".format(args.random_scale))
    logger.info("is_training {}".format(args.is_training))

    # All *_size args are "W,H" strings.
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    h, w = map(int, args.input_size_target.split(','))
    input_size_target = (h, w)
    cudnn.enabled = True
    args.snapshot_dir = args.snapshot_dir + args.exp_name
    tb_logger = SummaryWriter("logs/" + args.exp_name)
    ##############################

    # validation data
    h, w = map(int, args.input_size_test.split(','))
    input_size_test = (h, w)
    h, w = map(int, args.com_size.split(','))
    com_size = (h, w)
    h, w = map(int, args.input_size_crop.split(','))
    input_size_crop = h, w
    h, w = map(int, args.input_size_target_crop.split(','))
    input_size_target_crop = h, w

    test_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                          std=[0.229, 0.224, 0.225])
    test_transform = transforms.Compose([
        transforms.Resize((input_size_test[1], input_size_test[0])),
        transforms.ToTensor(), test_normalize
    ])

    testloader = data.DataLoader(cityscapesDataSet(args.data_dir_target,
                                                   args.data_list_target_val,
                                                   crop_size=input_size_test,
                                                   set='train',
                                                   transform=test_transform),
                                 num_workers=args.num_workers,
                                 batch_size=1,
                                 shuffle=False,
                                 pin_memory=True)
    with open('./dataset/cityscapes_list/info.json', 'r') as fp:
        info = json.load(fp)
    # np.int / np.str aliases were removed in NumPy 1.24; the builtins are
    # what they always resolved to.
    mapping = np.array(info['label2train'], dtype=int)
    label_path_list_val = args.label_path_list_val
    label_path_list_test = './dataset/cityscapes_list/label.txt'
    # with-blocks close the list files (original leaked both handles).
    with open(label_path_list_val, 'r') as fv:
        gt_imgs_val = fv.read().splitlines()
    gt_imgs_val = [osp.join(args.data_dir_target_val, x) for x in gt_imgs_val]
    test1loader = data.DataLoader(cityscapesDataSet(args.data_dir_target,
                                                    args.data_list_target_test,
                                                    crop_size=input_size_test,
                                                    set='val',
                                                    transform=test_transform),
                                  num_workers=args.num_workers,
                                  batch_size=1,
                                  shuffle=False,
                                  pin_memory=True)

    with open(label_path_list_test, 'r') as ft:
        gt_imgs_test = ft.read().splitlines()
    gt_imgs_test = [
        osp.join(args.data_dir_target_test, x) for x in gt_imgs_test
    ]

    name_classes = np.array(info['label'], dtype=str)
    interp_val = nn.Upsample(size=(com_size[1], com_size[0]),
                             mode='bilinear',
                             align_corners=True)

    ####
    # build model
    ####
    builder = ModelBuilder()
    net_encoder = builder.build_encoder(arch=args.arch_encoder,
                                        fc_dim=args.fc_dim,
                                        weights=args.weights_encoder)
    net_decoder = builder.build_decoder(arch=args.arch_decoder,
                                        fc_dim=args.fc_dim,
                                        num_class=args.num_classes,
                                        weights=args.weights_decoder,
                                        use_aux=True)

    model = SegmentationModule(net_encoder, net_decoder, args.use_aux)

    if args.num_gpus > 1:
        model = torch.nn.DataParallel(model)
        patch_replication_callback(model)
    model.cuda()

    nets = (net_encoder, net_decoder, None, None)
    optimizers = create_optimizer(nets, args)
    cudnn.enabled = True
    cudnn.benchmark = True
    model.train()

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    source_normalize = transforms_seg.Normalize(mean=mean, std=std)

    # Padding values in 0-255 space for the joint geometric transforms.
    mean_mapping = [0.485, 0.456, 0.406]
    mean_mapping = [item * 255 for item in mean_mapping]

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)
    source_transform = transforms_seg.Compose([
        transforms_seg.Resize([input_size[1], input_size[0]]),
        segtransforms.RandScale((args.scale_min, args.scale_max)),
        segtransforms.RandRotate((args.rotate_min, args.rotate_max),
                                 padding=mean_mapping,
                                 ignore_label=args.ignore_label),
        segtransforms.Crop([input_size_crop[1], input_size_crop[0]],
                           crop_type='rand',
                           padding=mean_mapping,
                           ignore_label=args.ignore_label),
        transforms_seg.ToTensor(),
        source_normalize
    ])
    target_normalize = transforms_seg.Normalize(mean=mean, std=std)
    target_transform = transforms_seg.Compose([
        transforms_seg.Resize([input_size_target[1], input_size_target[0]]),
        segtransforms.RandScale((args.scale_min, args.scale_max)),
        segtransforms.RandRotate((args.rotate_min, args.rotate_max),
                                 padding=mean_mapping,
                                 ignore_label=args.ignore_label),
        segtransforms.Crop(
            [input_size_target_crop[1], input_size_target_crop[0]],
            crop_type='rand',
            padding=mean_mapping,
            ignore_label=args.ignore_label),
        transforms_seg.ToTensor(),
        target_normalize
    ])
    trainloader = data.DataLoader(GTA5DataSet(args.data_dir,
                                              args.data_list,
                                              max_iters=args.num_steps *
                                              args.iter_size * args.batch_size,
                                              crop_size=input_size,
                                              transform=source_transform),
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=1,
                                  pin_memory=True)

    trainloader_iter = enumerate(trainloader)

    targetloader = data.DataLoader(fake_cityscapesDataSet(
        args.data_dir_target,
        args.data_list_target,
        max_iters=args.num_steps * args.iter_size * args.batch_size,
        crop_size=input_size_target,
        set=args.set,
        transform=target_transform),
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=1,
                                   pin_memory=True)

    targetloader_iter = enumerate(targetloader)
    # implement model.optim_parameters(args) to handle different models' lr setting

    # Criteria/labels below are scaffolding for the (currently disabled)
    # adversarial/reconstruction branches; only the seg loss is used here.
    criterion_seg = torch.nn.CrossEntropyLoss(ignore_index=255, reduce=False)
    criterion_pseudo = torch.nn.BCEWithLogitsLoss(reduce=False).cuda()
    bce_loss = torch.nn.BCEWithLogitsLoss().cuda()
    criterion_reconst = torch.nn.L1Loss().cuda()
    criterion_soft_pseudo = torch.nn.MSELoss(reduce=False).cuda()
    criterion_box = torch.nn.CrossEntropyLoss(ignore_index=255, reduce=False)
    interp = nn.Upsample(size=(input_size[1], input_size[0]),
                         align_corners=True,
                         mode='bilinear')
    interp_target = nn.Upsample(size=(input_size_target[1],
                                      input_size_target[0]),
                                align_corners=True,
                                mode='bilinear')

    # labels for adversarial training
    source_label = 0
    target_label = 1

    optimizer_encoder, optimizer_decoder, optimizer_disc, optimizer_reconst = optimizers
    batch_time = AverageMeter(10)
    loss_seg_value1 = AverageMeter(10)
    best_mIoUs = 0
    best_test_mIoUs = 0
    loss_seg_value2 = AverageMeter(10)
    loss_reconst_source_value = AverageMeter(10)
    loss_reconst_target_value = AverageMeter(10)
    loss_source_disc_value = AverageMeter(10)
    loss_source_disc_adv_value = AverageMeter(10)
    loss_balance_value = AverageMeter(10)
    loss_target_disc_value = AverageMeter(10)
    loss_target_disc_adv_value = AverageMeter(10)
    loss_pseudo_value = AverageMeter(10)
    bounding_num = AverageMeter(10)
    pseudo_num = AverageMeter(10)
    loss_bbx_att_value = AverageMeter(10)

    for i_iter in range(args.num_steps):
        # train G on a source batch

        end = time.time()
        _, batch = trainloader_iter.__next__()
        images, labels, _ = batch
        # 'async' became a reserved keyword in Python 3.7 (SyntaxError);
        # the argument has been named 'non_blocking' since PyTorch 0.4.
        images = Variable(images).cuda(non_blocking=True)
        labels = Variable(labels).cuda(non_blocking=True)
        seg, loss_seg2 = model(images, labels)

        loss_seg2 = torch.mean(loss_seg2)
        loss = args.lambda_trade_off * (loss_seg2)
        loss_seg_value2.update(loss_seg2.data.cpu().numpy())
        optimizer_encoder.zero_grad()
        optimizer_decoder.zero_grad()
        loss.backward()
        optimizer_encoder.step()
        optimizer_decoder.step()

        del seg, loss_seg2

        batch_time.update(time.time() - end)

        remain_iter = args.num_steps - i_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m),
                                                    int(t_s))

        adjust_learning_rate(optimizer_encoder, i_iter, args.lr_encoder, args)
        adjust_learning_rate(optimizer_decoder, i_iter, args.lr_decoder, args)
        if i_iter % args.print_freq == 0:
            lr_encoder = optimizer_encoder.param_groups[0]['lr']
            lr_decoder = optimizer_decoder.param_groups[0]['lr']
            logger.info('exp = {}'.format(args.snapshot_dir))
            logger.info(
                'Iter = [{0}/{1}]\t'
                'Time = {batch_time.avg:.3f}\t'
                'loss_seg1 = {loss_seg1.avg:4f}\t'
                'loss_seg2 = {loss_seg2.avg:.4f}\t'
                'loss_source_disc = {loss_source_disc.avg:.4f}\t'
                'loss_source_disc_adv = {loss_source_disc_adv.avg:.4f}\t'
                'loss_target_disc = {loss_target_disc.avg:.4f}\t'
                'loss_target_disc_adv = {loss_target_disc_adv.avg:.4f}\t'
                'loss_reconst_source = {loss_reconst_source.avg:.4f}\t'
                'loss_bbx_att = {loss_bbx_att.avg:.4f}\t'
                'loss_reconst_target = {loss_reconst_target.avg:.4f}\t'
                'loss_pseudo = {loss_pseudo.avg:.4f}\t'
                'loss_balance = {loss_balance.avg:.4f}\t'
                'bounding_num = {bounding_num.avg:.4f}\t'
                'pseudo_num = {pseudo_num.avg:4f}\t'
                'lr_encoder = {lr_encoder:.8f} lr_decoder = {lr_decoder:.8f}'.
                format(i_iter,
                       args.num_steps,
                       batch_time=batch_time,
                       loss_seg1=loss_seg_value1,
                       loss_seg2=loss_seg_value2,
                       loss_source_disc=loss_source_disc_value,
                       loss_pseudo=loss_pseudo_value,
                       loss_source_disc_adv=loss_source_disc_adv_value,
                       loss_bbx_att=loss_bbx_att_value,
                       bounding_num=bounding_num,
                       pseudo_num=pseudo_num,
                       loss_target_disc=loss_target_disc_value,
                       loss_target_disc_adv=loss_target_disc_adv_value,
                       loss_reconst_source=loss_reconst_source_value,
                       loss_balance=loss_balance_value,
                       loss_reconst_target=loss_reconst_target_value,
                       lr_encoder=lr_encoder,
                       lr_decoder=lr_decoder))

            logger.info("remain_time: {}".format(remain_time))
            if tb_logger is not None:
                tb_logger.add_scalar('loss_seg_value1', loss_seg_value1.avg,
                                     i_iter)
                tb_logger.add_scalar('loss_seg_value2', loss_seg_value2.avg,
                                     i_iter)
                tb_logger.add_scalar('loss_source_disc',
                                     loss_source_disc_value.avg, i_iter)
                tb_logger.add_scalar('loss_source_disc_adv',
                                     loss_source_disc_adv_value.avg, i_iter)
                tb_logger.add_scalar('loss_target_disc',
                                     loss_target_disc_value.avg, i_iter)
                tb_logger.add_scalar('loss_target_disc_adv',
                                     loss_target_disc_adv_value.avg, i_iter)
                tb_logger.add_scalar('bounding_num', bounding_num.avg, i_iter)
                tb_logger.add_scalar('pseudo_num', pseudo_num.avg, i_iter)
                tb_logger.add_scalar('loss_pseudo', loss_pseudo_value.avg,
                                     i_iter)
                tb_logger.add_scalar('lr', lr_encoder, i_iter)
                tb_logger.add_scalar('loss_balance', loss_balance_value.avg,
                                     i_iter)

            # NOTE(review): the snapshot/eval block is nested inside the
            # print_freq branch, so it only fires when i_iter is a multiple
            # of BOTH print_freq and save_pred_every — confirm this is
            # intentional before flattening the nesting.
            if i_iter % args.save_pred_every == 0 and i_iter != 0:
                logger.info('taking snapshot ...')
                model.eval()

                hist = np.zeros((19, 19))
                for index, batch in tqdm(enumerate(testloader)):
                    with torch.no_grad():
                        image, name = batch
                        output2, _ = model(Variable(image).cuda(), None)
                        pred = interp_val(output2)
                        del output2
                        pred = pred.cpu().data[0].numpy()
                        pred = pred.transpose(1, 2, 0)
                        pred = np.asarray(np.argmax(pred, axis=2),
                                          dtype=np.uint8)
                        label = np.array(Image.open(gt_imgs_val[index]))
                        label = label_mapping(label, mapping)
                        # accumulate the 19x19 confusion matrix
                        hist += fast_hist(label.flatten(), pred.flatten(), 19)
                mIoUs = per_class_iu(hist)
                for ind_class in range(args.num_classes):
                    logger.info('===>' + name_classes[ind_class] + ':\t' +
                                str(round(mIoUs[ind_class] * 100, 2)))
                    tb_logger.add_scalar(name_classes[ind_class] + '_mIoU',
                                         mIoUs[ind_class], i_iter)

                mIoUs = round(np.nanmean(mIoUs) * 100, 2)

                logger.info(mIoUs)
                tb_logger.add_scalar('val mIoU', mIoUs, i_iter)
                # Track the best validation mIoU; the original passed an
                # undefined name 'is_best_test' here, which would raise
                # NameError at the first snapshot.
                is_best_test = mIoUs > best_test_mIoUs
                best_test_mIoUs = max(best_test_mIoUs, mIoUs)
                net_encoder, net_decoder, net_disc, net_reconst = nets
                save_checkpoint(net_encoder, 'encoder', i_iter, args,
                                is_best_test)
                save_checkpoint(net_decoder, 'decoder', i_iter, args,
                                is_best_test)
            model.train()
Пример #28
0
def main(args):
    """Entry point: build the audio-visual grounding nets, then train or evaluate.

    In 'eval' mode a single evaluation pass is run and the function returns;
    otherwise it trains for args.num_epoch epochs with periodic evaluation,
    checkpointing and learning-rate drops.
    """
    # Build the three sub-networks and the training criterion.
    model_builder = ModelBuilder()
    sound_net = model_builder.build_sound_ground(
        arch=args.arch_sound_ground, weights=args.weights_sound_ground)
    frame_net = model_builder.build_frame_ground(
        arch=args.arch_frame_ground,
        pool_type=args.img_pool,
        weights=args.weights_frame_ground)
    grounding_net = model_builder.build_grounding(
        arch=args.arch_grounding, weights=args.weights_grounding)
    nets = (sound_net, frame_net, grounding_net)
    crit = model_builder.build_criterion(arch=args.loss)

    # Datasets and loaders for both splits.
    dataset_train = MUSICMixDataset(args.list_train, args, split='train')
    dataset_val = MUSICMixDataset(
        args.list_val, args, max_sample=args.num_val, split=args.split)

    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=int(args.workers),
        drop_last=True)
    loader_val = torch.utils.data.DataLoader(
        dataset_val,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=2,
        drop_last=False)
    args.epoch_iters = len(dataset_train) // args.batch_size
    print('1 Epoch = {} iters'.format(args.epoch_iters))

    # Wrap the nets for multi-GPU data-parallel execution.
    netWrapper = NetWrapper(nets, crit)
    netWrapper = torch.nn.DataParallel(
        netWrapper, device_ids=range(args.num_gpus))
    netWrapper.to(args.device)

    optimizer = create_optimizer(nets, args)

    # Per-epoch error history for both splits.
    history = {split: {'epoch': [], 'err': []} for split in ('train', 'val')}

    # Pure evaluation mode: run once and exit early.
    if args.mode == 'eval':
        evaluate(netWrapper, loader_val, history, 0, args)
        print('Evaluation Done!')
        return

    for epoch in range(1, args.num_epoch + 1):
        train(netWrapper, loader_train, optimizer, history, epoch, args)

        # Periodic evaluation and checkpointing.
        if epoch % args.eval_epoch == 0:
            evaluate(netWrapper, loader_val, history, epoch, args)
            checkpoint(nets, history, epoch, args)

        # Learning-rate schedule.
        if epoch in args.lr_steps:
            adjust_learning_rate(optimizer, args)

    print('Training Done!')
Пример #29
0
def main(args):
    """Entry point: build the segmentation model and run the training loop."""
    # Training dataset; it pre-batches samples per GPU itself.
    dataset_train = Dataset(
        args, split_name='train', batch_per_gpu=args.batch_size_per_gpu)

    # Encoder / decoder construction.
    builder = ModelBuilder()
    encoder = builder.build_encoder(
        arch=args.arch_encoder,
        fc_dim=args.fc_dim,
        freeze_until=args.freeze_until,
        weights=args.weights_encoder)
    decoder = builder.build_decoder(
        arch=args.arch_decoder,
        fc_dim=args.fc_dim,
        num_class=dataset_train.num_classes,
        weights=args.weights_decoder)

    encoder.train()

    # Label value -1 marks pixels to ignore in the loss.
    crit = nn.NLLLoss(ignore_index=-1)

    # Deep-supervision decoders take an extra loss-scale argument.
    if args.arch_decoder.endswith('deepsup'):
        segmentation_module = SegmentationModule(
            encoder, decoder, crit, args.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(encoder, decoder, crit)

    # batch_size equals the GPU count because batching happens in the dataset.
    loader_train = torchdata.DataLoader(
        dataset_train,
        batch_size=len(args.gpus),  # we have modified data_parallel
        shuffle=False,
        collate_fn=user_scattered_collate,
        num_workers=int(args.workers),
        drop_last=True,
        pin_memory=True)

    print('1 Epoch = {} iters'.format(args.epoch_iters))

    # Manual iterator so epochs share one continuous stream of batches.
    iterator_train = iter(loader_train)

    # Move the model onto GPU(s); multi-GPU uses the scattered wrapper.
    if len(args.gpus) > 1:
        segmentation_module = UserScatteredDataParallel(
            segmentation_module, device_ids=args.gpus)
        # Required for synchronized batch norm across replicas.
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # One optimizer per trainable net.
    nets = (encoder, decoder, crit)
    optimizers = create_optimizers(nets, args)

    # Training history (no validation split in this script).
    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}

    for epoch in range(args.start_epoch, args.num_epoch + 1):
        train(segmentation_module, iterator_train, optimizers, history,
              epoch, args)
        # Checkpoint at every epoch boundary.
        checkpoint(nets, history, args, epoch)

    print('Training Done!')
Пример #30
0
def main(args):
    """Run sliding-window validation over the volumes listed in ``val_dir``.

    Computes per-image Enhance/Core/Whole Dice scores, prints aggregate
    mean/std over all non-skipped images, and records the best mean Dice
    seen so far into ``args.best_mean`` / ``args.best_epoch``.

    Relies on module-level globals: ``val_dir`` (newline-terminated relative
    paths; every group of ``args.num_input`` lines ends with the label
    volume) and helpers ``segment`` / ``val`` / ``ValDataset``.
    """
    # import network architecture
    builder = ModelBuilder()
    model = builder.build_net(
            arch=args.id, 
            num_input=args.num_input, 
            num_classes=args.num_classes, 
            num_branches=args.num_branches,
            padding_list=args.padding_list, 
            dilation_list=args.dilation_list)
    # Data-parallel over all visible GPUs; benchmark mode autotunes conv algos.
    model = torch.nn.DataParallel(model, device_ids=list(range(args.num_gpus))).cuda()
    cudnn.benchmark = True
    
    # Optionally restore model weights from a checkpoint file.
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> Loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            state_dict = checkpoint['state_dict']
            model.load_state_dict(state_dict)           
            print("=> Loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        else:
            raise Exception("=> No checkpoint found at '{}'".format(args.resume))         
    
    # initialization      
    num_ignore = 0  # images skipped because some region had an all-zero score row
    # Per-axis border removed when cropping a patch down to its center region.
    margin = [args.crop_size[k] - args.center_size[k] for k in range(3)]
    num_images = int(len(val_dir)/args.num_input)
    dice_score = np.zeros([num_images, 3]).astype(float)

    for i in range(num_images):
        # load the images, label and mask
        # The last of each group of args.num_input entries is the label volume,
        # the preceding ones are the input modalities.
        im = []
        for j in range(args.num_input):
            direct, _ = val_dir[args.num_input * i + j].split("\n")
            name = direct
            if j < args.num_input - 1:
                image = nib.load(args.root_path + direct + '.gz').get_data()
                image = np.expand_dims(image, axis=0)
                im.append(image)
                if j == 0:
                    # Brain mask is stored alongside the first modality.
                    mask = nib.load(args.root_path + direct + "/mask.nii.gz").get_data()
            else:
                labels = nib.load(args.root_path + direct + '.gz').get_data()
        
        # Stack modalities into one (num_input-1, D, H, W) float array.
        images = np.concatenate(im, axis=0).astype(float)

        # divide the input images input small image segments
        # return the padding input images which can be divided exactly
        image_pad, mask_pad, label_pad, num_segments, padding_index, index = segment(images, mask, labels, args)

        # initialize prediction for the whole image as background
        labels_shape = list(labels.shape)
        labels_shape.append(args.num_classes)
        pred = np.zeros(labels_shape)
        pred[:,:,:,0] = 1
            
        # initialize the prediction for a small segmentation as background
        pad_shape = [int(num_segments[k] * args.center_size[k]) for k in range(3)]
        pad_shape.append(args.num_classes)
        pred_pad = np.zeros(pad_shape)  
        pred_pad[:,:,:,0] = 1 

        # score_per_image stores the sum of each image
        # NOTE(review): 3 rows appear to be the Enhance/Core/Whole regions;
        # column semantics (presumably pred-volume, label-volume, overlap)
        # should be confirmed against val().
        score_per_image = np.zeros([3, 3])
        # iterate over the z dimension
        for idz in range(num_segments[2]):
            tf = ValDataset(image_pad, label_pad, mask_pad, num_segments, idz, args)
            val_loader = DataLoader(tf, batch_size=args.batch_size, shuffle=args.shuffle, num_workers=args.num_workers, pin_memory=False)
            score_seg, pred_seg = val(val_loader, model, num_segments, args)
            pred_pad[:, :, idz*args.center_size[2]:(idz+1)*args.center_size[2], :] = pred_seg        
            score_per_image += score_seg
                
        # decide the start and end point in the original image
        for k in range(3):
            if index[0][k] == 0:
                index[0][k] = int(margin[k]/2 - padding_index[0][k])
            else:
                index[0][k] = int(margin[k]/2 + index[0][k])

            index[1][k] = int(min(index[0][k] + num_segments[k] * args.center_size[k], labels.shape[k]))

        dist = [index[1][k] - index[0][k] for k in range(3)]
        # Paste the padded prediction back into the original-resolution volume.
        pred[index[0][0]:index[1][0], index[0][1]:index[1][1], index[0][2]:index[1][2]] = pred_pad[:dist[0], :dist[1], :dist[2]]
            
        # Skip images where any region accumulated an all-zero score row
        # (Dice would be undefined / divide by zero).
        if np.sum(score_per_image[0,:]) == 0 or np.sum(score_per_image[1,:]) == 0 or np.sum(score_per_image[2,:]) == 0:
            num_ignore += 1
            continue 
        # compute the Enhance, Core and Whole dice score
        dice_score_per = [2 * np.sum(score_per_image[k,2]) / (np.sum(score_per_image[k,0]) + np.sum(score_per_image[k,1])) for k in range(3)]   
        print('Image: %d, Enhance score: %.4f, Core score: %.4f, Whole score: %.4f' % (i, dice_score_per[0], dice_score_per[1], dice_score_per[2]))           
        
        dice_score[i, :] = dice_score_per
        
    # Aggregate over the images that were actually scored.
    count_image = num_images - num_ignore
    dice_score = dice_score[:count_image,:]
    mean_dice = np.mean(dice_score, axis=0)
    std_dice = np.std(dice_score, axis=0)
    print('Evalution Done!')
    print('Enhance score: %.4f, Core score: %.4f, Whole score: %.4f, Mean Dice score: %.4f' % (mean_dice[0], mean_dice[1], mean_dice[2], np.mean(mean_dice)))
    print('Enhance std: %.4f, Core std: %.4f, Whole std: %.4f, Mean Std: %.4f' % (std_dice[0], std_dice[1], std_dice[2], np.mean(std_dice)))                      
    # Track the best epoch by mean Dice (state is carried on args across calls).
    if np.mean(mean_dice) > args.best_mean:
        args.best_epoch = args.epoch_index
        args.best_mean = np.mean(mean_dice)
def main(cfg, gpus):
    """Train the segmentation network described by cfg, with an optional
    foveation module, checkpointing and evaluating after every epoch."""
    # Cityscapes lists use 19 as the ignore label; everything else uses -2.
    ignore_label = 19 if 'CITYSCAPE' in cfg.DATASET.list_train else -2
    crit = nn.NLLLoss(ignore_index=ignore_label)

    # Segmentation network: encoder + decoder around the shared criterion.
    net_encoder = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_encoder,
        dilate_rate=cfg.DATASET.segm_downsampling_rate)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        num_class=cfg.DATASET.num_class,
        weights=cfg.MODEL.weights_decoder)
    segmentation_module = SegmentationModule(
        net_encoder, net_decoder, crit, cfg)
    segmentation_module.cuda()

    # Nets handed to the optimizers; extended below when foveating.
    nets = (net_encoder, net_decoder, crit)

    # Optional foveation network.
    if cfg.MODEL.foveation:
        net_foveater = ModelBuilder.build_foveater(
            in_channel=cfg.MODEL.in_dim,
            out_channel=len(cfg.MODEL.patch_bank),
            len_gpus=len(gpus),
            weights=cfg.MODEL.weights_foveater,
            cfg=cfg)
        foveation_module = FovSegmentationModule(
            net_foveater, cfg, len_gpus=len(gpus))
        foveation_module.cuda()
        nets = (net_encoder, net_decoder, crit, net_foveater)

    optimizers = create_optimizers(nets, cfg)

    # Dataset and loader; the dataset yields pre-batched samples, one per GPU.
    dataset_train = TrainDataset(
        cfg.DATASET.root_dataset, cfg.DATASET.list_train, cfg.DATASET)
    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=len(gpus),  # customerized pre-batched dataset
        pin_memory=True)

    print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters))
    # One continuous iterator shared across epochs.
    iterator_train = iter(loader_train)

    # Main loop: train, save the latest checkpoint, then evaluate.
    for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch):
        if cfg.MODEL.foveation:
            train(segmentation_module,
                  iterator_train,
                  optimizers,
                  epoch + 1,
                  cfg,
                  history=None,
                  foveation_module=foveation_module)
        else:
            train(segmentation_module, iterator_train, optimizers, epoch + 1,
                  cfg)
        checkpoint_last(nets, cfg, epoch + 1)
        # Eval during training; the foveated variant returns extra diagnostics.
        if cfg.MODEL.foveation:
            val_iou, val_acc, F_Xlr, F_Xlr_score_flat = eval_during_train(cfg)
        else:
            val_iou, val_acc = eval_during_train(cfg)
    print('Training Done!')
def main(args):
    """Run sliding-window inference over the unlabeled volumes in ``test_dir``.

    Predicts each volume segment-by-segment along the z axis and optionally
    writes a visualization of the argmax class labels. Unlike the validation
    variant, no label volumes are loaded and no scores are computed.

    Relies on module-level globals ``test_dir`` and helpers
    ``segment`` / ``test`` / ``TestDataset`` / ``visualize_result``.
    """
    # import network architecture
    # NOTE(review): num_input + 1 suggests the mask is fed as an extra input
    # channel — confirm against the TestDataset implementation.
    builder = ModelBuilder()
    model = builder.build_net(
            arch=args.id, 
            num_input=args.num_input + 1, 
            num_classes=args.num_classes, 
            num_branches=args.num_branches,
            padding_list=args.padding_list, 
            dilation_list=args.dilation_list)
    # Data-parallel over all visible GPUs; benchmark mode autotunes conv algos.
    model = torch.nn.DataParallel(model, device_ids=list(range(args.num_gpus))).cuda()
    cudnn.benchmark = True
    
    # Optionally restore model weights from a checkpoint file.
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> Loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            state_dict = checkpoint['state_dict']
            model.load_state_dict(state_dict)           
            print("=> Loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        else:
            raise Exception("=> No checkpoint found at '{}'".format(args.resume))         
    
    # initialization      
    num_ignore = 0  # kept for parity with the validation script; never used here
    # Per-axis border removed when cropping a patch down to its center region.
    margin = [args.crop_size[k] - args.center_size[k] for k in range(3)]
    num_images = int(len(test_dir)/args.num_input)
    dice_score = np.zeros([num_images, 3]).astype(float)

    for i in range(num_images):
        # load the images and mask
        # Every args.num_input lines form one image's group of modalities.
        im = []
        for j in range(args.num_input):
            direct, _ = test_dir[args.num_input * i + j].split("\n")
            name = direct            
            image = nib.load(args.root_path + direct).get_data()
            image = np.expand_dims(image, axis=0)
            im.append(image)
            if j == 0:
                # Brain mask lives alongside the first modality.
                mask = nib.load(args.root_path + direct + "mask/mask.nii").get_data()
                   
        # Stack modalities into one (num_input, D, H, W) float array.
        images = np.concatenate(im, axis=0).astype(float)

        # divide the input images input small image segments
        # return the padding input images which can be divided exactly
        image_pad, mask_pad, num_segments, padding_index, index = segment(images, mask, args)

        # initialize prediction for the whole image as background
        mask_shape = list(mask.shape)
        mask_shape.append(args.num_classes)
        pred = np.zeros(mask_shape)
        pred[:,:,:,0] = 1
            
        # initialize the prediction for a small segmentation as background
        pad_shape = [int(num_segments[k] * args.center_size[k]) for k in range(3)]
        pad_shape.append(args.num_classes)
        pred_pad = np.zeros(pad_shape)  
        pred_pad[:,:,:,0] = 1 

        
        # iterate over the z dimension
        for idz in range(num_segments[2]):
            tf = TestDataset(image_pad, mask_pad, num_segments, idz, args)
            test_loader = DataLoader(tf, batch_size=args.batch_size, shuffle=args.shuffle, num_workers=args.num_workers, pin_memory=False)
            pred_seg = test(test_loader, model, num_segments, args)
            pred_pad[:, :, idz*args.center_size[2]:(idz+1)*args.center_size[2], :] = pred_seg        
           
                
        # decide the start and end point in the original image
        for k in range(3):
            if index[0][k] == 0:
                index[0][k] = int(margin[k]/2 - padding_index[0][k])
            else:
                index[0][k] = int(margin[k]/2 + index[0][k])

            index[1][k] = int(min(index[0][k] + num_segments[k] * args.center_size[k], mask.shape[k]))

        dist = [index[1][k] - index[0][k] for k in range(3)]
        # Paste the padded prediction back into the original-resolution volume.
        pred[index[0][0]:index[1][0], index[0][1]:index[1][1], index[0][2]:index[1][2]] = pred_pad[:dist[0], :dist[1], :dist[2]]
            
        # Optionally save the argmax segmentation (axes swapped for the viewer).
        if args.visualize:
            vis = np.argmax(pred, axis=3)
            vis = np.swapaxes(vis, 0, 2).astype(dtype=np.uint8)
            visualize_result(name, vis, args)
           
    print('Evalution Done!')
def train_model(audio_gen: AudioGenerator,
                model_builder: ModelBuilder,
                optimizer=None,
                epochs=30,
                verbose=0,
                loss_limit=400):
    """Train a CTC speech-recognition model built by *model_builder*.

    Args:
        audio_gen: generator supplying train/validation batches plus
            metadata (input_dim, max_length, spectrogram, minibatch_size,
            audio path lists).
        model_builder: factory whose ``model()`` returns the acoustic net.
        optimizer: Keras optimizer. When omitted, a fresh clipped SGD is
            created for this call.
        epochs: number of training epochs.
        verbose: Keras fit verbosity; 0 additionally enables MetricsLogger.
        loss_limit: loss threshold forwarded to MetricsLogger.

    Returns:
        The model's display name (also used as the checkpoint file stem).
    """
    # BUG FIX: the optimizer used to be a default argument
    # (optimizer=SGD(...)), which creates ONE instance at import time that is
    # shared — and mutated (iteration count, momentum slots) — by every call.
    # Build a fresh one per call instead.
    if optimizer is None:
        optimizer = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True,
                        clipnorm=5)

    # Build the acoustic model; 29 output symbols (26 letters + space,
    # apostrophe, CTC blank).
    input_dim = audio_gen.input_dim
    if audio_gen.max_length is None:
        model = model_builder.model(input_shape=(None, input_dim),
                                    output_dim=29)
    else:
        model = model_builder.model(
            input_shape=(audio_gen.max_length, input_dim), output_dim=29)
    model_name = ("Spec" if audio_gen.spectrogram else "MFCC") + " " + model.name
    model.name = model_name
    save_model_path = model.name + ".h5"

    # Derive steps per epoch from the generator's dataset sizes.
    num_train_examples = len(audio_gen.train_audio_paths)
    steps_per_epoch = num_train_examples // audio_gen.minibatch_size
    num_valid_samples = len(audio_gen.valid_audio_paths)
    validation_steps = num_valid_samples // audio_gen.minibatch_size

    # Add CTC loss to the network; keep the pre-CTC model for the summary.
    pre_model = model
    model = add_ctc_loss(model)

    # CTC loss is computed inside the graph, so the Keras loss is a
    # pass-through on y_pred.
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                  optimizer=optimizer)

    # make results/ directory, if necessary
    os.makedirs('results', exist_ok=True)

    # Checkpoint the best model; in quiet mode also log metrics per epoch.
    model_checkpoint = ModelCheckpoint(filepath='results/' + save_model_path,
                                       verbose=0, save_best_only=True)
    terminate_on_na_n = TerminateOnNaN()
    if verbose > 0:
        callbacks = [model_checkpoint, terminate_on_na_n]
    else:
        metrics_logger = MetricsLogger(model_name=model_name, n_epochs=epochs,
                                       loss_limit=loss_limit)
        callbacks = [model_checkpoint, metrics_logger]

    try:
        model.fit_generator(generator=audio_gen.next_train(),
                            steps_per_epoch=steps_per_epoch,
                            epochs=epochs,
                            validation_data=audio_gen.next_valid(),
                            validation_steps=validation_steps,
                            callbacks=callbacks, verbose=verbose)
    except KeyboardInterrupt:
        # User abort: clear notebook output and fall through to cleanup.
        display.clear_output(wait=True)
    except Exception:
        # Report the original exception but do not re-raise, so the summary
        # and cleanup below still run (best-effort training driver).
        traceback.print_exc()
    finally:
        pre_model.summary()
        # Release the (potentially large) models before returning.
        del pre_model
        del model
    return model_name