Exemplo n.º 1
0
def CreateDiscriminator(args):
    discriminator = FCDiscriminator(num_classes=args.num_classes)
    optimizer = optim.Adam(discriminator.parameters(), lr=args.learning_rate_D, betas=(0.9, 0.99))
    optimizer.zero_grad()  
    if args.restore_from is not None:
        discriminator.load_state_dict(torch.load(args.restore_from + '_D.pth'))        
    return discriminator, optimizer
def init_net_D(args, state_dict=None):
    net_D = FCDiscriminator(cfg.DATASET.NUM_CLASSES)

    if args.distributed:
        net_D = torch.nn.SyncBatchNorm.convert_sync_batchnorm(net_D)

    if cfg.MODEL.DOMAIN_BN:
        net_D = DomainBN.convert_domain_batchnorm(net_D, num_domains=2)

    if state_dict is not None:
        try:
            net_D.load_state_dict(state_dict)
        except:
            net_D = DomainBN.convert_domain_batchnorm(net_D, num_domains=2)
            net_D.load_state_dict(state_dict)

    if cfg.TRAIN.FREEZE_BN:
        net_D.apply(freeze_BN)

    if torch.cuda.is_available():
        net_D.cuda()

    if args.distributed:
        net_D = DistributedDataParallel(net_D, device_ids=[args.gpu])
    else:
        net_D = torch.nn.DataParallel(net_D)

    return net_D
Exemplo n.º 3
0
def main():

    # create the model
    model = build_model()
    model.to(device)
    model.load_state_dict(torch.load(args.restore_from))

    # create domintor
    model_D1 = FCDiscriminator(num_classes=1)
    model_D1.to(device)
    model_D1.load_state_dict(torch.load(args.D_restore_from))

    up = torch.nn.Upsample(scale_factor=32, mode='bilinear')
    sig = torch.nn.Sigmoid()

    # labels for adversarial training 两种域的记号
    salLabel = 0
    edgeLabel = 1

    picloader = get_loader(args)
    correct = 0
    tot = 0
    for i_iter, data_batch in enumerate(picloader):
        tot += 2

        sal_image, edge_image = data_batch['sal_image'], data_batch[
            'edge_image']
        sal_image, edge_image = Variable(sal_image), Variable(edge_image)
        sal_image, edge_image = sal_image.to(device), edge_image.to(device)

        sal_pred = model(sal_image)
        edge_pred = model(edge_image)

        # test D
        # for param in model_D1.parameters():
        #     param.requires_grad = True

        ss_out = model_D1(sal_pred)
        se_out = model_D1(edge_pred)
        if pan(ss_out) == salLabel:
            correct += 1
        if pan(se_out) == edgeLabel:
            correct += 1

        if i_iter % 100 == 0:
            print('processing %d: %f' % (i_iter, correct / tot))

    print(correct / tot)
def check_original_discriminator(args, pred_target1, pred_target2, n_iter):

    device = torch.device("cuda" if not args.cpu else "cpu")

    model_D1 = FCDiscriminator(num_classes=args.num_classes).to(device)
    model_D2 = FCDiscriminator(num_classes=args.num_classes).to(device)
    model_D1.eval()
    model_D2.eval()

    baseline_dir = 'snapshots/baseline_single_250000_seg0.1_adv10.0002_adv20.001_bs1_11-10-8-52/'
    bce_loss = torch.nn.BCEWithLogitsLoss()
    #### restore model_D1, D2 and model

    # model_D1 parameters
    D1 = baseline_dir + 'GTA5_150000_D1.pth'
    saved_state_dict = torch.load(D1)
    model_D1.load_state_dict(saved_state_dict)

    # model_D2 parameters
    D2 = baseline_dir + 'GTA5_150000_D2.pth'
    saved_state_dict = torch.load(D2)
    model_D2.load_state_dict(saved_state_dict)

    D_out1 = model_D1(F.softmax(pred_target1))
    D_out2 = model_D2(F.softmax(pred_target2))
    orig_d1_loss = bce_loss(
        D_out1,
        torch.FloatTensor(D_out1.data.size()).fill_(1).to(device))
    orig_d2_loss = bce_loss(
        D_out2,
        torch.FloatTensor(D_out2.data.size()).fill_(1).to(device))

    original_discriminator_file = args.snapshot_dir + '/original_discriminator.txt'

    with open(original_discriminator_file, 'a+') as f:
        f.write('n_iter:{} '.format(n_iter))
        f.write('orig d1 loss:{} '.format(orig_d1_loss))
        f.write('orig d2 loss:{} '.format(orig_d2_loss))
        f.write('\n')

    del model_D1
    del model_D2
Exemplo n.º 5
0
def main():
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    cudnn.enabled = True
    gpu = args.gpu

    # create network

    model = Res_Deeplab(num_classes=args.num_classes)

    # load pretrained parameters (weights)
    if args.restore_from[:4] == 'http':
        saved_state_dict = model_zoo.load_url(
            args.restore_from
        )  ## http://vllab1.ucmerced.edu/~whung/adv-semi-seg/resnet101COCO-41f33a49.pth
    else:
        saved_state_dict = torch.load(args.restore_from)
        #checkpoint = torch.load(args.restore_from)_

    # only copy the params that exist in current model (caffe-like)
    new_params = model.state_dict().copy()  # state_dict() is current model
    for name, param in new_params.items():
        #print (name) # 'conv1.weight, name:param(value), dict
        if name in saved_state_dict and param.size(
        ) == saved_state_dict[name].size():
            new_params[name].copy_(saved_state_dict[name])
            #print('copy {}'.format(name))
    model.load_state_dict(new_params)
    #model.load_state_dict(checkpoint['state_dict'])
    #optimizer.load_state_dict(args.checkpoint['optim_dict'])

    model.train(
    )  # https://pytorch.org/docs/stable/nn.html, Sets the module in training mode.
    model.cuda(args.gpu)  ##

    cudnn.benchmark = True  # This flag allows you to enable the inbuilt cudnn auto-tuner to find the best algorithm to use for your hardware

    # init D

    model_D = FCDiscriminator(num_classes=args.num_classes)
    #args.restore_from_D = 'snapshots/linear2/VOC_25000_D.pth'
    if args.restore_from_D is not None:  # None
        model_D.load_state_dict(torch.load(args.restore_from_D))
        # checkpoint_D = torch.load(args.restore_from_D)
        # model_D.load_state_dict(checkpoint_D['state_dict'])
        # optimizer_D.load_state_dict(checkpoint_D['optim_dict'])
    model_D.train()
    model_D.cuda(args.gpu)

    if USECALI:
        model_cali = ModelWithTemperature(model, model_D)
        model_cali.cuda(args.gpu)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    random.seed(args.random_seed)
    np.random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    train_dataset = VOCDataSet(args.data_dir,
                               args.data_list,
                               crop_size=input_size,
                               scale=args.random_scale,
                               mirror=args.random_mirror,
                               mean=IMG_MEAN)
    train_dataset_remain = VOCDataSet(args.data_dir,
                                      args.data_list_remain,
                                      crop_size=input_size,
                                      scale=args.random_scale,
                                      mirror=args.random_mirror,
                                      mean=IMG_MEAN)

    train_dataset_size = len(train_dataset)
    train_dataset_size_remain = len(train_dataset_remain)

    print train_dataset_size
    print train_dataset_size_remain

    train_gt_dataset = VOCGTDataSet(args.data_dir,
                                    args.data_list,
                                    crop_size=input_size,
                                    scale=args.random_scale,
                                    mirror=args.random_mirror,
                                    mean=IMG_MEAN)
    if args.partial_data is None:  #if not partial, load all

        trainloader = data.DataLoader(train_dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=5,
                                      pin_memory=True)

        trainloader_gt = data.DataLoader(train_gt_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=5,
                                         pin_memory=True)
    else:
        #sample partial data
        #args.partial_data = 0.125
        partial_size = int(args.partial_data * train_dataset_size)

        if args.partial_id is not None:
            train_ids = pickle.load(open(args.partial_id))
            print('loading train ids from {}'.format(args.partial_id))
        else:  #args.partial_id is none
            train_ids = range(train_dataset_size)
            train_ids_remain = range(train_dataset_size_remain)
            np.random.shuffle(train_ids)  #shuffle!
            np.random.shuffle(train_ids_remain)

        pickle.dump(train_ids,
                    open(osp.join(args.snapshot_dir, 'train_id.pkl'),
                         'wb'))  #randomly suffled ids

        #sampler
        train_sampler = data.sampler.SubsetRandomSampler(
            train_ids[:])  # 0~1/8,
        train_remain_sampler = data.sampler.SubsetRandomSampler(
            train_ids_remain[:])
        train_gt_sampler = data.sampler.SubsetRandomSampler(train_ids[:])
        # train_sampler = data.sampler.SubsetRandomSampler(train_ids[:partial_size]) # 0~1/8
        # train_remain_sampler = data.sampler.SubsetRandomSampler(train_ids[partial_size:]) # used as unlabeled, 7/8
        # train_gt_sampler = data.sampler.SubsetRandomSampler(train_ids[:partial_size])

        #train loader
        trainloader = data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            sampler=train_sampler,
            num_workers=3,
            pin_memory=True)  # multi-process data loading
        trainloader_remain = data.DataLoader(train_dataset_remain,
                                             batch_size=args.batch_size,
                                             sampler=train_remain_sampler,
                                             num_workers=3,
                                             pin_memory=True)
        # trainloader_remain = data.DataLoader(train_dataset,
        #                                      batch_size=args.batch_size, sampler=train_remain_sampler, num_workers=3,
        #                                     pin_memory=True)
        trainloader_gt = data.DataLoader(train_gt_dataset,
                                         batch_size=args.batch_size,
                                         sampler=train_gt_sampler,
                                         num_workers=3,
                                         pin_memory=True)

        trainloader_remain_iter = enumerate(trainloader_remain)

    trainloader_iter = enumerate(trainloader)
    trainloader_gt_iter = enumerate(trainloader_gt)

    # implement model.optim_parameters(args) to handle different models' lr setting

    # optimizer for segmentation network

    # model.optim_paramters(args) = list(dict1, dict2), dict1 >> 'lr' and 'params'
    # print(type(model.optim_parameters(args)[0]['params'])) # generator
    #print(model.state_dict()['coeff'][0]) #confirmed

    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    #optimizer.add_param_group({"params":model.coeff}) # assign new coefficient to the optimizer
    #print(len(optimizer.param_groups))
    optimizer.zero_grad()

    # optimizer for discriminator network
    optimizer_D = optim.Adam(model_D.parameters(),
                             lr=args.learning_rate_D,
                             betas=(0.9, 0.99))
    optimizer_D.zero_grad()  #initialize

    if USECALI:
        optimizer_cali = optim.LBFGS([model_cali.temperature],
                                     lr=0.01,
                                     max_iter=50)
        optimizer_cali.zero_grad()

        nll_criterion = BCEWithLogitsLoss().cuda()  # BCE!!
        ece_criterion = ECELoss().cuda()

    # loss/ bilinear upsampling
    bce_loss = BCEWithLogitsLoss2d()
    interp = nn.Upsample(
        size=(input_size[1], input_size[0]), mode='bilinear'
    )  # okay it automatically change to functional.interpolate
    # 321, 321

    if version.parse(torch.__version__) >= version.parse('0.4.0'):  #0.4.1
        interp = nn.Upsample(size=(input_size[1], input_size[0]),
                             mode='bilinear',
                             align_corners=True)
    else:
        interp = nn.Upsample(size=(input_size[1], input_size[0]),
                             mode='bilinear')

    # labels for adversarial training
    pred_label = 0
    gt_label = 1
    semi_ratio_sum = 0
    semi_sum = 0
    loss_seg_sum = 0
    loss_adv_sum = 0
    loss_vat_sum = 0
    l_seg_sum = 0
    l_vat_sum = 0
    l_adv_sum = 0
    logits_list = []
    labels_list = []

    #https: // towardsdatascience.com / understanding - pytorch -with-an - example - a - step - by - step - tutorial - 81fc5f8c4e8e

    for i_iter in range(args.num_steps):

        loss_seg_value = 0  # L_seg
        loss_adv_pred_value = 0  # 0.01 L_adv
        loss_D_value = 0  # L_D
        loss_semi_value = 0  # 0.1 L_semi
        loss_semi_adv_value = 0  # 0.001 L_adv
        loss_vat_value = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)  #changing lr by iteration

        optimizer_D.zero_grad()
        adjust_learning_rate_D(optimizer_D, i_iter)

        for sub_i in range(args.iter_size):

            ###################### train G!!!###########################
            ############################################################
            # don't accumulate grads in D

            for param in model_D.parameters(
            ):  # <class 'torch.nn.parameter.Parameter'>, convolution weights
                param.requires_grad = False  # do not update gradient of D (freeze) while G

            ######### do unlabeled first!! 0.001 L_adv + 0.1 L_semi ###############

            # lambda_semi, lambda_adv for unlabeled
            if (args.lambda_semi > 0 or args.lambda_semi_adv > 0
                ) and i_iter >= args.semi_start_adv:
                try:
                    _, batch = trainloader_remain_iter.next(
                    )  #remain = unlabeled
                    print(trainloader_remain_iter.next())
                except:
                    trainloader_remain_iter = enumerate(
                        trainloader_remain)  # impose counters
                    _, batch = trainloader_remain_iter.next()

                # only access to img
                images, _, _, _ = batch  # <class 'torch.Tensor'>
                images = Variable(images).cuda(
                    args.gpu)  # <class 'torch.Tensor'>

                pred = interp(
                    model(images))  # S(X), pred <class 'torch.Tensor'>
                pred_remain = pred.detach(
                )  #use detach() when attempting to remove a tensor from a computation graph, will be used for D
                # https://discuss.pytorch.org/t/clone-and-detach-in-v0-4-0/16861
                # The difference is that detach refers to only a given variable on which it's called.
                # torch.no_grad affects all operations taking place within the with statement. >> for context,
                # requires_grad is for tensor

                # pred >> (8,21,321,321), L_adv
                D_out = interp(
                    model_D(F.softmax(pred))
                )  # D(S(X)), confidence, 8,1,321,321, not detached, there was not dim
                D_out_sigmoid = F.sigmoid(D_out).data.cpu().numpy().squeeze(
                    axis=1)  # (8,321,321) 0~1

                # 0.001 L_adv!!!!
                ignore_mask_remain = np.zeros(D_out_sigmoid.shape).astype(
                    np.bool)  # no ignore_mask for unlabeled adv
                loss_semi_adv = args.lambda_semi_adv * bce_loss(
                    D_out, make_D_label(gt_label,
                                        ignore_mask_remain))  #gt_label =1,
                # -log(D(S(X)))
                loss_semi_adv = loss_semi_adv / args.iter_size  #normalization

                loss_semi_adv_value += loss_semi_adv.data.cpu().numpy(
                ) / args.lambda_semi_adv

                ##--- visualization, pred(8,21,321,321), D_out_sigmoid(8,321,321)
                """
                if i_iter % 1000 == 0:
                    vpred = pred.transpose(1, 2).transpose(2, 3).contiguous()  # (8,321,321,21)
                    vpred = vpred.view(-1, 21)  # (8*321*321, 21)
                    vlogsx = F.log_softmax(vpred)  # torch.Tensor
                    vsemi_gt = pred.data.cpu().numpy().argmax(axis=1)
                    vsemi_gt = Variable(torch.FloatTensor(vsemi_gt).long()).cuda(gpu)
                    vlogsx = vlogsx.gather(1, vsemi_gt.view(-1, 1))
                    sx = F.softmax(vpred).gather(1, vsemi_gt.view(-1, 1))
                    vD_out_sigmoid = Variable(torch.FloatTensor(D_out_sigmoid)).cuda(gpu).view(-1, 1)
                    vlogsx = (vlogsx*(2.5*vD_out_sigmoid+0.5))
                    vlogsx = -vlogsx.squeeze(dim=1)
                    sx = sx.squeeze(dim=1)
                    vD_out_sigmoid = vD_out_sigmoid.squeeze(dim=1)
                    dsx = vD_out_sigmoid.data.cpu().detach().numpy()
                    vlogsx = vlogsx.data.cpu().detach().numpy()
                    sx = sx.data.cpu().detach().numpy()
                    plt.clf()
                    plt.figure(figsize=(15, 5))
                    plt.subplot(131)
                    plt.ylim(0, 0.004)
                    plt.scatter(dsx, vlogsx, s = 0.1)  # variable requires grad cannot call numpy >> detach
                    plt.xlabel('D(S(X))')
                    plt.ylabel('Loss_Semi per Pixel')
                    plt.subplot(132)
                    plt.scatter(dsx, vlogsx, s = 0.1)  # variable requires grad cannot call numpy >> detach
                    plt.xlabel('D(S(X))')
                    plt.ylabel('Loss_Semi per Pixel')
                    plt.subplot(133)
                    plt.scatter(dsx, sx, s=0.1)
                    plt.xlabel('D(S(X))')
                    plt.ylabel('S(x)')
                    plt.savefig('/home/eungyo/AdvSemiSeg/plot/'  + str(i_iter) + '.png')
                    """

                if args.lambda_semi <= 0 or i_iter < args.semi_start:
                    loss_semi_adv.backward()
                    loss_semi_value = 0
                else:

                    semi_gt = pred.data.cpu().numpy().argmax(
                        axis=1
                    )  # pred=S(X) ((8,21,321,321)), semi_gt is not one-hot, 8,321,321
                    #(8, 321, 321)

                    if not USECALI:
                        semi_ignore_mask = (
                            D_out_sigmoid < args.mask_T
                        )  # both (8,321,321) 0~1threshold!, numpy
                        semi_gt[
                            semi_ignore_mask] = 255  # Yhat, ignore pixel becomes 255
                        semi_ratio = 1.0 - float(semi_ignore_mask.sum(
                        )) / semi_ignore_mask.size  # ignored pixels / H*W
                        print('semi ratio: {:.4f}'.format(semi_ratio))

                        if semi_ratio == 0.0:
                            loss_semi_value += 0
                        else:
                            semi_gt = torch.FloatTensor(semi_gt)
                            confidence = torch.FloatTensor(
                                D_out_sigmoid)  ## added, only pred is on cuda
                            loss_semi = args.lambda_semi * weighted_loss_calc(
                                pred, semi_gt, args.gpu, confidence)

                    else:
                        semi_ratio = 1
                        semi_gt = (torch.FloatTensor(semi_gt))  # (8,321,321)
                        confidence = torch.FloatTensor(
                            F.sigmoid(
                                model_cali.temperature_scale(D_out.view(
                                    -1))).data.cpu().numpy())  # (8*321*321,)
                        loss_semi = args.lambda_semi * calibrated_loss_calc(
                            pred, semi_gt, args.gpu, confidence, accuracies,
                            n_bin
                        )  #  L_semi = Yhat * log(S(X)) # loss_calc(pred, semi_gt, args.gpu)
                        # pred(8,21,321,321)

                    if semi_ratio != 0:
                        loss_semi = loss_semi / args.iter_size
                        loss_semi_value += loss_semi.data.cpu().numpy(
                        ) / args.lambda_semi

                        if args.method == 'vatent' or args.method == 'vat':
                            #v_loss = vat_loss(model, images, pred, eps=args.epsilon[i])  # R_vadv
                            weighted_v_loss = weighted_vat_loss(
                                model,
                                images,
                                pred,
                                confidence,
                                eps=args.epsilon)

                            if args.method == 'vatent':
                                #v_loss += entropy_loss(pred)  # R_cent (conditional entropy loss)
                                weighted_v_loss += weighted_entropy_loss(
                                    pred, confidence)

                            v_loss = weighted_v_loss / args.iter_size
                            loss_vat_value += v_loss.data.cpu().numpy()
                            loss_semi_adv += args.alpha * v_loss

                            loss_vat_sum += loss_vat_value
                            if i_iter % 100 == 0 and sub_i == 4:
                                l_vat_sum = loss_vat_sum / 100
                                if i_iter == 0:
                                    l_vat_sum = l_vat_sum * 100
                                loss_vat_sum = 0

                        loss_semi += loss_semi_adv
                        loss_semi.backward(
                        )  # 0.001 L_adv + 0.1 L_semi, backward == back propagation

            else:
                loss_semi = None
                loss_semi_adv = None

            ###########train with source (labeled data)############### L_ce + 0.01 * L_adv

            try:
                _, batch = trainloader_iter.next()
            except:
                trainloader_iter = enumerate(trainloader)  # safe coding
                _, batch = trainloader_iter.next()  #counter, batch

            images, labels, _, _ = batch  # also get labels images(8,321,321)
            images = Variable(images).cuda(args.gpu)
            ignore_mask = (
                labels.numpy() == 255
            )  # ignored pixels == 255 >> 1, yes ignored mask for labeled data

            pred = interp(model(images))  # S(X), 8,21,321,321
            loss_seg = loss_calc(pred, labels,
                                 args.gpu)  # -Y*logS(X)= L_ce, not detached

            if USED:
                softsx = F.softmax(pred, dim=1)
                D_out = interp(model_D(softsx))  # D(S(X)), L_adv

                loss_adv_pred = bce_loss(
                    D_out, make_D_label(
                        gt_label,
                        ignore_mask))  # both  8,1,321,321, gt_label = 1
                # L_adv =  -log(D(S(X)), make_D_label is all 1 except ignored_region

                loss = loss_seg + args.lambda_adv_pred * loss_adv_pred
                if USECALI:
                    if (args.lambda_semi > 0 or args.lambda_semi_adv > 0
                        ) and i_iter >= args.semi_start_adv:
                        with torch.no_grad():
                            _, prediction = torch.max(softsx, 1)
                            labels_mask = (
                                (labels > 0) *
                                (labels != 255)) | (prediction.data.cpu() > 0)
                            labels = labels[labels_mask]
                            prediction = prediction[labels_mask]
                            fake_mask = (labels.data.cpu().numpy() !=
                                         prediction.data.cpu().numpy())
                            real_label = make_conf_label(
                                1, fake_mask
                            )  # (10*321*321, ) 0 or 1 (fake or real)

                            logits = D_out.squeeze(dim=1)
                            logits = logits[labels_mask]
                            logits_list.append(logits)  # initialize
                            labels_list.append(real_label)

                        if (i_iter * args.iter_size * args.batch_size + sub_i +
                                1) % train_dataset_size == 0:
                            logits = torch.cat(logits_list).cuda(
                            )  # overall 5000 images in val,  #logits >> 5000,100, (1464*321*321,)
                            labels = torch.cat(labels_list).cuda()
                            before_temperature_nll = nll_criterion(
                                logits, labels).item()  ####modify
                            before_temperature_ece, _, _ = ece_criterion(
                                logits, labels)  # (1464*321*321,)
                            before_temperature_ece = before_temperature_ece.item(
                            )
                            print('Before temperature - NLL: %.3f, ECE: %.3f' %
                                  (before_temperature_nll,
                                   before_temperature_ece))

                            def eval():
                                loss_cali = nll_criterion(
                                    model_cali.temperature_scale(logits),
                                    labels)
                                loss_cali.backward()
                                return loss_cali

                            optimizer_cali.step(
                                eval)  # just one backward >> not 50 iterations
                            after_temperature_nll = nll_criterion(
                                model_cali.temperature_scale(logits),
                                labels).item()
                            after_temperature_ece, accuracies, n_bin = ece_criterion(
                                model_cali.temperature_scale(logits), labels)
                            after_temperature_ece = after_temperature_ece.item(
                            )
                            print('Optimal temperature: %.3f' %
                                  model_cali.temperature.item())
                            print(
                                'After temperature - NLL: %.3f, ECE: %.3f' %
                                (after_temperature_nll, after_temperature_ece))

                            logits_list = []
                            labels_list = []

            else:
                loss = loss_seg

            # proper normalization
            loss = loss / args.iter_size
            loss.backward()
            loss_seg_sum += loss_seg / args.iter_size
            if USED:
                loss_adv_sum += loss_adv_pred
            if i_iter % 100 == 0 and sub_i == 4:
                l_seg_sum = loss_seg_sum / 100
                if USED:
                    l_adv_sum = loss_adv_sum / 100
                if i_iter == 0:
                    l_seg_sum = l_seg_sum * 100
                    l_adv_sum = l_adv_sum * 100
                loss_seg_sum = 0
                loss_adv_sum = 0

            loss_seg_value += loss_seg.data.cpu().numpy() / args.iter_size
            if USED:
                loss_adv_pred_value += loss_adv_pred.data.cpu().numpy(
                ) / args.iter_size

            ##################### train D!!!###########################
            ###########################################################
            # bring back requires_grad
            if USED:
                for param in model_D.parameters():
                    param.requires_grad = True  # before False.

                ############# train with pred S(X)############# labeled + unlabeled
                pred = pred.detach(
                )  #orginally only use labeled data, freeze S(X) when train D,

                # We do train D with the unlabeled data. But the difference is quite small

                if args.D_remain:  #default true
                    pred = torch.cat(
                        (pred, pred_remain), 0
                    )  # pred_remain(unlabeled S(x)) is detached  16,21,321,321
                    ignore_mask = np.concatenate(
                        (ignore_mask, ignore_mask_remain),
                        axis=0)  # 16,321,321

                D_out = interp(
                    model_D(F.softmax(pred, dim=1))
                )  # D(S(X)) 16,1,321,321  # softmax(pred,dim=1) for 0.4, not nessesary

                loss_D = bce_loss(D_out,
                                  make_D_label(pred_label,
                                               ignore_mask))  # pred_label = 0

                # -log(1-D(S(X)))
                loss_D = loss_D / args.iter_size / 2  # iter_size = 1, /2 because there is G and D
                loss_D.backward()
                loss_D_value += loss_D.data.cpu().numpy()

                ################## train with gt################### only labeled
                #VOCGT and VOCdataset can be reduced to one dataset in this repo.
                # get gt labels Y
                #print "before train gt"
                try:
                    print(trainloader_gt_iter.next())  # len 732
                    _, batch = trainloader_gt_iter.next()

                except:
                    trainloader_gt_iter = enumerate(trainloader_gt)
                    _, batch = trainloader_gt_iter.next()
                #print "train with gt?"
                _, labels_gt, _, _ = batch
                D_gt_v = Variable(one_hot(labels_gt)).cuda(args.gpu)  #one_hot
                ignore_mask_gt = (labels_gt.numpy() == 255
                                  )  # same as ignore_mask (8,321,321)
                #print "finish"
                D_out = interp(model_D(D_gt_v))  # D(Y)
                loss_D = bce_loss(D_out,
                                  make_D_label(gt_label,
                                               ignore_mask_gt))  # log(D(Y))
                loss_D = loss_D / args.iter_size / 2

                loss_D.backward()
                loss_D_value += loss_D.data.cpu().numpy()

        optimizer.step()

        if USED:
            optimizer_D.step()

        print('exp = {}'.format(args.snapshot_dir))  #snapshot
        print(
            'iter = {0:8d}/{1:8d}, loss_seg = {2:.3f}, loss_adv_p = {3:.3f}, loss_D = {4:.6f}, loss_semi = {5:.6f}, loss_semi_adv = {6:.3f}, loss_vat = {7: .5f}'
            .format(i_iter, args.num_steps, loss_seg_value,
                    loss_adv_pred_value, loss_D_value, loss_semi_value,
                    loss_semi_adv_value, loss_vat_value))
        #                                        L_ce             L_adv for labeled      L_D                L_semi              L_adv for unlabeled
        #loss_adv should be inversely proportional to the loss_D if they are seeing the same data.
        # loss_adv_p is essentially the inverse loss of loss_D. We expect them to achieve a good balance during the adversarial training
        # loss_D is around 0.2-0.5   >> good
        if i_iter >= args.num_steps - 1:
            print('save model ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir,
                         'VOC_' + str(args.num_steps) + '.pth'))
            torch.save(
                model_D.state_dict(),
                osp.join(args.snapshot_dir,
                         'VOC_' + str(args.num_steps) + '_D.pth'))
            #torch.save(state, osp.join(args.snapshot_dir, 'VOC_' + str(i_iter) + '.pth.tar'))
            #torch.save(state_D, osp.join(args.snapshot_dir, 'VOC_' + str(i_iter) + '_D.pth.tar'))
            break

        if i_iter % 100 == 0 and sub_i == 4:  #loss_seg_value
            wdata = "iter = {0:8d}/{1:8d}, loss_seg = {2:.3f}, loss_adv_p = {3:.3f}, loss_D = {4:.6f}, loss_semi = {5:.8f}, loss_semi_adv = {6:.3f}, l_vat_sum = {7: .5f}, loss_label = {8: .4}\n".format(
                i_iter, args.num_steps, l_seg_sum, l_adv_sum, loss_D_value,
                loss_semi_value, loss_semi_adv_value, l_vat_sum,
                l_seg_sum + 0.01 * l_adv_sum)
            #wdata2 = "{0:8d} {1:s} {2:s} {3:s} {4:s} {5:s} {6:s} {7:s} {8:s}\n".format(i_iter,str(model.coeff[0])[8:14],str(model.coeff[1])[8:14],str(model.coeff[2])[8:14],str(model.coeff[3])[8:14],str(model.coeff[4])[8:14],str(model.coeff[5])[8:14],str(model.coeff[6])[8:14],str(model.coeff[7])[8:14])
            if i_iter == 0:
                f2 = open("/home/eungyo/AdvSemiSeg/snapshots/log.txt", 'w')
                f2.write(wdata)
                f2.close()
                #f3 = open("/home/eungyo/AdvSemiSeg/snapshots/coeff.txt", 'w')
                #f3.write(wdata2)
                #f3.close()
            else:
                f1 = open("/home/eungyo/AdvSemiSeg/snapshots/log.txt", 'a')
                f1.write(wdata)
                f1.close()
                #f4 = open("/home/eungyo/AdvSemiSeg/snapshots/coeff.txt", 'a')
                #f4.write(wdata2)
                #f4.close()

        if i_iter % args.save_pred_every == 0 and i_iter != 0:  # 5000
            print('taking snapshot ...')
            #state = {'epoch':i_iter, 'state_dict':model.state_dict(),'optim_dict':optimizer.state_dict()}
            #state_D = {'epoch':i_iter, 'state_dict': model_D.state_dict(), 'optim_dict': optimizer_D.state_dict()}
            #torch.save(state, osp.join(args.snapshot_dir, 'VOC_' + str(i_iter) + '.pth.tar'))
            #torch.save(state_D, osp.join(args.snapshot_dir, 'VOC_' + str(i_iter) + '_D.pth.tar'))
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir, 'VOC_' + str(i_iter) + '.pth'))
            torch.save(
                model_D.state_dict(),
                osp.join(args.snapshot_dir, 'VOC_' + str(i_iter) + '_D.pth'))

    end = timeit.default_timer()
    print(end - start, 'seconds')
Exemplo n.º 6
0
def main():

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    cudnn.enabled = True
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # initialize parameters
    num_steps = args.num_steps
    batch_size = args.batch_size
    lr = args.lr
    save_cp = args.save_cp
    img_scale = args.scale
    val_percent = args.val / 100

    # data input
    dataset = BasicDataset(IMG_DIRECTORY, MASK_DIRECTORY, img_scale)
    n_val = int(len(dataset) * val_percent)
    n_train = len(dataset) - n_val
    train, val = random_split(dataset, [n_train, n_val])
    tcga_dataset = UnlabeledDataset(TCGA_DIRECTORY)
    n_unlabeled = len(tcga_dataset)

    # create network
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    #logger.addHandler(logging.StreamHandler())
    logging.info('Using device %s' % str(device))
    logging.info('Network %s' % args.mod)
    logging.info('''Starting training:
            Num_steps:          %.2f
            Batch size:      %.2f
            Learning rate:   %.4f_transform
            Training size:   %.0f
            Validation size: %.0f
            Unlabeled size:  %.0f
            Checkpoints:     %s
            Device:          %s
            Scale:           %.2f
        ''' % (num_steps, batch_size, lr, n_train, n_val, n_unlabeled,
               str(save_cp), str(device.type), img_scale))
    if args.mod == 'unet':
        net = UNet(n_channels=3, n_classes=NUM_CLASSES)
        print('channels = %d , classes = %d' % (net.n_channels, net.n_classes))
    elif args.mod == 'modified_unet':
        net = modified_UNet(n_channels=3, n_classes=NUM_CLASSES)
        print('channels = %d , classes = %d' % (net.n_channels, net.n_classes))
    elif args.mod == 'deeplabv3':
        net = DeepLabV3(nclass=NUM_CLASSES, pretrained_base=False)
        print('channels = 3 , classes = %d' % net.nclass)
    elif args.mod == 'deeplabv3plus':
        net = DeepLabV3Plus(nclass=NUM_CLASSES, pretrained_base=False)
        print('channels = 3 , classes = %d' % net.nclass)
    elif args.mod == 'nestedunet':
        net = NestedUNet(nclass=NUM_CLASSES, deep_supervision=False)
        print('channels = 3 , classes = %d' % net.nlass)
    elif args.mod == 'inception3':
        net = Inception3(n_classes=4,
                         inception_blocks=None,
                         init_weights=True,
                         bilinear=True)
        print('channels = 3 , classes = %d' % net.n_classes)

    net.to(device=device)
    net.train()

    cudnn.benchmark = True

    # init D
    model_D = FCDiscriminator(num_classes=args.num_classes)
    if args.restore_from_D is not None:
        model_D.load_state_dict(torch.load(args.restore_from_D))
    model_D.train()
    model_D.cuda()

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    if args.semi_train is None:
        train_loader = DataLoader(train,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=8,
                                  pin_memory=True)
        val_loader = DataLoader(val,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=8,
                                pin_memory=True)
    else:
        #read unlabeled data and labeled data
        train_loader = DataLoader(train,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=4,
                                  pin_memory=True)
        val_loader = DataLoader(val,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=4,
                                pin_memory=True)

        trainloader_remain = DataLoader(tcga_dataset,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        num_workers=4,
                                        pin_memory=True)
        #trainloader_gt = data.DataLoader(train_gt_dataset,
        #batch_size=args.batch_size, sampler=train_gt_sampler, num_workers=3, pin_memory=True)

        trainloader_remain_iter = enumerate(trainloader_remain)

    trainloader_iter = enumerate(train_loader)

    # implement model.optim_parameters(args) to handle different models' lr setting

    # optimizer for segmentation network
    #optimizer = optim.SGD(net.optim_parameters(args),
    #lr=args.learning_rate, momentum=args.momentum,weight_decay=args.weight_decay)
    optimizer = optim.Adam(net.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    optimizer.zero_grad()
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                           10000,
                                                           eta_min=1e-6,
                                                           last_epoch=-1)

    # optimizer for discriminator network
    optimizer_D = optim.Adam(model_D.parameters(),
                             lr=args.learning_rate_D,
                             betas=(0.9, 0.99))
    #optimizer_D = optim.SGD(model_D.parameters(), lr=args.learning_rate_D, momentum=args.momentum,weight_decay=args.weight_decay)
    optimizer_D.zero_grad()

    # loss/ bilinear upsampling
    bce_loss = BCEWithLogitsLoss2d()
    interp = nn.Upsample(size=(input_size[1], input_size[0]),
                         mode='bilinear',
                         align_corners=True)
    '''
    if version.parse(torch.__version__) >= version.parse('0.4.0'):
        interp = nn.Upsample(size=(input_size[1], input_size[0]), mode='bilinear', align_corners=True)
    else:
        interp = nn.Upsample(size=(input_size[1], input_size[0]), mode='bilinear')
    '''

    # labels for adversarial training
    pred_label = 0
    gt_label = 1

    for i_iter in range(args.num_steps):

        best_acc = 0
        loss_seg_value = 0
        loss_adv_pred_value = 0
        loss_D_value = 0
        loss_semi_value = 0
        loss_semi_adv_value = 0

        optimizer.zero_grad()
        #adjust_learning_rate(optimizer, i_iter)
        optimizer_D.zero_grad()
        adjust_learning_rate_D(optimizer_D, i_iter)

        for sub_i in range(args.iter_size):

            # train G

            # don't accumulate grads in D
            for param in model_D.parameters():
                param.requires_grad = False
            for param in net.parameters():
                param.requires_grad = True

            # do semi first
            if (args.lambda_semi > 0 or args.lambda_semi_adv > 0
                ) and i_iter >= args.semi_start_adv:
                try:
                    _, batch = trainloader_remain_iter.__next__()
                except:
                    trainloader_remain_iter = enumerate(trainloader_remain)
                    _, batch = trainloader_remain_iter.__next__()

                # only access to img
                images = batch['image']
                images = images.type(torch.FloatTensor)
                images = Variable(images).cuda()

                pred = net(images)
                pred_remain = pred.detach()

                D_out = interp(model_D(F.softmax(pred, dim=1)))
                D_out_sigmoid = torch.sigmoid(
                    D_out).data.cpu().numpy().squeeze(axis=1)
                #D_out_sigmoid = torch.sigmoid(D_out).data.cpu().numpy()

                #ignore_mask_remain = np.zeros(D_out_sigmoid.shape).astype(np.bool)

                targetr = Variable(torch.ones(D_out.shape))
                targetr = Variable(torch.FloatTensor(targetr)).cuda()
                loss_semi_adv = args.lambda_semi_adv * bce_loss(D_out, targetr)
                loss_semi_adv = loss_semi_adv / args.iter_size

                #loss_semi_adv.backward()
                #loss_semi_adv_value += loss_semi_adv.data.cpu().numpy()[0]/args.lambda_semi_adv
                loss_semi_adv_value += loss_semi_adv.cpu().detach().numpy(
                ).item() / args.lambda_semi_adv

                if args.lambda_semi <= 0 or i_iter < args.semi_start:
                    loss_semi_adv.backward()
                    loss_semi_value = 0
                else:
                    # produce ignore mask
                    semi_ignore_mask = (D_out_sigmoid < args.mask_T)

                    semi_gt = pred.data.cpu().numpy().argmax(axis=1)
                    semi_gt[semi_ignore_mask] = 255

                    semi_ratio = 1.0 - float(
                        semi_ignore_mask.sum()) / semi_ignore_mask.size
                    print('semi ratio: {:.4f}'.format(semi_ratio))

                    if semi_ratio == 0.0:
                        loss_semi_value += 0
                    else:
                        semi_gt = torch.FloatTensor(semi_gt)

                        loss_semi = args.lambda_semi * loss_calc(pred, semi_gt)
                        loss_semi = loss_semi / args.iter_size
                        loss_semi_value += loss_semi.cpu().detach().numpy(
                        ).item() / args.lambda_semi
                        loss_semi += loss_semi_adv
                        loss_semi.backward()

            else:
                loss_semi = None
                loss_semi_adv = None

            # train with source

            try:
                _, batch = trainloader_iter.__next__()
            except:
                trainloader_iter = enumerate(train_loader)
                _, batch = trainloader_iter.__next__()

            images = batch['image']
            labels = batch['mask']
            images = images.to(device=device, dtype=torch.float32)
            labels = labels.to(device=device, dtype=torch.long)
            labels = labels.squeeze(1)
            ignore_mask = (labels.cpu().numpy() == 255)
            #pred = interp(net(images))

            pred = net(images)
            criterion = nn.CrossEntropyLoss()
            loss_seg = criterion(pred, labels)
            #loss_seg = loss_calc(pred, labels)

            D_out = interp(model_D(F.softmax(pred, dim=1)))

            targetr = Variable(torch.ones(D_out.shape))
            targetr = Variable(torch.FloatTensor(targetr)).cuda()
            #loss_adv_pred = bce_loss(D_out, targetr)

            if i_iter > args.semi_start_adv:
                loss_adv_pred = bce_loss(D_out, targetr)
                loss = loss_seg + args.lambda_adv_pred * loss_adv_pred
                loss_adv_pred_value += loss_adv_pred.cpu().detach().numpy(
                ).item() / args.iter_size
            else:
                loss = loss_seg

            # proper normalization
            loss = loss / args.iter_size
            loss.backward()
            optimizer.step()
            loss_seg_value += loss_seg.cpu().detach().numpy().item(
            ) / args.iter_size
            #loss_adv_pred_value += loss_adv_pred.cpu().detach().numpy().item()/args.iter_size

            # train D

            # bring back requires_grad
            if i_iter > args.semi_start_adv and i_iter % 3 == 0:
                for param in net.parameters():
                    param.requires_grad = False
                for param in model_D.parameters():
                    param.requires_grad = True

            # train with pred
                pred = pred.detach()

                if args.D_remain:
                    pred = torch.cat((pred, pred_remain), 0)
                #ignore_mask = np.concatenate((ignore_mask,ignore_mask_remain), axis = 0)

                D_out = interp(model_D(F.softmax(pred, dim=1)))
                #targetf = Variable(torch.zeros(D_out.shape))
                targetf = 0.1 * np.random.rand(D_out.shape[0], D_out.shape[1],
                                               D_out.shape[2], D_out.shape[3])
                targetf = Variable(torch.FloatTensor(targetf)).cuda()
                loss_D = bce_loss(D_out, targetf)
                loss_D = loss_D / args.iter_size / 2
                loss_D.backward()
                loss_D_value += loss_D.data.cpu().detach().numpy().item()

                # train with gt
                # get gt labels
                try:
                    _, batch = trainloader_iter.__next__()
                except:
                    trainloader_iter = enumerate(train_loader)
                    _, batch = trainloader_iter.__next__()

                labels_gt = batch['mask']
                D_gt_v = Variable(one_hot(labels_gt)).cuda()
                ignore_mask_gt = (labels_gt.numpy() == 255).squeeze(axis=1)

                D_out = interp(model_D(D_gt_v))
                #targetr = Variable(torch.ones(D_out.shape))
                targetr = 0.1 * np.random.rand(D_out.shape[0], D_out.shape[1],
                                               D_out.shape[2],
                                               D_out.shape[3]) + 0.9
                targetr = Variable(torch.FloatTensor(targetr)).cuda()
                loss_D = bce_loss(D_out, targetr)
                loss_D = loss_D / args.iter_size / 2
                loss_D.backward()
                optimizer_D.step()
                loss_D_value += loss_D.cpu().detach().numpy().item()
        scheduler.step()

        print(
            'iter = {0:8d}/{1:8d}, loss_seg = {2:.3f}, loss_adv_p = {3:.3f}, loss_D = {4:.3f}, loss_semi = {5:.3f}, loss_semi_adv = {6:.3f}'
            .format(i_iter, args.num_steps, loss_seg_value,
                    loss_adv_pred_value, loss_D_value, loss_semi_value,
                    loss_semi_adv_value))
        '''
        if i_iter >= args.num_steps-1:
            print 'save model ...'
            torch.save(model.state_dict(),osp.join(args.snapshot_dir, 'VOC_'+str(args.num_steps)+'.pth'))
            torch.save(model_D.state_dict(),osp.join(args.snapshot_dir, 'VOC_'+str(args.num_steps)+'_D.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter!=0:
            print 'taking snapshot ...'
            torch.save(model.state_dict(),osp.join(args.snapshot_dir, 'VOC_'+str(i_iter)+'.pth'))
            torch.save(model_D.state_dict(),osp.join(args.snapshot_dir, 'VOC_'+str(i_iter)+'_D.pth'))
        '''
        # save checkpoints
        if save_cp and (i_iter % 1000) == 0 and (i_iter != 0):
            try:
                os.mkdir(DIR_CHECKPOINTS)
                logging.info('Created checkpoint directory')
            except OSError:
                pass
            torch.save(net.state_dict(),
                       DIR_CHECKPOINTS + 'i_iter_%d.pth' % (i_iter + 1))
            logging.info('Checkpoint %d saved !' % (i_iter + 1))

        if (i_iter % 1000 == 0) and (i_iter != 0):
            val_score, accuracy, dice_avr, dice_panck, dice_nuclei, dice_lcell = eval_net(
                net, val_loader, device, n_val)
            logging.info('Validation cross entropy: {}'.format(val_score))
            if accuracy > best_acc:
                best_acc = accuracy
            result_file = open('result.txt', 'a', encoding='utf-8')
            result_file.write('best_acc = ' + str(best_acc) + '\n' +
                              'iter = ' + str(i_iter) + '\n')
            result_file.close
def train(gpu, args):
    """Create the model and start the training."""

    rank = args.nr * args.num_gpus + gpu
    if gpu == 1:
        gpu = 3

    dist.init_process_group(backend="nccl",
                            world_size=args.world_size,
                            rank=rank)

    if args.batch_size == 1 and args.use_bn is True:
        raise Exception

    torch.autograd.set_detect_anomaly(True)
    torch.manual_seed(args.torch_seed)
    torch.cuda.manual_seed(args.cuda_seed)

    torch.cuda.set_device(gpu)

    w, h = map(int, args.input_size.split(','))
    input_size = (w, h)

    w, h = map(int, args.input_size_target.split(','))
    input_size_target = (w, h)

    cudnn.enabled = True
    gpu = gpu

    criterion = DiceBCELoss()
    # criterion = nn.CrossEntropyLoss(ignore_index=253)
    # Create network
    if args.model == 'DeepLab':
        model = DeeplabMulti(num_classes=args.num_classes)
        if args.restore_from is None:
            pass
        elif args.restore_from[:4] == 'http':
            saved_state_dict = model_zoo.load_url(args.restore_from)
        elif args.restore_from is not None:
            saved_state_dict = torch.load(args.restore_from)
            model.load_state_dict(saved_state_dict)
            print("Loaded state dicts for model")
        # if args.restore_from is not None:
        #     new_params = model.state_dict().copy()
        #     for i in saved_state_dict:
        #         # Scale.layer5.conv2d_list.3.weight
        #         i_parts = i.split('.')
        #         # print i_parts
        #         if not args.num_classes == 19 or not i_parts[1] == 'layer5':
        #             new_params['.'.join(i_parts[1:])] = saved_state_dict[i]
        #             # print i_parts
        #     model.load_state_dict(new_params)

    if not args.no_logging:
        if not os.path.isdir(args.log_dir):
            os.mkdir(args.log_dir)
        log_dir = os.path.join(args.log_dir, args.exp_dir)
        if not os.path.isdir(log_dir):
            os.mkdir(log_dir)
        if args.exp_name == "":
            exp_name = datetime.datetime.now().strftime("%H%M%S-%Y%m%d")
        else:
            exp_name = args.exp_name
        log_dir = os.path.join(log_dir, exp_name)
        writer = SummaryWriter(log_dir)

    model.train()
    # model.cuda(gpu)
    model = model.cuda(device=gpu)

    if args.num_gpus > 0 or torch.cuda.device_count() > 0:
        model = DistributedDataParallel(model,
                                        device_ids=[gpu],
                                        find_unused_parameters=True)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # cudnn.benchmark = True

    # init D
    model_D1 = FCDiscriminator(num_classes=args.num_classes)
    model_D2 = FCDiscriminator(num_classes=args.num_classes)
    start_epoch = 0
    if "http" not in args.restore_from and args.restore_from is not None:
        root, extension = args.restore_from.strip().split(".")
        D1pth = root + "_D1." + extension
        D2pth = root + "_D2." + extension
        saved_state_dict = torch.load(D1pth)
        model_D1.load_state_dict(saved_state_dict)
        saved_state_dict = torch.load(D2pth)
        model_D2.load_state_dict(saved_state_dict)
        start_epoch = int(re.findall(r'[\d]+', root)[-1])
        print("Loaded state dict for models D1 and D2")

    model_D1.train()
    # model_D1.cuda(gpu)
    model_D2.train()
    # model_D2.cuda(gpu)

    model_D1 = model_D1.cuda(device=gpu)
    model_D2 = model_D2.cuda(device=gpu)

    if args.num_gpus > 0 or torch.cuda.device_count() > 0:
        model_D1 = DistributedDataParallel(model_D1,
                                           device_ids=[gpu],
                                           find_unused_parameters=True)
        model_D2 = DistributedDataParallel(model_D2,
                                           device_ids=[gpu],
                                           find_unused_parameters=True)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    train_dataset = SyntheticSmokeTrain(args={},
                                        dataset_limit=args.num_steps *
                                        args.iter_size * args.batch_size,
                                        image_shape=input_size,
                                        dataset_mean=IMG_MEAN)

    train_sampler = DistributedSampler(train_dataset,
                                       num_replicas=args.world_size,
                                       rank=rank,
                                       shuffle=True)
    trainloader = data.DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  num_workers=args.num_workers,
                                  pin_memory=True,
                                  sampler=train_sampler)

    # trainloader = data.DataLoader(
    #     GTA5DataSet(args.data_dir, args.data_list, max_iters=args.num_steps * args.iter_size * args.batch_size,
    #                 crop_size=input_size,
    #                 scale=args.random_scale, mirror=args.random_mirror, mean=IMG_MEAN),
    #     batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True)

    trainloader_iter = enumerate(trainloader)
    print("Length of train dataloader: ", len(trainloader))
    target_dataset = SimpleSmokeVal(args={},
                                    image_size=input_size_target,
                                    dataset_mean=IMG_MEAN)
    target_sampler = DistributedSampler(target_dataset,
                                        num_replicas=args.world_size,
                                        rank=rank,
                                        shuffle=True)
    targetloader = data.DataLoader(target_dataset,
                                   batch_size=args.batch_size,
                                   num_workers=args.num_workers,
                                   pin_memory=True,
                                   sampler=target_sampler)

    # targetloader = data.DataLoader(cityscapesDataSet(args.data_dir_target, args.data_list_target,
    #                                                  max_iters=args.num_steps * args.iter_size * args.batch_size,
    #                                                  crop_size=input_size_target,
    #                                                  scale=False, mirror=args.random_mirror, mean=IMG_MEAN,
    #                                                  set=args.set),
    #                                batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers,
    #                                pin_memory=True)

    targetloader_iter = enumerate(targetloader)
    print("Length of train dataloader: ", len(targetloader))
    # implement model.optim_parameters(args) to handle different models' lr setting

    optimizer = optim.SGD(model.module.optim_parameters(args),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    optimizer.zero_grad()

    optimizer_D1 = optim.Adam(model_D1.parameters(),
                              lr=args.learning_rate_D,
                              betas=(0.9, 0.99))
    optimizer_D1.zero_grad()

    optimizer_D2 = optim.Adam(model_D2.parameters(),
                              lr=args.learning_rate_D,
                              betas=(0.9, 0.99))
    optimizer_D2.zero_grad()

    if args.gan == 'Vanilla':
        bce_loss = torch.nn.BCEWithLogitsLoss()
    elif args.gan == 'LS':
        bce_loss = torch.nn.MSELoss()

    interp = nn.Upsample(size=(input_size[1], input_size[0]), mode='bilinear')
    interp_target = nn.Upsample(size=(input_size_target[1],
                                      input_size_target[0]),
                                mode='bilinear')

    # labels for adversarial training
    source_label = 0
    target_label = 1

    for i_iter in range(start_epoch, args.num_steps):

        loss_seg_value1 = 0
        loss_adv_target_value1 = 0
        loss_D_value1 = 0

        loss_seg_value2 = 0
        loss_adv_target_value2 = 0
        loss_D_value2 = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)

        optimizer_D1.zero_grad()
        optimizer_D2.zero_grad()
        adjust_learning_rate_D(optimizer_D1, i_iter)
        adjust_learning_rate_D(optimizer_D2, i_iter)

        for sub_i in range(args.iter_size):

            # train G

            # don't accumulate grads in D
            for param in model_D1.parameters():
                param.requires_grad = False

            for param in model_D2.parameters():
                param.requires_grad = False

            # train with source
            # try:
            _, batch = next(trainloader_iter)  #.next()
            # except StopIteration:
            # trainloader = data.DataLoader(
            #     SyntheticSmokeTrain(args={}, dataset_limit=args.num_steps * args.iter_size * args.batch_size,
            #                 image_shape=input_size, dataset_mean=IMG_MEAN),
            #     batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True)
            # trainloader_iter = iter(trainloader)
            # _, batch = next(trainloader_iter)

            images, labels, _, _ = batch
            images = Variable(images).cuda(gpu)
            # print("Shape of labels", labels.shape)
            # print("Are labels all zero? ")
            # for i in range(labels.shape[0]):
            #     print("{}: All zero? {}".format(i, torch.all(labels[i]==0)))
            #     print("{}: All 255? {}".format(i, torch.all(labels[i]==255)))
            #     print("{}: Mean = {}".format(i, torch.mean(labels[i])))

            pred1, pred2 = model(images)
            # print("Pred1 and Pred2 original size: {}, {}".format(pred1.shape, pred2.shape))
            pred1 = interp(pred1)
            pred2 = interp(pred2)
            # print("Pred1 and Pred2 upsampled size: {}, {}".format(pred1.shape, pred2.shape))
            # for pred, name in zip([pred1, pred2], ['pred1', 'pred2']):
            #     print(name)
            #     for i in range(pred.shape[0]):
            #         print("{}: All zero? {}".format(i, torch.all(pred[i]==0)))
            #         print("{}: All 255? {}".format(i, torch.all(pred[i]==255)))
            #         print("{}: Mean = {}".format(i, torch.mean(pred[i])))

            loss_seg1 = loss_calc(pred1, labels, gpu, criterion)
            loss_seg2 = loss_calc(pred2, labels, gpu, criterion)
            loss = loss_seg2 + args.lambda_seg * loss_seg1

            # proper normalization
            loss = loss / args.iter_size
            loss.backward()
            # print("Seg1 loss: ",loss_seg1, args.iter_size)
            # print("Seg2 loss: ",loss_seg2, args.iter_size)

            loss_seg_value1 += loss_seg1.data.cpu().item() / args.iter_size
            loss_seg_value2 += loss_seg2.data.cpu().item() / args.iter_size

            # train with target
            # try:
            _, batch = next(targetloader_iter)  #.next()
            # except StopIteration:
            #     targetloader = data.DataLoader(
            #         SimpleSmokeVal(args = {}, image_size=input_size_target, dataset_mean=IMG_MEAN),
            #                         batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers,
            #                         pin_memory=True)
            #     targetloader_iter = iter(targetloader)
            #     _, batch = next(targetloader_iter)

            images, _, _ = batch
            images = Variable(images).cuda(gpu)

            pred_target1, pred_target2 = model(images)
            pred_target1 = interp_target(pred_target1)
            pred_target2 = interp_target(pred_target2)

            D_out1 = model_D1(F.softmax(pred_target1, dim=1))
            D_out2 = model_D2(F.softmax(pred_target2, dim=1))

            loss_adv_target1 = bce_loss(
                D_out1,
                Variable(
                    torch.FloatTensor(
                        D_out1.data.size()).fill_(source_label)).cuda(gpu))

            loss_adv_target2 = bce_loss(
                D_out2,
                Variable(
                    torch.FloatTensor(
                        D_out2.data.size()).fill_(source_label)).cuda(gpu))

            loss = args.lambda_adv_target1 * loss_adv_target1 + args.lambda_adv_target2 * loss_adv_target2
            loss = loss / args.iter_size
            loss.backward()
            loss_adv_target_value1 += loss_adv_target1.data.cpu().item(
            ) / args.iter_size
            loss_adv_target_value2 += loss_adv_target2.data.cpu().item(
            ) / args.iter_size

            # train D

            # bring back requires_grad
            for param in model_D1.parameters():
                param.requires_grad = True

            for param in model_D2.parameters():
                param.requires_grad = True

            # train with source
            pred1 = pred1.detach()
            pred2 = pred2.detach()

            D_out1 = model_D1(F.softmax(pred1, dim=1))
            D_out2 = model_D2(F.softmax(pred2, dim=1))

            loss_D1 = bce_loss(
                D_out1,
                Variable(
                    torch.FloatTensor(
                        D_out1.data.size()).fill_(source_label)).cuda(gpu))

            loss_D2 = bce_loss(
                D_out2,
                Variable(
                    torch.FloatTensor(
                        D_out2.data.size()).fill_(source_label)).cuda(gpu))

            loss_D1 = loss_D1 / args.iter_size / 2
            loss_D2 = loss_D2 / args.iter_size / 2

            loss_D1.backward()
            loss_D2.backward()

            loss_D_value1 += loss_D1.data.cpu().item()
            loss_D_value2 += loss_D2.data.cpu().item()

            # train with target
            pred_target1 = pred_target1.detach()
            pred_target2 = pred_target2.detach()

            D_out1 = model_D1(F.softmax(pred_target1, dim=1))
            D_out2 = model_D2(F.softmax(pred_target2, dim=1))

            loss_D1 = bce_loss(
                D_out1,
                Variable(
                    torch.FloatTensor(
                        D_out1.data.size()).fill_(target_label)).cuda(gpu))

            loss_D2 = bce_loss(
                D_out2,
                Variable(
                    torch.FloatTensor(
                        D_out2.data.size()).fill_(target_label)).cuda(gpu))

            loss_D1 = loss_D1 / args.iter_size / 2
            loss_D2 = loss_D2 / args.iter_size / 2

            loss_D1.backward()
            loss_D2.backward()

            loss_D_value1 += loss_D1.data.cpu().item()
            loss_D_value2 += loss_D2.data.cpu().item()

        optimizer.step()
        optimizer_D1.step()
        optimizer_D2.step()

        print('exp = {}'.format(args.snapshot_dir))
        print(
            'iter = {0:8d}/{1:8d}, loss_seg1 = {2:.3f} loss_seg2 = {3:.3f} loss_adv1 = {4:.3f}, loss_adv2 = {5:.3f} loss_D1 = {6:.3f} loss_D2 = {7:.3f}'
            .format(i_iter, args.num_steps, loss_seg_value1, loss_seg_value2,
                    loss_adv_target_value1, loss_adv_target_value2,
                    loss_D_value1, loss_D_value2))
        writer.add_scalar(f'loss/train/segmentation/1', loss_seg_value1,
                          i_iter)
        writer.add_scalar(f'loss/train/segmentation/2', loss_seg_value2,
                          i_iter)
        writer.add_scalar(f'loss/train/adversarial/1', loss_adv_target_value1,
                          i_iter)
        writer.add_scalar(f'loss/train/adversarial/2', loss_adv_target_value2,
                          i_iter)
        writer.add_scalar(f'loss/train/domain/1', loss_D_value1, i_iter)
        writer.add_scalar(f'loss/train/domain/2', loss_D_value2, i_iter)

        if i_iter >= args.num_steps_stop - 1:
            print('save model ...')
            torch.save(
                model.state_dict(),
                osp.join(
                    args.snapshot_dir, 'smoke_cross_entropy_multigpu_' +
                    str(args.num_steps_stop) + '.pth'))
            torch.save(
                model_D1.state_dict(),
                osp.join(
                    args.snapshot_dir, 'smoke_cross_entropy_multigpu_' +
                    str(args.num_steps_stop) + '_D1.pth'))
            torch.save(
                model_D2.state_dict(),
                osp.join(
                    args.snapshot_dir, 'smoke_cross_entropy_multigpu_' +
                    str(args.num_steps_stop) + '_D2.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            torch.save(
                model.state_dict(),
                osp.join(
                    args.snapshot_dir,
                    'smoke_cross_entropy_multigpu_' + str(i_iter) + '.pth'))
            torch.save(
                model_D1.state_dict(),
                osp.join(
                    args.snapshot_dir,
                    'smoke_cross_entropy_multigpu_' + str(i_iter) + '_D1.pth'))
            torch.save(
                model_D2.state_dict(),
                osp.join(
                    args.snapshot_dir,
                    'smoke_cross_entropy_multigpu_' + str(i_iter) + '_D2.pth'))
        writer.flush()
Exemplo n.º 8
0
def main():
    """Create the model and start the training."""

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    h, w = map(int, args.input_size_target.split(','))
    input_size_target = (h, w)

    cudnn.enabled = True
    gpu = args.gpu

    # Create network
    if args.model == 'DeepLab':
        model = Res_Deeplab(num_classes=args.num_classes)
        if args.restore_from[:4] == 'http' :
            saved_state_dict = model_zoo.load_url(args.restore_from)
        else:
            saved_state_dict = torch.load(args.restore_from)

        new_params = model.state_dict().copy()
        for i in saved_state_dict:
            # Scale.layer5.conv2d_list.3.weight
            i_parts = i.split('.')
            # print i_parts
            if not args.num_classes == 19 or not i_parts[1] == 'layer5':
                new_params['.'.join(i_parts[1:])] = saved_state_dict[i]
                # print i_parts
        #model.load_state_dict(new_params)
        
    if CONTINUE_FLAG==1:
        model.load_state_dict(saved_state_dict)    
    
    model.train()
    model.cuda(args.gpu)

    cudnn.benchmark = True

    # init D
    model_D_a = FCDiscriminator(num_classes=256)  # need to check
    model_D1 = FCDiscriminator(num_classes=args.num_classes)
    model_D2 = FCDiscriminator(num_classes=args.num_classes)
    
    if CONTINUE_FLAG==1:
        d1_saved_state_dict = torch.load(D1_RESTORE_FROM)
        d2_saved_state_dict = torch.load(D2_RESTORE_FROM)
        model_D1.load_state_dict(d1_saved_state_dict)
        model_D2.load_state_dict(d2_saved_state_dict)
        
    model_D_a.train()
    model_D_a.cuda(args.gpu)

    model_D1.train()
    model_D1.cuda(args.gpu)

    model_D2.train()
    model_D2.cuda(args.gpu)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    trainloader = data.DataLoader(
        synthiaDataSet(args.data_dir, args.data_list, max_iters=args.num_steps * args.iter_size * args.batch_size,
                    crop_size=input_size,
                    scale=args.random_scale, mirror=args.random_mirror, mean=IMG_MEAN),
        batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, pin_memory=True)

    trainloader_iter = enumerate(trainloader)

    targetloader = data.DataLoader(cityscapesDataSet(args.data_dir_target, args.data_list_target,
                                                     max_iters=args.num_steps * args.iter_size * args.batch_size,
                                                     crop_size=input_size_target,
                                                     scale=False, mirror=args.random_mirror, mean=IMG_MEAN,
                                                     set=args.set),
                                   batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers,
                                   pin_memory=True)


    targetloader_iter = enumerate(targetloader)

    # implement model.optim_parameters(args) to handle different models' lr setting

    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay)
    optimizer.zero_grad()

    optimizer_D_a = optim.Adam(model_D_a.parameters(), lr=args.learning_rate_D, betas=(0.9, 0.99))
    optimizer_D_a.zero_grad()

    optimizer_D1 = optim.Adam(model_D1.parameters(), lr=args.learning_rate_D, betas=(0.9, 0.99))
    optimizer_D1.zero_grad()

    optimizer_D2 = optim.Adam(model_D2.parameters(), lr=args.learning_rate_D, betas=(0.9, 0.99))
    optimizer_D2.zero_grad()

    bce_loss = torch.nn.BCEWithLogitsLoss()

    # interp = nn.Upsample(size=(input_size[1], input_size[0]), mode='bilinear')
    # interp_target = nn.Upsample(size=(input_size_target[1], input_size_target[0]), mode='bilinear')

    # labels for adversarial training
    source_label = 0
    target_label = 1
    mIoUs = []
    for i_iter in range(continue_start_iter+1,args.num_steps):

        loss_seg_value1 = 0
        loss_adv_target_value1 = 0
        loss_D_value1 = 0

        loss_seg_value2 = 0
        loss_adv_target_value2 = 0
        loss_D_value2 = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)

        optimizer_D_a.zero_grad()
        optimizer_D1.zero_grad()
        optimizer_D2.zero_grad()
        adjust_learning_rate_D(optimizer_D_a, i_iter)
        adjust_learning_rate_D(optimizer_D1, i_iter)
        adjust_learning_rate_D(optimizer_D2, i_iter)



        for sub_i in range(args.iter_size):

            # train G

            # don't accumulate grads in D
            for param in model_D_a.parameters():
                param.requires_grad = False

            for param in model_D1.parameters():
                param.requires_grad = False

            for param in model_D2.parameters():
                param.requires_grad = False

            # train with source

            _, batch = trainloader_iter.__next__()
            images, labels, _, _ = batch
            images = Variable(images).cuda(args.gpu)

            pred_a, pred1, pred2 = model(images)
            pred1=nn.functional.interpolate(pred1,size=(input_size[1], input_size[0]), mode='bilinear',align_corners=True)
            pred2 = nn.functional.interpolate(pred2, size=(input_size[1], input_size[0]), mode='bilinear',
                                              align_corners=True)
            # pred1 = interp(pred1)
            # pred2 = interp(pred2)

            loss_seg1 = loss_calc(pred1, labels, args.gpu)
            loss_seg2 = loss_calc(pred2, labels, args.gpu)
            loss = loss_seg2 + args.lambda_seg * loss_seg1

            # proper normalization
            loss = loss / args.iter_size
            loss.backward()
            loss_seg_value1 += loss_seg1.data.cpu().numpy() / args.iter_size
            loss_seg_value2 += loss_seg2.data.cpu().numpy() / args.iter_size

            # train with target

            _, batch = targetloader_iter.__next__()
            images, _, _ = batch
            images = Variable(images).cuda(args.gpu)

            lambda_wtight = (80000 - i_iter) / 80000
            if lambda_wtight > 0:
                pred_target_a, _, _ = model(images)
                D_out_a = model_D_a(pred_target_a)
                loss_adv_target_a = bce_loss(D_out_a,
                                             Variable(torch.FloatTensor(D_out_a.data.size()).fill_(source_label)).cuda(
                                                 args.gpu))
                loss_adv_target_a=LAMBDA_ADV_TARGET_A *loss_adv_target_a
                loss_adv_target_a = loss_adv_target_a / args.iter_size
                loss_adv_target_a.backward()

            _, pred_target1, pred_target2 = model(images)

            pred_target1 = nn.functional.interpolate(pred_target1,size=(input_size_target[1], input_size_target[0]), mode='bilinear',align_corners=True)
            pred_target2 = nn.functional.interpolate(pred_target2, size=(input_size_target[1], input_size_target[0]),
                                                     mode='bilinear', align_corners=True)

            D_out1 = model_D1(F.softmax(pred_target1,dim=1))
            D_out2 = model_D2(F.softmax(pred_target2,dim=1))

            loss_adv_target1 = bce_loss(D_out1,
                                       Variable(torch.FloatTensor(D_out1.data.size()).fill_(source_label)).cuda(
                                           args.gpu))

            loss_adv_target2 = bce_loss(D_out2,
                                        Variable(torch.FloatTensor(D_out2.data.size()).fill_(source_label)).cuda(
                                            args.gpu))

            loss = args.lambda_adv_target1 * loss_adv_target1 + args.lambda_adv_target2 * loss_adv_target2
            loss = loss / args.iter_size
            loss.backward()
            loss_adv_target_value1 += loss_adv_target1.data.cpu().numpy() / args.iter_size
            loss_adv_target_value2 += loss_adv_target2.data.cpu().numpy() / args.iter_size

            # train D

            # bring back requires_grad
            for param in model_D_a.parameters():
                param.requires_grad = True

            for param in model_D1.parameters():
                param.requires_grad = True

            for param in model_D2.parameters():
                param.requires_grad = True

            # train with source
            if lambda_wtight > 0:
                pred_a=pred_a.detach()
                D_out_a = model_D_a(pred_a)
                loss_D_a = bce_loss(D_out_a,
                                    Variable(torch.FloatTensor(D_out_a.data.size()).fill_(source_label)).cuda(args.gpu))
                loss_D_a = loss_D_a / args.iter_size / 2
                loss_D_a.backward()

            pred1 = pred1.detach()
            pred2 = pred2.detach()

            D_out1 = model_D1(F.softmax(pred1,dim=1))
            D_out2 = model_D2(F.softmax(pred2,dim=1))

            loss_D1 = bce_loss(D_out1,
                              Variable(torch.FloatTensor(D_out1.data.size()).fill_(source_label)).cuda(args.gpu))

            loss_D2 = bce_loss(D_out2,
                               Variable(torch.FloatTensor(D_out2.data.size()).fill_(source_label)).cuda(args.gpu))

            loss_D1 = loss_D1 / args.iter_size / 2
            loss_D2 = loss_D2 / args.iter_size / 2

            loss_D1.backward()
            loss_D2.backward()

            loss_D_value1 += loss_D1.data.cpu().numpy()
            loss_D_value2 += loss_D2.data.cpu().numpy()

            # train with target
            if lambda_wtight > 0:
                pred_target_a=pred_target_a.detach()
                D_out_a = model_D_a(pred_target_a)
                loss_D_a = bce_loss(D_out_a,
                                    Variable(torch.FloatTensor(D_out_a.data.size()).fill_(target_label)).cuda(args.gpu))
                loss_D_a = loss_D_a / args.iter_size / 2
                loss_D_a.backward()

            pred_target1 = pred_target1.detach()
            pred_target2 = pred_target2.detach()

            D_out1 = model_D1(F.softmax(pred_target1,dim=1))
            D_out2 = model_D2(F.softmax(pred_target2,dim=1))

            loss_D1 = bce_loss(D_out1,
                              Variable(torch.FloatTensor(D_out1.data.size()).fill_(target_label)).cuda(args.gpu))

            loss_D2 = bce_loss(D_out2,
                               Variable(torch.FloatTensor(D_out2.data.size()).fill_(target_label)).cuda(args.gpu))

            loss_D1 = loss_D1 / args.iter_size / 2
            loss_D2 = loss_D2 / args.iter_size / 2

            loss_D1.backward()
            loss_D2.backward()

            loss_D_value1 += loss_D1.data.cpu().numpy()
            loss_D_value2 += loss_D2.data.cpu().numpy()

        optimizer.step()
        optimizer_D_a.step()
        optimizer_D1.step()
        optimizer_D2.step()

        print('exp = {}'.format(args.snapshot_dir))
        print(
        'iter = {0:8d}/{1:8d}, loss_seg1 = {2:.3f} loss_seg2 = {3:.3f} loss_adv1 = {4:.3f}, loss_adv2 = {5:.3f} loss_D1 = {6:.3f} loss_D2 = {7:.3f}'.format(
            i_iter, args.num_steps, loss_seg_value1, loss_seg_value2, loss_adv_target_value1, loss_adv_target_value2, loss_D_value1, loss_D_value2))

        if i_iter >= args.num_steps_stop - 1:
            print('save model ...')
            torch.save(model.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(args.num_steps) + '.pth'))
            torch.save(model_D1.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(args.num_steps) + '_D1.pth'))
            torch.save(model_D2.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(args.num_steps) + '_D2.pth'))
            show_val(model.state_dict(), LAMBDA_ADV_TARGET_A ,i_iter)
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            torch.save(model.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '.pth'))
            torch.save(model_D1.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D1.pth'))
            torch.save(model_D2.state_dict(), osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D2.pth'))
            mIoU=show_val(model.state_dict(), LAMBDA_ADV_TARGET_A ,i_iter)
            mIoUs.append(str(round(np.nanmean(mIoU) * 100, 2)))
            for miou in mIoUs:
                print(miou)
Exemplo n.º 9
0
def main():
    """Create the model and start the training."""
    global args
    args = get_arguments()
    if args.dist:
        init_dist(args.launcher, backend=args.backend)
    world_size = 1
    rank = 0
    if args.dist:
        rank = dist.get_rank()
        world_size = dist.get_world_size()
    device = torch.device("cuda" if not args.cpu else "cpu")

    w, h = map(int, args.input_size.split(','))
    input_size = (w, h)

    w, h = map(int, args.input_size_target.split(','))
    input_size_target = (w, h)

    cudnn.enabled = True

    # Create network
    if args.model == 'Deeplab':
        model = DeeplabMulti(num_classes=args.num_classes)
        if args.restore_from[:4] == 'http':
            saved_state_dict = model_zoo.load_url(args.restore_from)
        else:
            saved_state_dict = torch.load(args.restore_from, strict=False)

        new_params = model.state_dict().copy()
        for i in saved_state_dict:
            i_parts = i.split('.')
            if not args.num_classes == 19 or not i_parts[1] == 'layer5':
                new_params['.'.join(i_parts[1:])] = saved_state_dict[i]
        model.load_state_dict(new_params)
    elif args.model == 'DeeplabVGG':
        model = DeeplabVGG(num_classes=args.num_classes)
        if args.restore_from[:4] == 'http':
            saved_state_dict = model_zoo.load_url(args.restore_from)
        else:
            saved_state_dict = torch.load(args.restore_from)
        model.load_state_dict(saved_state_dict, strict=False)
    elif args.model == 'DeeplabVGGBN':
        deeplab_vggbn.BatchNorm = SyncBatchNorm2d
        model = deeplab_vggbn.DeeplabVGGBN(num_classes=args.num_classes)
        if args.restore_from[:4] == 'http':
            saved_state_dict = model_zoo.load_url(args.restore_from)
        else:
            saved_state_dict = torch.load(args.restore_from)
            model.load_state_dict(saved_state_dict, strict=False)
            del saved_state_dict

    model.train()
    model.to(device)
    if args.dist:
        broadcast_params(model)

    if rank == 0:
        print(model)

    cudnn.benchmark = True

    # init D
    model_D1 = FCDiscriminator(num_classes=args.num_classes).to(device)
    model_D2 = FCDiscriminator(num_classes=args.num_classes).to(device)

    model_D1.train()
    model_D1.to(device)
    if args.dist:
        broadcast_params(model_D1)
    if args.restore_D is not None:
        D_dict = torch.load(args.restore_D)
        model_D1.load_state_dict(D_dict, strict=False)
        del D_dict

    model_D2.train()
    model_D2.to(device)
    if args.dist:
        broadcast_params(model_D2)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    train_data = GTA5BDDDataSet(args.data_dir,
                                args.data_list,
                                max_iters=args.num_steps * args.iter_size *
                                args.batch_size,
                                crop_size=input_size,
                                scale=args.random_scale,
                                mirror=args.random_mirror,
                                mean=IMG_MEAN)
    train_sampler = None
    if args.dist:
        train_sampler = DistributedSampler(train_data)
    trainloader = data.DataLoader(train_data,
                                  batch_size=args.batch_size,
                                  shuffle=False if train_sampler else True,
                                  num_workers=args.num_workers,
                                  pin_memory=False,
                                  sampler=train_sampler)

    trainloader_iter = enumerate(cycle(trainloader))

    target_data = BDDDataSet(args.data_dir_target,
                             args.data_list_target,
                             max_iters=args.num_steps * args.iter_size *
                             args.batch_size,
                             crop_size=input_size_target,
                             scale=False,
                             mirror=args.random_mirror,
                             mean=IMG_MEAN,
                             set=args.set)
    target_sampler = None
    if args.dist:
        target_sampler = DistributedSampler(target_data)
    targetloader = data.DataLoader(target_data,
                                   batch_size=args.batch_size,
                                   shuffle=False if target_sampler else True,
                                   num_workers=args.num_workers,
                                   pin_memory=False,
                                   sampler=target_sampler)

    targetloader_iter = enumerate(cycle(targetloader))

    # implement model.optim_parameters(args) to handle different models' lr setting

    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    optimizer.zero_grad()

    optimizer_D1 = optim.Adam(model_D1.parameters(),
                              lr=args.learning_rate_D,
                              betas=(0.9, 0.99))
    optimizer_D1.zero_grad()

    optimizer_D2 = optim.Adam(model_D2.parameters(),
                              lr=args.learning_rate_D,
                              betas=(0.9, 0.99))
    optimizer_D2.zero_grad()

    bce_loss = torch.nn.BCEWithLogitsLoss()
    seg_loss = torch.nn.CrossEntropyLoss(ignore_index=255)

    #interp = nn.Upsample(size=(input_size[1], input_size[0]), mode='bilinear', align_corners=True)
    interp_target = nn.Upsample(size=(input_size_target[1],
                                      input_size_target[0]),
                                mode='bilinear',
                                align_corners=True)

    # labels for adversarial training
    source_label = 0
    target_label = 1

    # set up tensor board
    if args.tensorboard and rank == 0:
        if not os.path.exists(args.log_dir):
            os.makedirs(args.log_dir)

        writer = SummaryWriter(args.log_dir)

    torch.cuda.empty_cache()
    for i_iter in range(args.num_steps):

        loss_seg_value1 = 0
        loss_adv_target_value1 = 0
        loss_D_value1 = 0

        loss_seg_value2 = 0
        loss_adv_target_value2 = 0
        loss_D_value2 = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)

        optimizer_D1.zero_grad()
        optimizer_D2.zero_grad()
        adjust_learning_rate_D(optimizer_D1, i_iter)
        adjust_learning_rate_D(optimizer_D2, i_iter)

        for sub_i in range(args.iter_size):

            # train G

            # don't accumulate grads in D
            for param in model_D1.parameters():
                param.requires_grad = False

            for param in model_D2.parameters():
                param.requires_grad = False

            # train with source

            _, batch = trainloader_iter.__next__()

            images, labels, size, _ = batch
            images = images.to(device)
            labels = labels.long().to(device)
            interp = nn.Upsample(size=(size[1], size[0]),
                                 mode='bilinear',
                                 align_corners=True)

            pred1 = model(images)
            pred1 = interp(pred1)

            loss_seg1 = seg_loss(pred1, labels)

            loss = loss_seg1

            # proper normalization
            loss = loss / args.iter_size / world_size
            loss.backward()
            loss_seg_value1 += loss_seg1.item() / args.iter_size

            _, batch = targetloader_iter.__next__()
            # train with target
            images, _, _ = batch
            images = images.to(device)

            pred_target1 = model(images)
            pred_target1 = interp_target(pred_target1)

            D_out1 = model_D1(F.softmax(pred_target1))
            loss_adv_target1 = bce_loss(
                D_out1,
                torch.FloatTensor(
                    D_out1.data.size()).fill_(source_label).to(device))

            loss = args.lambda_adv_target1 * loss_adv_target1
            loss = loss / args.iter_size / world_size

            loss.backward()
            loss_adv_target_value1 += loss_adv_target1.item() / args.iter_size

            # train D

            # bring back requires_grad
            for param in model_D1.parameters():
                param.requires_grad = True

            for param in model_D2.parameters():
                param.requires_grad = True

            # train with source
            pred1 = pred1.detach()
            D_out1 = model_D1(F.softmax(pred1))
            loss_D1 = bce_loss(
                D_out1,
                torch.FloatTensor(
                    D_out1.data.size()).fill_(source_label).to(device))
            loss_D1 = loss_D1 / args.iter_size / 2 / world_size
            loss_D1.backward()
            loss_D_value1 += loss_D1.item()

            # train with target
            pred_target1 = pred_target1.detach()
            D_out1 = model_D1(F.softmax(pred_target1))
            loss_D1 = bce_loss(
                D_out1,
                torch.FloatTensor(
                    D_out1.data.size()).fill_(target_label).to(device))
            loss_D1 = loss_D1 / args.iter_size / 2 / world_size
            loss_D1.backward()
            if args.dist:
                average_gradients(model)
                average_gradients(model_D1)
                average_gradients(model_D2)

            loss_D_value1 += loss_D1.item()

        optimizer.step()
        optimizer_D1.step()

        if rank == 0:
            if args.tensorboard:
                scalar_info = {
                    'loss_seg1': loss_seg_value1,
                    'loss_seg2': loss_seg_value2,
                    'loss_adv_target1': loss_adv_target_value1,
                    'loss_adv_target2': loss_adv_target_value2,
                    'loss_D1': loss_D_value1 * world_size,
                    'loss_D2': loss_D_value2 * world_size,
                }

                if i_iter % 10 == 0:
                    for key, val in scalar_info.items():
                        writer.add_scalar(key, val, i_iter)

            print('exp = {}'.format(args.snapshot_dir))
            print(
                'iter = {0:8d}/{1:8d}, loss_seg1 = {2:.3f} loss_seg2 = {3:.3f} loss_adv1 = {4:.3f}, loss_adv2 = {5:.3f} loss_D1 = {6:.3f} loss_D2 = {7:.3f}'
                .format(i_iter, args.num_steps, loss_seg_value1,
                        loss_seg_value2, loss_adv_target_value1,
                        loss_adv_target_value2, loss_D_value1, loss_D_value2))

            if i_iter >= args.num_steps_stop - 1:
                print('save model ...')
                torch.save(
                    model.state_dict(),
                    osp.join(args.snapshot_dir,
                             'GTA5_' + str(args.num_steps_stop) + '.pth'))
                torch.save(
                    model_D1.state_dict(),
                    osp.join(args.snapshot_dir,
                             'GTA5_' + str(args.num_steps_stop) + '_D1.pth'))
                break

            if i_iter % args.save_pred_every == 0 and i_iter != 0:
                print('taking snapshot ...')
                torch.save(
                    model.state_dict(),
                    osp.join(args.snapshot_dir,
                             'GTA5_' + str(i_iter) + '.pth'))
                torch.save(
                    model_D1.state_dict(),
                    osp.join(args.snapshot_dir,
                             'GTA5_' + str(i_iter) + '_D1.pth'))
    print(args.snapshot_dir)
    if args.tensorboard and rank == 0:
        writer.close()
Exemplo n.º 10
0
def main():
    """Create the model and start the training."""
    if RESTART:
        args.snapshot_dir = RESTART_FROM
    else:
        args.snapshot_dir = generate_snapshot_name(args)

    args_dict = vars(args)
    import json

    ###### load args for restart ######
    if RESTART:
        # pdb.set_trace()
        args_dict_file = args.snapshot_dir + 'args_dict_{}.json'.format(
            RESTART_ITER)
        with open(args_dict_file) as f:
            args_dict_last = json.load(f)
        for arg in args_dict:
            args_dict[arg] = args_dict_last[arg]

    ###### load args for restart ######

    device = torch.device("cuda" if not args.cpu else "cpu")

    w, h = map(int, args.input_size.split(','))
    input_size = (w, h)

    w, h = map(int, args.input_size_target.split(','))
    input_size_target = (w, h)

    cudnn.enabled = True
    cudnn.benchmark = True

    if args.model == 'DeepLab':
        model = DeeplabMulti(num_classes=args.num_classes)

    model_D1 = FCDiscriminator(num_classes=args.num_classes).to(device)
    model_D2 = FCDiscriminator(num_classes=args.num_classes).to(device)

    #### restore model_D1, D2 and model
    if RESTART:
        # pdb.set_trace()
        # model parameters
        restart_from_model = args.restart_from + 'GTA5_{}.pth'.format(
            RESTART_ITER)
        saved_state_dict = torch.load(restart_from_model)
        model.load_state_dict(saved_state_dict)

        # model_D1 parameters
        restart_from_D1 = args.restart_from + 'GTA5_{}_D1.pth'.format(
            RESTART_ITER)
        saved_state_dict = torch.load(restart_from_D1)
        model_D1.load_state_dict(saved_state_dict)

        # model_D2 parameters
        restart_from_D2 = args.restart_from + 'GTA5_{}_D2.pth'.format(
            RESTART_ITER)
        saved_state_dict = torch.load(restart_from_D2)
        model_D2.load_state_dict(saved_state_dict)

    #### model_D1, D2 are randomly initialized, model is pre-trained ResNet on ImageNet
    else:
        # model parameters
        if args.restore_from[:4] == 'http':
            saved_state_dict = model_zoo.load_url(args.restore_from)
        else:
            saved_state_dict = torch.load(args.restore_from)

        new_params = model.state_dict().copy()
        for i in saved_state_dict:
            # Scale.layer5.conv2d_list.3.weight
            i_parts = i.split('.')
            # print i_parts
            if not args.num_classes == 19 or not i_parts[1] == 'layer5':
                new_params['.'.join(i_parts[1:])] = saved_state_dict[i]
                # print i_parts
        model.load_state_dict(new_params)

    model.train()
    model.to(device)

    model_D1.train()
    model_D1.to(device)

    model_D2.train()
    model_D2.to(device)

    #### From here, code should not be related to model reload ####
    # but we would need hyperparameters: n_iter,
    # [lr, momentum, weight_decay, betas](these are all in args)
    # args.snapshot_dir = generate_snapshot_name()

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    trainloader = data.DataLoader(GTA5DataSet(args.data_dir,
                                              args.data_list,
                                              max_iters=args.num_steps *
                                              args.iter_size * args.batch_size,
                                              crop_size=input_size,
                                              scale=args.random_scale,
                                              mirror=args.random_mirror,
                                              mean=IMG_MEAN),
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  pin_memory=True)

    trainloader_iter = enumerate(trainloader)

    targetloader = data.DataLoader(cityscapesDataSet(
        args.data_dir_target,
        args.data_list_target,
        max_iters=args.num_steps * args.iter_size * args.batch_size,
        crop_size=input_size_target,
        scale=False,
        mirror=args.random_mirror,
        mean=IMG_MEAN,
        set=args.set),
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   pin_memory=True)

    # pdb.set_trace()
    targetloader_iter = enumerate(targetloader)

    # implement model.optim_parameters(args) to handle different models' lr setting

    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    optimizer.zero_grad()

    optimizer_D1 = optim.Adam(model_D1.parameters(),
                              lr=args.learning_rate_D,
                              betas=(0.9, 0.99))
    optimizer_D1.zero_grad()

    optimizer_D2 = optim.Adam(model_D2.parameters(),
                              lr=args.learning_rate_D,
                              betas=(0.9, 0.99))
    optimizer_D2.zero_grad()

    if args.gan == 'Vanilla':
        bce_loss = torch.nn.BCEWithLogitsLoss()
    elif args.gan == 'LS':
        bce_loss = torch.nn.MSELoss()
    seg_loss = torch.nn.CrossEntropyLoss(ignore_index=255)

    interp = nn.Upsample(size=(input_size[1], input_size[0]),
                         mode='bilinear',
                         align_corners=True)
    interp_target = nn.Upsample(size=(input_size_target[1],
                                      input_size_target[0]),
                                mode='bilinear',
                                align_corners=True)

    # labels for adversarial training
    source_label = 0
    target_label = 1

    # set up tensor board
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)

    writer = SummaryWriter(args.log_dir)

    for i_iter in range(args.start_steps, args.num_steps):

        loss_seg_value1 = 0
        loss_adv_target_value1 = 0
        loss_D_value1 = 0

        loss_seg_value2 = 0
        loss_adv_target_value2 = 0
        loss_D_value2 = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)

        optimizer_D1.zero_grad()
        optimizer_D2.zero_grad()
        adjust_learning_rate_D(optimizer_D1, i_iter)
        adjust_learning_rate_D(optimizer_D2, i_iter)

        for sub_i in range(args.iter_size):

            # train G

            # don't accumulate grads in D
            for param in model_D1.parameters():
                param.requires_grad = False

            for param in model_D2.parameters():
                param.requires_grad = False

            # train with source

            _, batch = trainloader_iter.__next__()

            images, labels, _, _ = batch
            images = images.to(device)
            labels = labels.long().to(device)

            pred1, pred2 = model(images)
            pred1 = interp(pred1)
            pred2 = interp(pred2)
            pdb.set_trace()
            loss_seg1 = seg_loss(pred1, labels)
            loss_seg2 = seg_loss(pred2, labels)
            loss = loss_seg2 + args.lambda_seg * loss_seg1

            # proper normalization
            loss = loss / args.iter_size
            loss.backward()
            loss_seg_value1 += loss_seg1.item() / args.iter_size
            loss_seg_value2 += loss_seg2.item() / args.iter_size

            # train with target

            _, batch = targetloader_iter.__next__()
            images, _, _ = batch
            images = images.to(device)
            pdb.set_trace()
            pred_target1, pred_target2 = model(images)
            pred_target1 = interp_target(pred_target1)
            pred_target2 = interp_target(pred_target2)
            pdb.set_trace()
            D_out1 = model_D1(F.softmax(pred_target1))
            D_out2 = model_D2(F.softmax(pred_target2))

            loss_adv_target1 = bce_loss(
                D_out1,
                torch.FloatTensor(
                    D_out1.data.size()).fill_(source_label).to(device))

            loss_adv_target2 = bce_loss(
                D_out2,
                torch.FloatTensor(
                    D_out2.data.size()).fill_(source_label).to(device))

            loss = args.lambda_adv_target1 * loss_adv_target1 + args.lambda_adv_target2 * loss_adv_target2
            loss = loss / args.iter_size
            loss.backward()
            loss_adv_target_value1 += loss_adv_target1.item() / args.iter_size
            loss_adv_target_value2 += loss_adv_target2.item() / args.iter_size

            # train D

            # bring back requires_grad
            for param in model_D1.parameters():
                param.requires_grad = True

            for param in model_D2.parameters():
                param.requires_grad = True

            # train with source
            pred1 = pred1.detach()
            pred2 = pred2.detach()

            D_out1 = model_D1(F.softmax(pred1))
            D_out2 = model_D2(F.softmax(pred2))

            loss_D1 = bce_loss(
                D_out1,
                torch.FloatTensor(
                    D_out1.data.size()).fill_(source_label).to(device))

            loss_D2 = bce_loss(
                D_out2,
                torch.FloatTensor(
                    D_out2.data.size()).fill_(source_label).to(device))

            loss_D1 = loss_D1 / args.iter_size / 2
            loss_D2 = loss_D2 / args.iter_size / 2

            loss_D1.backward()
            loss_D2.backward()

            loss_D_value1 += loss_D1.item()
            loss_D_value2 += loss_D2.item()

            # train with target
            pred_target1 = pred_target1.detach()
            pred_target2 = pred_target2.detach()

            D_out1 = model_D1(F.softmax(pred_target1))
            D_out2 = model_D2(F.softmax(pred_target2))

            loss_D1 = bce_loss(
                D_out1,
                torch.FloatTensor(
                    D_out1.data.size()).fill_(target_label).to(device))

            loss_D2 = bce_loss(
                D_out2,
                torch.FloatTensor(
                    D_out2.data.size()).fill_(target_label).to(device))

            loss_D1 = loss_D1 / args.iter_size / 2
            loss_D2 = loss_D2 / args.iter_size / 2

            loss_D1.backward()
            loss_D2.backward()

            loss_D_value1 += loss_D1.item()
            loss_D_value2 += loss_D2.item()

        optimizer.step()
        optimizer_D1.step()
        optimizer_D2.step()

        scalar_info = {
            'loss_seg1': loss_seg_value1,
            'loss_seg2': loss_seg_value2,
            'loss_adv_target1': loss_adv_target_value1,
            'loss_adv_target2': loss_adv_target_value2,
            'loss_D1': loss_D_value1,
            'loss_D2': loss_D_value2,
        }

        if i_iter % 10 == 0:
            for key, val in scalar_info.items():
                writer.add_scalar(key, val, i_iter)

        print('exp = {}'.format(args.snapshot_dir))
        print(
            'iter = {0:8d}/{1:8d}, loss_seg1 = {2:.3f} loss_seg2 = {3:.3f} loss_adv1 = {4:.3f}, loss_adv2 = {5:.3f} loss_D1 = {6:.3f} loss_D2 = {7:.3f}'
            .format(i_iter, args.num_steps, loss_seg_value1, loss_seg_value2,
                    loss_adv_target_value1, loss_adv_target_value2,
                    loss_D_value1, loss_D_value2))

        if i_iter >= args.num_steps_stop - 1:
            print('save model ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir,
                         'GTA5_' + str(args.num_steps_stop) + '.pth'))
            torch.save(
                model_D1.state_dict(),
                osp.join(args.snapshot_dir,
                         'GTA5_' + str(args.num_steps_stop) + '_D1.pth'))
            torch.save(
                model_D2.state_dict(),
                osp.join(args.snapshot_dir,
                         'GTA5_' + str(args.num_steps_stop) + '_D2.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '.pth'))
            torch.save(
                model_D1.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D1.pth'))
            torch.save(
                model_D2.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D2.pth'))

            ###### also record latest saved iteration #######
            args_dict['learning_rate'] = optimizer.param_groups[0]['lr']
            args_dict['learning_rate_D'] = optimizer_D1.param_groups[0]['lr']
            args_dict['start_steps'] = i_iter

            args_dict_file = args.snapshot_dir + '/args_dict_{}.json'.format(
                i_iter)
            with open(args_dict_file, 'w') as f:
                json.dump(args_dict, f)

            ###### also record latest saved iteration #######

    writer.close()
def main():
    """Create the model and start the training."""
    if RESTART:
        args.snapshot_dir = RESTART_FROM
    else:
        args.snapshot_dir = generate_snapshot_name(args)

    args_dict = vars(args)
    import json

    ###### load args for restart ######
    if RESTART:
        # pdb.set_trace()
        args_dict_file = args.snapshot_dir + '/args_dict_{}.json'.format(
            RESTART_ITER)
        with open(args_dict_file) as f:
            args_dict_last = json.load(f)
        for arg in args_dict:
            args_dict[arg] = args_dict_last[arg]

    ###### load args for restart ######

    device = torch.device("cuda" if not args.cpu else "cpu")

    w, h = map(int, args.input_size.split(','))
    input_size = (w, h)

    w, h = map(int, args.input_size_target.split(','))
    input_size_target = (w, h)

    cudnn.enabled = True
    cudnn.benchmark = True

    if args.model == 'DeepLab':
        model = DeeplabMulti(num_classes=args.num_classes)

    model_D = FCDiscriminator(num_classes=2 * args.num_classes).to(device)

    #### restore model_D and model
    if RESTART:
        # pdb.set_trace()
        # model parameters
        restart_from_model = args.restart_from + 'GTA5_{}.pth'.format(
            RESTART_ITER)
        saved_state_dict = torch.load(restart_from_model)
        model.load_state_dict(saved_state_dict)

        # model_D parameters
        restart_from_D = args.restart_from + 'GTA5_{}_D.pth'.format(
            RESTART_ITER)
        saved_state_dict = torch.load(restart_from_D)
        model_D.load_state_dict(saved_state_dict)

    #### model_D1, D2 are randomly initialized, model is pre-trained ResNet on ImageNet
    else:
        # model parameters
        if args.restore_from[:4] == 'http':
            saved_state_dict = model_zoo.load_url(args.restore_from)
        else:
            saved_state_dict = torch.load(args.restore_from)

        new_params = model.state_dict().copy()
        for i in saved_state_dict:
            # Scale.layer5.conv2d_list.3.weight
            i_parts = i.split('.')
            # print i_parts
            if not args.num_classes == 19 or not i_parts[1] == 'layer5':
                new_params['.'.join(i_parts[1:])] = saved_state_dict[i]
                # print i_parts
        model.load_state_dict(new_params)

    model.train()
    model.to(device)

    model_D.train()
    model_D.to(device)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    trainloader = data.DataLoader(GTA5DataSet(args.data_dir,
                                              args.data_list,
                                              max_iters=args.num_steps *
                                              args.iter_size * args.batch_size,
                                              crop_size=input_size,
                                              scale=args.random_scale,
                                              mirror=args.random_mirror,
                                              mean=IMG_MEAN),
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  pin_memory=True)

    trainloader_iter = enumerate(trainloader)

    targetloader = data.DataLoader(cityscapesDataSet(
        args.data_dir_target,
        args.data_list_target,
        max_iters=args.num_steps * args.iter_size * args.batch_size,
        crop_size=input_size_target,
        scale=False,
        mirror=args.random_mirror,
        mean=IMG_MEAN,
        set=args.set),
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   pin_memory=True)

    targetloader_iter = enumerate(targetloader)

    # implement model.optim_parameters(args) to handle different models' lr setting

    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    optimizer.zero_grad()

    optimizer_D = optim.Adam(model_D.parameters(),
                             lr=args.learning_rate_D,
                             betas=(0.9, 0.99))
    optimizer_D.zero_grad()
    """
    optimizer_D1 = optim.Adam(model_D1.parameters(), lr=args.learning_rate_D, betas=(0.9, 0.99))
    optimizer_D1.zero_grad()

    optimizer_D2 = optim.Adam(model_D2.parameters(), lr=args.learning_rate_D, betas=(0.9, 0.99))
    optimizer_D2.zero_grad()
    """

    if args.gan == 'Vanilla':
        bce_loss = torch.nn.BCEWithLogitsLoss()
    elif args.gan == 'LS':
        bce_loss = torch.nn.MSELoss()
    seg_loss = torch.nn.CrossEntropyLoss(ignore_index=255)

    interp = nn.Upsample(size=(input_size[1], input_size[0]),
                         mode='bilinear',
                         align_corners=True)
    interp_target = nn.Upsample(size=(input_size_target[1],
                                      input_size_target[0]),
                                mode='bilinear',
                                align_corners=True)

    # labels for adversarial training
    source_label = 0
    target_label = 1

    # set up tensor board
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)

    writer = SummaryWriter(args.log_dir)

    for i_iter in range(args.num_steps):
        # pdb.set_trace()
        loss_seg_value1 = 0
        loss_seg_value2 = 0
        adv_loss_value = 0
        d_loss_value = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)
        optimizer_D.zero_grad()
        adjust_learning_rate(optimizer_D, i_iter)
        """
        optimizer_D1.zero_grad()
        optimizer_D2.zero_grad()
        adjust_learning_rate_D(optimizer_D1, i_iter)
        adjust_learning_rate_D(optimizer_D2, i_iter)
        """

        for sub_i in range(args.iter_size):

            # train G

            # don't accumulate grads in D
            for param in model_D.parameters():
                param.requires_grad = False
            """
            for param in model_D1.parameters():
                param.requires_grad = False

            for param in model_D2.parameters():
                param.requires_grad = False
            """

            # train with source

            _, batch = trainloader_iter.__next__()

            images, labels, _, _ = batch
            images = images.to(device)
            labels = labels.long().to(device)
            # pdb.set_trace()
            # images.size() == [1, 3, 720, 1280]
            pred1, pred2 = model(images)
            # pred1, pred2 size == [1, 19, 91, 161]
            pred1 = interp(pred1)
            pred2 = interp(pred2)
            # size (1, 19, 720, 1280)
            # pdb.set_trace()

            # feature = nn.Softmax(dim=1)(pred1)
            # softmax_out = nn.Softmax(dim=1)(pred2)

            loss_seg1 = seg_loss(pred1, labels)
            loss_seg2 = seg_loss(pred2, labels)
            loss = loss_seg2 + args.lambda_seg * loss_seg1
            # pdb.set_trace()
            # proper normalization
            loss = loss / args.iter_size
            # TODO: uncomment
            loss.backward()
            loss_seg_value1 += loss_seg1.item() / args.iter_size
            loss_seg_value2 += loss_seg2.item() / args.iter_size
            # pdb.set_trace()
            # train with target

            _, batch = targetloader_iter.__next__()

            for params in model_D.parameters():
                params.requires_grad_(requires_grad=False)

            images, _, _ = batch
            images = images.to(device)
            # pdb.set_trace()
            # images.size() == [1, 3, 720, 1280]
            pred_target1, pred_target2 = model(images)

            # pred_target1, 2 == [1, 19, 91, 161]
            pred_target1 = interp_target(pred_target1)
            pred_target2 = interp_target(pred_target2)
            # pred_target1, 2 == [1, 19, 720, 1280]
            # pdb.set_trace()

            # feature_target = nn.Softmax(dim=1)(pred_target1)
            # softmax_out_target = nn.Softmax(dim=1)(pred_target2)

            # features = torch.cat((pred1, pred_target1), dim=0)
            # outputs = torch.cat((pred2, pred_target2), dim=0)
            # features.size() == [2, 19, 720, 1280]
            # softmax_out.size() == [2, 19, 720, 1280]
            # pdb.set_trace()
            # transfer_loss = CDAN([features, softmax_out], model_D, None, None, random_layer=None)
            D_out_target = CDAN(
                [F.softmax(pred_target1),
                 F.softmax(pred_target2)],
                model_D,
                cdan_implement='concat')
            dc_source = torch.FloatTensor(
                D_out_target.size()).fill_(0).to(device)
            # pdb.set_trace()
            adv_loss = nn.BCEWithLogitsLoss()(D_out_target, dc_source)
            adv_loss = adv_loss / args.iter_size
            adv_loss = args.lambda_adv * adv_loss
            # pdb.set_trace()
            # classifier_loss = nn.BCEWithLogitsLoss()(pred2,
            #        torch.FloatTensor(pred2.data.size()).fill_(source_label).cuda())
            # pdb.set_trace()
            adv_loss.backward()
            adv_loss_value += adv_loss.item()
            # optimizer_D.step()
            #TODO: normalize loss?

            for params in model_D.parameters():
                params.requires_grad_(requires_grad=True)

            pred1 = pred1.detach()
            pred2 = pred2.detach()
            D_out = CDAN([F.softmax(pred1), F.softmax(pred2)],
                         model_D,
                         cdan_implement='concat')

            dc_source = torch.FloatTensor(D_out.size()).fill_(0).to(device)
            # d_loss = CDAN(D_out, dc_source, None, None, random_layer=None)
            d_loss = nn.BCEWithLogitsLoss()(D_out, dc_source)
            d_loss = d_loss / args.iter_size
            # pdb.set_trace()
            d_loss.backward()
            d_loss_value += d_loss.item()

            pred_target1 = pred_target1.detach()
            pred_target2 = pred_target2.detach()
            D_out_target = CDAN(
                [F.softmax(pred_target1),
                 F.softmax(pred_target2)],
                model_D,
                cdan_implement='concat')

            dc_target = torch.FloatTensor(
                D_out_target.size()).fill_(1).to(device)
            d_loss = nn.BCEWithLogitsLoss()(D_out_target, dc_target)
            d_loss = d_loss / args.iter_size
            # pdb.set_trace()
            d_loss.backward()
            d_loss_value += d_loss.item()

            continue

        optimizer.step()
        optimizer_D.step()

        scalar_info = {
            'loss_seg1': loss_seg_value1,
            'loss_seg2': loss_seg_value2,
            'generator_loss': adv_loss_value,
            'discriminator_loss': d_loss_value,
        }

        if i_iter % 10 == 0:
            for key, val in scalar_info.items():
                writer.add_scalar(key, val, i_iter)
        # pdb.set_trace()
        print('exp = {}'.format(args.snapshot_dir))
        print(
            'iter = {0:8d}/{1:8d}, loss_seg1 = {2:.3f} loss_seg2 = {3:.3f} generator = {4:.3f}, discriminator = {5:.3f}'
            .format(i_iter, args.num_steps, loss_seg_value1, loss_seg_value2,
                    adv_loss_value, d_loss_value))

        if i_iter >= args.num_steps_stop - 1:
            print('save model ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir,
                         'GTA5_' + str(args.num_steps_stop) + '.pth'))
            torch.save(
                model_D.state_dict(),
                osp.join(args.snapshot_dir,
                         'GTA5_' + str(args.num_steps_stop) + '_D.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '.pth'))
            torch.save(
                model_D.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D.pth'))

            # check_original_discriminator(args, pred_target1, pred_target2, i_iter)
            save_path = args.snapshot_dir + '/eval_{}'.format(i_iter)
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            # evaluate(args, save_path, args.snapshot_dir, i_iter)

            ###### also record latest saved iteration #######
            args_dict['learning_rate'] = optimizer.param_groups[0]['lr']
            args_dict['learning_rate_D'] = optimizer_D.param_groups[0]['lr']
            args_dict['start_steps'] = i_iter

            args_dict_file = args.snapshot_dir + 'args_dict_{}.json'.format(
                i_iter)
            pdb.set_trace()
            with open(args_dict_file, 'w') as f:
                json.dump(args_dict, f)

            ###### also record latest saved iteration #######

    writer.close()
Exemplo n.º 12
0
    #######################
    # Discriminator
    #######################

    discriminator = FCDiscriminator(num_classes=config.NUM_CLASSES)
    discriminator.to(device=device)

    from torchsummary import summary
    summary(discriminator, (config.NUM_CLASSES, config.CROP_H, config.CROP_W))

    logging.info(f'Discriminator:\n'
                 f'\t{config.NUM_CLASSES} input channels (classes)\n')

    if config.DIS_SAVED_WEIGHTS:
        discriminator.load_state_dict(
            torch.load(config.DIS_SAVED_WEIGHTS, map_location=device))
        logging.info(f'Discriminator loaded from {config.DIS_SAVED_WEIGHTS}')
    else:
        logging.info(f'Training Discriminator from scratch!\n')

    #######################
    #######################

    try:
        train_net(net=net,
                  discriminator=discriminator,
                  upsample=upsample,
                  epochs=args.epochs,
                  batch_size=args.batchsize,
                  lr=args.lr,
                  device=device,
Exemplo n.º 13
0
def main():
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    cudnn.enabled = True
    gpu = args.gpu
    np.random.seed(args.random_seed)

    # create network
    model = Res_Deeplab(num_classes=args.num_classes)

    # load pretrained parameters
    if args.restore_from[:4] == 'http':
        saved_state_dict = model_zoo.load_url(args.restore_from)
    else:
        saved_state_dict = torch.load(args.restore_from)

    # only copy the params that exist in current model (caffe-like)
    new_params = model.state_dict().copy()
    for name, param in new_params.items():
        print(name)
        if name in saved_state_dict and param.size(
        ) == saved_state_dict[name].size():
            new_params[name].copy_(saved_state_dict[name])
            print('copy {}'.format(name))
    model.load_state_dict(new_params)
    model.train()
    model.cuda(args.gpu)

    # init D
    model_D = FCDiscriminator(num_classes=args.num_classes)
    if args.restore_from_D is not None:
        model_D.load_state_dict(torch.load(args.restore_from_D))
    model_D.train()
    model_D.cuda(args.gpu)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    # load dataset
    train_dataset = VOCDataSet(args.data_dir,
                               args.data_list,
                               crop_size=input_size,
                               scale=args.random_scale,
                               mirror=args.random_mirror,
                               mean=IMG_MEAN)

    train_dataset_size = len(train_dataset)

    train_gt_dataset = VOCGTDataSet(args.data_dir,
                                    args.data_list,
                                    crop_size=input_size,
                                    scale=args.random_scale,
                                    mirror=args.random_mirror,
                                    mean=IMG_MEAN)

    if args.partial_data is None:
        trainloader = data.DataLoader(train_dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=5,
                                      pin_memory=True)

        trainloader_gt = data.DataLoader(train_gt_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=5,
                                         pin_memory=True)
    else:
        #sample partial data
        partial_size = int(args.partial_data * train_dataset_size)

        if args.partial_id is not None:
            train_ids = pickle.load(open(args.partial_id))
            print('loading train ids from {}'.format(args.partial_id))
        else:
            train_ids = np.arange(train_dataset_size)
            np.random.shuffle(train_ids)

        pickle.dump(train_ids,
                    open(osp.join(args.snapshot_dir, 'train_id.pkl'), 'wb'))

        # labeled data
        train_sampler = data.sampler.SubsetRandomSampler(
            train_ids[:partial_size])
        train_gt_sampler = data.sampler.SubsetRandomSampler(
            train_ids[:partial_size])

        trainloader = data.DataLoader(train_dataset,
                                      batch_size=args.batch_size,
                                      sampler=train_sampler,
                                      num_workers=3,
                                      pin_memory=True)
        trainloader_gt = data.DataLoader(train_gt_dataset,
                                         batch_size=args.batch_size,
                                         sampler=train_gt_sampler,
                                         num_workers=3,
                                         pin_memory=True)

        # unlabeled data
        train_remain_sampler = data.sampler.SubsetRandomSampler(
            train_ids[partial_size:])
        trainloader_remain = data.DataLoader(train_dataset,
                                             batch_size=args.batch_size,
                                             sampler=train_remain_sampler,
                                             num_workers=3,
                                             pin_memory=True)
        trainloader_remain_iter = enumerate(trainloader_remain)

    trainloader_iter = enumerate(trainloader)
    trainloader_gt_iter = enumerate(trainloader_gt)
    # implement model.optim_parameters(args) to handle different models' lr setting

    # optimizer for segmentation network
    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    optimizer.zero_grad()

    # optimizer for discriminator network
    optimizer_D = optim.Adam(model_D.parameters(),
                             lr=args.learning_rate_D,
                             betas=(0.9, 0.99))
    optimizer_D.zero_grad()

    # loss/bilinear upsampling
    bce_loss = BCEWithLogitsLoss2d()
    interp = nn.Upsample(size=(input_size[1], input_size[0]), mode='bilinear')

    if version.parse(torch.__version__) >= version.parse('0.4.0'):
        interp = nn.Upsample(size=(input_size[1], input_size[0]),
                             mode='bilinear',
                             align_corners=True)
    else:
        interp = nn.Upsample(size=(input_size[1], input_size[0]),
                             mode='bilinear')

    # labels for adversarial training
    pred_label = 0
    gt_label = 1

    for i_iter in range(args.num_steps):
        loss_seg_value = 0
        loss_adv_pred_value = 0

        loss_D_value = 0
        loss_D_ul_value = 0

        loss_semi_value = 0
        loss_semi_adv_value = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)
        optimizer_D.zero_grad()
        adjust_learning_rate_D(optimizer_D, i_iter)

        # creating 2nd discriminator as a copy of the 1st one
        if i_iter == args.discr_split:
            model_D_ul = FCDiscriminator(num_classes=args.num_classes)
            model_D_ul.load_state_dict(net_D.state_dict())
            model_D_ul.train()
            model_D_ul.cuda(args.gpu)

            optimizer_D_ul = optim.Adam(model_D_ul.parameters(),
                                        lr=args.learning_rate_D,
                                        betas=(0.9, 0.99))

        # start training 2nd discriminator after specified number of steps
        if i_iter >= args.discr_split:
            optimizer_D_ul.zero_grad()
            adjust_learning_rate_D(optimizer_D_ul, i_iter)

        for sub_i in range(args.iter_size):
            # train Segmentation

            # don't accumulate grads in D
            for param in model_D.parameters():
                param.requires_grad = False

            # don't accumulate grads in D_ul, in case split has already been made
            if i_iter >= args.discr_split:
                for param in model_D_ul.parameters():
                    param.requires_grad = False

            # do semi-supervised training first
            if args.lambda_semi_adv > 0 and i_iter >= args.semi_start_adv:
                try:
                    _, batch = trainloader_remain_iter.next()
                except:
                    trainloader_remain_iter = enumerate(trainloader_remain)
                    _, batch = trainloader_remain_iter.next()

                # only access to img
                images, _, _, _ = batch
                images = Variable(images).cuda(args.gpu)

                pred = interp(model(images))
                pred_remain = pred.detach()

                # choose discriminator depending on the iteration
                if i_iter >= args.discr_split:
                    D_out = interp(model_D_ul(F.softmax(pred)))
                else:
                    D_out = interp(model_D(F.softmax(pred)))

                D_out_sigmoid = F.sigmoid(D_out).data.cpu().numpy().squeeze(
                    axis=1)
                ignore_mask_remain = np.zeros(D_out_sigmoid.shape).astype(
                    np.bool)

                # adversarial loss
                loss_semi_adv = args.lambda_semi_adv * bce_loss(
                    D_out, make_D_label(gt_label, ignore_mask_remain,
                                        args.gpu))
                loss_semi_adv = loss_semi_adv / args.iter_size

                # true loss value without multiplier
                loss_semi_adv_value += loss_semi_adv.data.cpu().numpy(
                ) / args.lambda_semi_adv
                loss_semi_adv.backward()

            else:

                loss_semi = None
                loss_semi_adv = None

            # train with labeled images
            try:
                _, batch = trainloader_iter.next()
            except:
                trainloader_iter = enumerate(trainloader)
                _, batch = trainloader_iter.next()

            images, labels, _, _ = batch
            images = Variable(images).cuda(args.gpu)
            ignore_mask = (labels.numpy() == 255)

            pred = interp(model(images))
            D_out = interp(model_D(F.softmax(pred)))

            # computing loss
            loss_seg = loss_calc(pred, labels, args.gpu)
            loss_adv_pred = bce_loss(
                D_out, make_D_label(gt_label, ignore_mask, args.gpu))
            loss = loss_seg + args.lambda_adv_pred * loss_adv_pred

            # proper normalization
            loss = loss / args.iter_size
            loss.backward()
            loss_seg_value += loss_seg.data.cpu().numpy() / args.iter_size
            loss_adv_pred_value += loss_adv_pred.data.cpu().numpy(
            ) / args.iter_size

            # train D and D_ul

            # bring back requires_grad
            for param in model_D.parameters():
                param.requires_grad = True

            if i_iter >= args.discr_split:
                for param in model_D_ul.parameters():
                    param.requires_grad = True

            # train D with pred
            pred = pred.detach()

            # before split, traing D with both labeled and unlabeled
            if args.D_remain and i_iter < args.discr_split and (
                    args.lambda_semi > 0 or args.lambda_semi_adv > 0):
                pred = torch.cat((pred, pred_remain), 0)
                ignore_mask = np.concatenate((ignore_mask, ignore_mask_remain),
                                             axis=0)

            D_out = interp(model_D(F.softmax(pred)))
            loss_D = bce_loss(D_out,
                              make_D_label(pred_label, ignore_mask, args.gpu))
            loss_D = loss_D / args.iter_size / 2
            loss_D.backward()
            loss_D_value += loss_D.data.cpu().numpy()

            # train D_ul with pred on unlabeled
            if i_iter >= args.discr_split and (args.lambda_semi > 0
                                               or args.lambda_semi_adv > 0):
                D_ul_out = interp(model_D_ul(F.softmax(pred_remain)))
                loss_D_ul = bce_loss(
                    D_ul_out,
                    make_D_label(pred_label, ignore_mask_remain, args.gpu))
                loss_D_ul = loss_D_ul / args.iter_size / 2
                loss_D_ul.backward()
                loss_D_ul_value += loss_D_ul.data.cpu().numpy()

            # get gt labels
            try:
                _, batch = trainloader_gt_iter.next()
            except:
                trainloader_gt_iter = enumerate(trainloader_gt)
                _, batch = trainloader_gt_iter.next()

            images_gt, labels_gt, _, _ = batch
            images_gt = Variable(images_gt).cuda(args.gpu)
            with torch.no_grad():
                pred_l = interp(model(images_gt))

            # train D with gt
            D_gt_v = Variable(one_hot(labels_gt)).cuda(args.gpu)
            ignore_mask_gt = (labels_gt.numpy() == 255)

            D_out = interp(model_D(D_gt_v))
            loss_D = bce_loss(D_out,
                              make_D_label(gt_label, ignore_mask_gt, args.gpu))
            loss_D = loss_D / args.iter_size / 2
            loss_D.backward()
            loss_D_value += loss_D.data.cpu().numpy()

            # train D_ul with pseudo_gt (gt are substituted for pred)
            if i_iter >= args.discr_split:
                D_ul_out = interp(model_D_ul(F.softmax(pred_l)))
                loss_D_ul = bce_loss(
                    D_ul_out, make_D_label(gt_label, ignore_mask_gt, args.gpu))
                loss_D_ul = loss_D_ul / args.iter_size / 2
                loss_D_ul.backward()
                loss_D_ul_value += loss_D_ul.data.cpu().numpy()

        optimizer.step()
        optimizer_D.step()
        if i_iter >= args.discr_split:
            optimizer_D_ul.step()

        print('exp = {}'.format(args.snapshot_dir))
        print(
            'iter = {0:8d}/{1:8d}, loss_seg = {2:.3f}, loss_adv_p = {3:.3f}, loss_D = {4:.3f}, loss_D_ul={5:.3f}, loss_semi = {6:.3f}, loss_semi_adv = {7:.3f}'
            .format(i_iter, args.num_steps, loss_seg_value,
                    loss_adv_pred_value, loss_D_value, loss_D_ul_value,
                    loss_semi_value, loss_semi_adv_value))

        if i_iter >= args.num_steps - 1:
            print('save model ...')
            torch.save(
                net.state_dict(),
                osp.join(
                    args.snapshot_dir, 'VOC_' + str(args.num_steps) + '_' +
                    str(args.random_seed) + '.pth'))
            torch.save(
                net_D.state_dict(),
                osp.join(
                    args.snapshot_dir, 'VOC_' + str(args.num_steps) + '_' +
                    str(args.random_seed) + '_D.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            torch.save(
                net.state_dict(),
                osp.join(
                    args.snapshot_dir, 'VOC_' + str(i_iter) + '_' +
                    str(args.random_seed) + '.pth'))
            torch.save(
                net_D.state_dict(),
                osp.join(
                    args.snapshot_dir, 'VOC_' + str(i_iter) + '_' +
                    str(args.random_seed) + '_D.pth'))

    end = timeit.default_timer()
    print(end - start, 'seconds')
Exemplo n.º 14
0
def main():
    # 将参数的input_size 映射到整数,并赋值,从字符串转换到整数二元组
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    cudnn.enabled = False
    gpu = args.gpu

    # create network
    model = Res_Deeplab(num_classes=args.num_classes)

    # load pretrained parameters
    if args.restore_from[:4] == 'http':
        saved_state_dict = model_zoo.load_url(args.restore_from)
    else:
        saved_state_dict = torch.load(args.restore_from)

    # only copy the params that exist in current model (caffe-like)
    # 确保模型中参数的格式与要加载的参数相同
    # 返回一个字典,保存着module的所有状态(state);parameters和persistent buffers都会包含在字典中,字典的key就是parameter和buffer的 names。
    new_params = model.state_dict().copy()
    for name, param in new_params.items():
        # print (name)
        if name in saved_state_dict and param.size(
        ) == saved_state_dict[name].size():
            new_params[name].copy_(saved_state_dict[name])
            # print('copy {}'.format(name))
    model.load_state_dict(new_params)

    # 设置为训练模式
    model.train()
    cudnn.benchmark = True

    model.cuda(gpu)

    # init D
    model_D = FCDiscriminator(num_classes=args.num_classes)
    if args.restore_from_D is not None:
        model_D.load_state_dict(torch.load(args.restore_from_D))
    model_D.train()
    model_D.cuda(gpu)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    train_dataset = VOCDataSet(args.data_dir,
                               args.data_list,
                               crop_size=input_size,
                               scale=args.random_scale,
                               mirror=args.random_mirror,
                               mean=IMG_MEAN)

    train_dataset_size = len(train_dataset)
    train_gt_dataset = VOCGTDataSet(args.data_dir,
                                    args.data_list,
                                    crop_size=input_size,
                                    scale=args.random_scale,
                                    mirror=args.random_mirror,
                                    mean=IMG_MEAN)

    if args.partial_data is None:
        trainloader = data.DataLoader(train_dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=5,
                                      pin_memory=True)

        trainloader_gt = data.DataLoader(train_gt_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=5,
                                         pin_memory=True)
    else:
        # sample partial data
        partial_size = int(args.partial_data * train_dataset_size)

        if args.partial_id is not None:
            train_ids = pickle.load(open(args.partial_id))
            print('loading train ids from {}'.format(args.partial_id))
        else:
            train_ids = list(range(train_dataset_size))  # ?
            np.random.shuffle(train_ids)

        pickle.dump(train_ids,
                    open(osp.join(args.snapshot_dir, 'train_id.pkl'),
                         'wb'))  # 写入文件

        train_sampler = data.sampler.SubsetRandomSampler(
            train_ids[:partial_size])
        train_remain_sampler = data.sampler.SubsetRandomSampler(
            train_ids[partial_size:])
        train_gt_sampler = data.sampler.SubsetRandomSampler(
            train_ids[:partial_size])

        trainloader = data.DataLoader(train_dataset,
                                      batch_size=args.batch_size,
                                      sampler=train_sampler,
                                      num_workers=3,
                                      pin_memory=True)
        trainloader_remain = data.DataLoader(train_dataset,
                                             batch_size=args.batch_size,
                                             sampler=train_remain_sampler,
                                             num_workers=3,
                                             pin_memory=True)
        trainloader_gt = data.DataLoader(train_gt_dataset,
                                         batch_size=args.batch_size,
                                         sampler=train_gt_sampler,
                                         num_workers=3,
                                         pin_memory=True)

        trainloader_remain_iter = enumerate(trainloader_remain)

    trainloader_iter = enumerate(trainloader)
    trainloader_gt_iter = enumerate(trainloader_gt)

    # implement model.optim_parameters(args) to handle different models' lr setting

    # optimizer for segmentation network
    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    optimizer.zero_grad()

    # optimizer for discriminator network
    optimizer_D = optim.Adam(model_D.parameters(),
                             lr=args.learning_rate_D,
                             betas=(0.9, 0.99))
    optimizer_D.zero_grad()

    # loss/ bilinear upsampling
    bce_loss = BCEWithLogitsLoss2d()
    interp = nn.Upsample(size=(input_size[1], input_size[0]),
                         mode='bilinear')  # ???

    # labels for adversarial training
    pred_label = 0
    gt_label = 1

    for i_iter in range(args.num_steps):
        print("Iter:", i_iter)
        loss_seg_value = 0
        loss_adv_pred_value = 0
        loss_D_value = 0
        loss_semi_value = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)
        optimizer_D.zero_grad()
        adjust_learning_rate_D(optimizer_D, i_iter)

        for sub_i in range(args.iter_size):

            # train G

            # don't accumulate grads in D
            for param in model_D.parameters():
                param.requires_grad = False

            # do semi first
            if args.lambda_semi > 0 and i_iter >= args.semi_start:
                try:
                    _, batch = next(trainloader_remain_iter)
                except:
                    trainloader_remain_iter = enumerate(trainloader_remain)
                    _, batch = next(trainloader_remain_iter)

                # only access to img
                images, _, _, _ = batch

                images = Variable(images).cuda(gpu)
                # images = Variable(images).cpu()

                pred = interp(model(images))
                D_out = interp(model_D(F.softmax(pred)))

                D_out_sigmoid = F.sigmoid(D_out).data.cpu().numpy().squeeze(
                    axis=1)

                # produce ignore mask
                semi_ignore_mask = (D_out_sigmoid < args.mask_T)

                semi_gt = pred.data.cpu().numpy().argmax(axis=1)
                semi_gt[semi_ignore_mask] = 255

                semi_ratio = 1.0 - float(
                    semi_ignore_mask.sum()) / semi_ignore_mask.size
                print('semi ratio: {:.4f}'.format(semi_ratio))

                if semi_ratio == 0.0:
                    loss_semi_value += 0
                else:
                    semi_gt = torch.FloatTensor(semi_gt)

                    loss_semi = args.lambda_semi * loss_calc(
                        pred, semi_gt, args.gpu)
                    loss_semi = loss_semi / args.iter_size
                    loss_semi.backward()
                    loss_semi_value += loss_semi.data.cpu().numpy(
                    )[0] / args.lambda_semi
            else:
                loss_semi = None

            # train with source

            try:
                _, batch = next(trainloader_iter)
            except:
                trainloader_iter = enumerate(trainloader)
                _, batch = next(trainloader_iter)

            images, labels, _, _ = batch

            images = Variable(images).cuda(gpu)
            # images = Variable(images).cpu()

            ignore_mask = (labels.numpy() == 255)
            pred = interp(model(images))

            loss_seg = loss_calc(pred, labels, args.gpu)

            D_out = interp(model_D(F.softmax(pred)))

            loss_adv_pred = bce_loss(D_out,
                                     make_D_label(gt_label, ignore_mask))

            loss = loss_seg + args.lambda_adv_pred * loss_adv_pred

            # proper normalization
            loss = loss / args.iter_size
            loss.backward()
            loss_seg_value += loss_seg.data.cpu().numpy()[0] / args.iter_size
            loss_adv_pred_value += loss_adv_pred.data.cpu().numpy(
            )[0] / args.iter_size

            # train D

            # bring back requires_grad
            for param in model_D.parameters():
                param.requires_grad = True

            # train with pred
            pred = pred.detach()

            D_out = interp(model_D(F.softmax(pred)))
            loss_D = bce_loss(D_out, make_D_label(pred_label, ignore_mask))
            loss_D = loss_D / args.iter_size / 2
            loss_D.backward()
            loss_D_value += loss_D.data.cpu().numpy()[0]

            # train with gt
            # get gt labels
            try:
                _, batch = next(trainloader_gt_iter)
            except:
                trainloader_gt_iter = enumerate(trainloader_gt)
                _, batch = next(trainloader_gt_iter)

            _, labels_gt, _, _ = batch
            D_gt_v = Variable(one_hot(labels_gt)).cuda(args.gpu)
            # D_gt_v = Variable(one_hot(labels_gt)).cpu()
            ignore_mask_gt = (labels_gt.numpy() == 255)

            D_out = interp(model_D(D_gt_v))
            loss_D = bce_loss(D_out, make_D_label(gt_label, ignore_mask_gt))
            loss_D = loss_D / args.iter_size / 2
            loss_D.backward()
            loss_D_value += loss_D.data.cpu().numpy()[0]

        optimizer.step()
        optimizer_D.step()

        print('exp = {}'.format(args.snapshot_dir))
        print(
            'iter = {0:8d}/{1:8d}, loss_seg = {2:.3f}, loss_adv_p = {3:.3f}, loss_D = {4:.3f}, loss_semi = {5:.3f}'
            .format(i_iter, args.num_steps, loss_seg_value,
                    loss_adv_pred_value, loss_D_value, loss_semi_value))

        if i_iter >= args.num_steps - 1:
            print('save model ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir,
                         'VOC_' + str(args.num_steps) + '.pth'))
            torch.save(
                model_D.state_dict(),
                osp.join(args.snapshot_dir,
                         'VOC_' + str(args.num_steps) + '_D.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir, 'VOC_' + str(i_iter) + '.pth'))
            torch.save(
                model_D.state_dict(),
                osp.join(args.snapshot_dir, 'VOC_' + str(i_iter) + '_D.pth'))

    end = timeit.default_timer()
    print(end - start, 'seconds')
Exemplo n.º 15
0
def main():
    """Create the model and start the training."""

    device = torch.device("cuda" if not args.cpu else "cpu")

    w, h = map(int, args.input_size.split(','))
    input_size = (w, h)

    w, h = map(int, args.input_size_target.split(','))
    input_size_target = (w, h)

    cudnn.enabled = True

    bestIoU = 0
    bestIter = 0

    # Create network
    if args.model == 'ResNet':
        model = DeeplabMulti(num_classes=args.num_classes)
        saved_state_dict = torch.load(args.restore_from)
        model.load_state_dict(saved_state_dict)

    if args.model == 'VGG':
        model = DeeplabVGG(num_classes=args.num_classes)
        saved_state_dict = torch.load(args.restore_from)
        model.load_state_dict(saved_state_dict)

    model.train()
    model.to(device)

    cudnn.benchmark = True

    # init D
    if args.model == 'ResNet':
        model_D = FCDiscriminator(num_classes=256).to(device)
        saved_state_dict = torch.load('./snapshots/BestGTA5_D.pth')
        model_D.load_state_dict(saved_state_dict)
    if args.model == 'VGG':
        model_D = FCDiscriminator(num_classes=256).to(device)

    model_D.train()
    model_D.to(device)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    trainloader = data.DataLoader(GTA5DataSet(args.data_dir,
                                              args.data_list,
                                              max_iters=args.num_steps *
                                              args.iter_size * args.batch_size,
                                              crop_size=input_size,
                                              scale=args.random_scale,
                                              mirror=args.random_mirror,
                                              mean=IMG_MEAN),
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  pin_memory=True)

    trainloader_iter = enumerate(trainloader)

    targetloader = data.DataLoader(cityscapesDataSetLabel(
        args.data_dir_target,
        args.data_list_target,
        max_iters=args.num_steps * args.iter_size * args.batch_size,
        crop_size=input_size_target,
        scale=False,
        mirror=args.random_mirror,
        mean=IMG_MEAN,
        set=args.set),
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   pin_memory=True)

    targetloader_iter = enumerate(targetloader)

    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    optimizer.zero_grad()

    optimizer_D = optim.Adam(model_D.parameters(),
                             lr=args.learning_rate_D,
                             betas=(0.9, 0.99))
    optimizer_D.zero_grad()

    bce_loss = torch.nn.BCEWithLogitsLoss()
    seg_loss = torch.nn.CrossEntropyLoss(ignore_index=255)

    interp = nn.Upsample(size=(input_size[1], input_size[0]),
                         mode='bilinear',
                         align_corners=True)
    interp_target = nn.Upsample(size=(input_size_target[1],
                                      input_size_target[0]),
                                mode='bilinear',
                                align_corners=True)
    test_interp = nn.Upsample(size=(1024, 2048),
                              mode='bilinear',
                              align_corners=True)

    # labels for adversarial training
    source_label = 0
    target_label = 1

    # load calculated  class center for initilization
    class_center_source_ori = np.load('./source_center.npy')
    class_center_source_ori = torch.from_numpy(class_center_source_ori)

    class_center_target_ori = np.load('./target_center.npy')
    class_center_target_ori = torch.from_numpy(class_center_target_ori)

    # set up tensor board
    if args.tensorboard:
        if not os.path.exists(args.log_dir):
            os.makedirs(args.log_dir)

        writer = SummaryWriter(args.log_dir)

    for i_iter in range(args.num_steps):

        loss_seg = 0
        loss_adv_target_value = 0
        loss_D_value = 0
        loss_cla_value = 0
        loss_square_value = 0
        loss_st_value = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)

        optimizer_D.zero_grad()
        adjust_learning_rate_D(optimizer_D, i_iter)

        # train G

        # don't accumulate grads in D
        for param in model_D.parameters():
            param.requires_grad = False

        # train with source

        _, batch = trainloader_iter.__next__()
        images, labels, _, _ = batch
        images = images.to(device)
        labels_s = labels  # copy for center calculation
        labels = labels.long().to(device)

        feature, prediction = model(images)
        feature_s = feature  # copy for center calculation
        prediction = interp(prediction)
        loss = seg_loss(prediction, labels)
        loss.backward(retain_graph=True)
        loss_seg = loss.item()

        # train with target

        _, batch = targetloader_iter.__next__()
        images, labels_pseudo, _, _ = batch
        labels_t = labels_pseudo  # copy for center calculation
        images = images.to(device)
        labels_pseudo = labels_pseudo.long().to(device)

        feature_target, pred_target = model(images)
        feature_t = feature_target  # copy for center calculation
        _, D_out = model_D(feature_target)
        loss_adv_target = bce_loss(
            D_out,
            torch.FloatTensor(
                D_out.data.size()).fill_(source_label).to(device))
        #print(args.lambda_adv_target)
        loss = args.lambda_adv_target * loss_adv_target
        loss.backward(retain_graph=True)
        loss_adv_target_value = loss_adv_target.item()

        pred_target = interp_target(pred_target)
        loss_st = seg_loss(pred_target, labels_pseudo)
        loss_st.backward(retain_graph=True)
        loss_st_value = loss_st.item()

        # class center alignment begin
        if i_iter > 10000:
            class_center_source = class_center_cal(feature_s, labels_s)
            class_center_target = class_center_cal(feature_t, labels_t)
            class_center_source_ori = class_center_update(
                class_center_source, class_center_source_ori,
                args.lambda_center_update)
            class_center_target_ori = class_center_update(
                class_center_target, class_center_target_ori,
                args.lambda_center_update)

            class_center_source_ori = class_center_source_ori.detach(
            )  #align target center to source

            center_diff = class_center_source_ori - class_center_target_ori
            loss_square = torch.pow(center_diff, 2).sum()

            loss = args.lambda_center * loss_square
            loss.backward()
            loss_square_value = loss_square.item()
        # class center alignment end

        # train D

        # bring back requires_grad
        for param in model_D.parameters():
            param.requires_grad = True

        # train with source
        feature = feature.detach()
        cla, D_out = model_D(feature)
        cla = interp(cla)
        loss_cla = seg_loss(cla, labels)

        loss_D = bce_loss(
            D_out,
            torch.FloatTensor(
                D_out.data.size()).fill_(source_label).to(device))
        loss_D = loss_D / 2
        #print(args.lambda_s)
        loss_Disc = args.lambda_s * loss_cla + loss_D
        loss_Disc.backward()

        loss_cla_value = loss_cla.item()
        loss_D_value = loss_D.item()

        # train with target
        feature_target = feature_target.detach()
        _, D_out = model_D(feature_target)
        loss_D = bce_loss(
            D_out,
            torch.FloatTensor(
                D_out.data.size()).fill_(target_label).to(device))
        loss_D = loss_D / 2
        loss_D.backward()
        loss_D_value += loss_D.item()

        optimizer.step()
        optimizer_D.step()

        class_center_target_ori = class_center_target_ori.detach()

        if args.tensorboard:
            scalar_info = {
                'loss_seg': loss_seg,
                'loss_cla': loss_cla_value,
                'loss_adv_target': loss_adv_target_value,
                'loss_st_value': loss_st_value,
                'loss_D': loss_D_value,
            }

            if i_iter % 10 == 0:
                for key, val in scalar_info.items():
                    writer.add_scalar(key, val, i_iter)

        #print('exp = {}'.format(args.snapshot_dir))
        print(
            'iter = {0:8d}/{1:8d}, loss_seg = {2:.3f} loss_adv = {3:.3f} loss_D = {4:.3f} loss_cla = {5:.3f} loss_st = {6:.5f} loss_square = {7:.5f}'
            .format(i_iter, args.num_steps, loss_seg, loss_adv_target_value,
                    loss_D_value, loss_cla_value, loss_st_value,
                    loss_square_value))

        if i_iter >= args.num_steps_stop - 1:
            print('save model ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir,
                         'GTA5_' + str(args.num_steps_stop) + '.pth'))
            torch.save(
                model_D.state_dict(),
                osp.join(args.snapshot_dir,
                         'GTA5_' + str(args.num_steps_stop) + '_D.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            if not os.path.exists(args.save):
                os.makedirs(args.save)
            testloader = data.DataLoader(cityscapesDataSet(
                args.data_dir_target,
                args.data_list_target_test,
                crop_size=(1024, 512),
                mean=IMG_MEAN,
                scale=False,
                mirror=False,
                set='val'),
                                         batch_size=1,
                                         shuffle=False,
                                         pin_memory=True)
            model.eval()
            for index, batch in enumerate(testloader):
                if index % 100 == 0:
                    print('%d processd' % index)
                image, _, name = batch
                with torch.no_grad():
                    output1, output2 = model(Variable(image).to(device))
                output = test_interp(output2).cpu().data[0].numpy()
                output = output.transpose(1, 2, 0)
                output = np.asarray(np.argmax(output, axis=2), dtype=np.uint8)
                output = Image.fromarray(output)
                name = name[0].split('/')[-1]
                output.save('%s/%s' % (args.save, name))
            mIoUs = compute_mIoU(osp.join(args.data_dir_target, 'gtFine/val'),
                                 args.save, 'dataset/cityscapes_list')
            mIoU = round(np.nanmean(mIoUs) * 100, 2)

            print('===>  current   mIoU: ' + str(mIoU))
            print('===> last best  mIoU: ' + str(bestIoU))
            print('===> last best  iter: ' + str(bestIter))

            if mIoU > bestIoU:
                bestIoU = mIoU
                bestIter = i_iter
                torch.save(model.state_dict(),
                           osp.join(args.snapshot_dir, 'BestGTA5.pth'))
                torch.save(model_D.state_dict(),
                           osp.join(args.snapshot_dir, 'BestGTA5_D.pth'))
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '.pth'))
            torch.save(
                model_D.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D.pth'))
            model.train()

    if args.tensorboard:
        writer.close()
Exemplo n.º 16
0
def main():
    # 将参数的input_size 映射到整数,并赋值,从字符串转换到整数二元组
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    cudnn.enabled = False
    gpu = args.gpu

    # create network
    model = Res_Deeplab(num_classes=args.num_classes)

    # load pretrained parameters
    if args.restore_from[:4] == 'http':
        saved_state_dict = model_zoo.load_url(args.restore_from)
    else:
        saved_state_dict = torch.load(args.restore_from)

    # only copy the params that exist in current model (caffe-like)
    # 确保模型中参数的格式与要加载的参数相同
    # 返回一个字典,保存着module的所有状态(state);parameters和persistent buffers都会包含在字典中,字典的key就是parameter和buffer的 names。
    new_params = model.state_dict().copy()
    for name, param in new_params.items():
        # print (name)
        if name in saved_state_dict and param.size() == saved_state_dict[name].size():
            new_params[name].copy_(saved_state_dict[name])
            # print('copy {}'.format(name))
    model.load_state_dict(new_params)

    # 设置为训练模式
    model.train()
    cudnn.benchmark = True

    model.cuda(gpu)

    # init D
    model_D = FCDiscriminator(num_classes=args.num_classes)
    if args.restore_from_D is not None:
        model_D.load_state_dict(torch.load(args.restore_from_D))
    model_D.train()
    model_D.cuda(gpu)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    train_dataset = VOCDataSet(args.data_dir, args.data_list, crop_size=input_size,
                               scale=args.random_scale, mirror=args.random_mirror, mean=IMG_MEAN)

    train_dataset_size = len(train_dataset)
    train_gt_dataset = VOCGTDataSet(args.data_dir, args.data_list, crop_size=input_size,
                                    scale=args.random_scale, mirror=args.random_mirror, mean=IMG_MEAN)

    if args.partial_data is None:
        trainloader = data.DataLoader(train_dataset,
                                      batch_size=args.batch_size, shuffle=True, num_workers=5, pin_memory=True)

        trainloader_gt = data.DataLoader(train_gt_dataset,
                                         batch_size=args.batch_size, shuffle=True, num_workers=5, pin_memory=True)
    else:
        # sample partial data
        partial_size = int(args.partial_data * train_dataset_size)

        if args.partial_id is not None:
            train_ids = pickle.load(open(args.partial_id))
            print('loading train ids from {}'.format(args.partial_id))
        else:
            train_ids = list(range(train_dataset_size))  # ?
            np.random.shuffle(train_ids)

        pickle.dump(train_ids, open(osp.join(args.snapshot_dir, 'train_id.pkl'), 'wb'))  # 写入文件

        train_sampler = data.sampler.SubsetRandomSampler(train_ids[:partial_size])
        train_remain_sampler = data.sampler.SubsetRandomSampler(train_ids[partial_size:])
        train_gt_sampler = data.sampler.SubsetRandomSampler(train_ids[:partial_size])

        trainloader = data.DataLoader(train_dataset,
                                      batch_size=args.batch_size, sampler=train_sampler, num_workers=3, pin_memory=True)
        trainloader_remain = data.DataLoader(train_dataset,
                                             batch_size=args.batch_size, sampler=train_remain_sampler, num_workers=3,
                                             pin_memory=True)
        trainloader_gt = data.DataLoader(train_gt_dataset,
                                         batch_size=args.batch_size, sampler=train_gt_sampler, num_workers=3,
                                         pin_memory=True)

        trainloader_remain_iter = enumerate(trainloader_remain)

    trainloader_iter = enumerate(trainloader)
    trainloader_gt_iter = enumerate(trainloader_gt)

    # implement model.optim_parameters(args) to handle different models' lr setting

    # optimizer for segmentation network
    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay)
    optimizer.zero_grad()

    # optimizer for discriminator network
    optimizer_D = optim.Adam(model_D.parameters(), lr=args.learning_rate_D, betas=(0.9, 0.99))
    optimizer_D.zero_grad()

    # loss/ bilinear upsampling
    bce_loss = BCEWithLogitsLoss2d()
    interp = nn.Upsample(size=(input_size[1], input_size[0]), mode='bilinear')  # ???

    # labels for adversarial training
    pred_label = 0
    gt_label = 1

    for i_iter in range(args.num_steps):
        print("Iter:", i_iter)
        loss_seg_value = 0
        loss_adv_pred_value = 0
        loss_D_value = 0
        loss_semi_value = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)
        optimizer_D.zero_grad()
        adjust_learning_rate_D(optimizer_D, i_iter)

        for sub_i in range(args.iter_size):

            # train G

            # don't accumulate grads in D
            for param in model_D.parameters():
                param.requires_grad = False

            # do semi first
            if args.lambda_semi > 0 and i_iter >= args.semi_start:
                try:
                    _, batch = next(trainloader_remain_iter)
                except:
                    trainloader_remain_iter = enumerate(trainloader_remain)
                    _, batch = next(trainloader_remain_iter)

                # only access to img
                images, _, _, _ = batch

                images = Variable(images).cuda(gpu)
                # images = Variable(images).cpu()

                pred = interp(model(images))
                D_out = interp(model_D(F.softmax(pred)))

                D_out_sigmoid = F.sigmoid(D_out).data.cpu().numpy().squeeze(axis=1)

                # produce ignore mask
                semi_ignore_mask = (D_out_sigmoid < args.mask_T)

                semi_gt = pred.data.cpu().numpy().argmax(axis=1)
                semi_gt[semi_ignore_mask] = 255

                semi_ratio = 1.0 - float(semi_ignore_mask.sum()) / semi_ignore_mask.size
                print('semi ratio: {:.4f}'.format(semi_ratio))

                if semi_ratio == 0.0:
                    loss_semi_value += 0
                else:
                    semi_gt = torch.FloatTensor(semi_gt)

                    loss_semi = args.lambda_semi * loss_calc(pred, semi_gt, args.gpu)
                    loss_semi = loss_semi / args.iter_size
                    loss_semi.backward()
                    loss_semi_value += loss_semi.data.cpu().numpy()[0] / args.lambda_semi
            else:
                loss_semi = None

            # train with source

            try:
                _, batch = next(trainloader_iter)
            except:
                trainloader_iter = enumerate(trainloader)
                _, batch = next(trainloader_iter)

            images, labels, _, _ = batch

            images = Variable(images).cuda(gpu)
            # images = Variable(images).cpu()

            ignore_mask = (labels.numpy() == 255)
            pred = interp(model(images))

            loss_seg = loss_calc(pred, labels, args.gpu)

            D_out = interp(model_D(F.softmax(pred)))

            loss_adv_pred = bce_loss(D_out, make_D_label(gt_label, ignore_mask))

            loss = loss_seg + args.lambda_adv_pred * loss_adv_pred

            # proper normalization
            loss = loss / args.iter_size
            loss.backward()
            loss_seg_value += loss_seg.data.cpu().numpy()[0] / args.iter_size
            loss_adv_pred_value += loss_adv_pred.data.cpu().numpy()[0] / args.iter_size

            # train D

            # bring back requires_grad
            for param in model_D.parameters():
                param.requires_grad = True

            # train with pred
            pred = pred.detach()

            D_out = interp(model_D(F.softmax(pred)))
            loss_D = bce_loss(D_out, make_D_label(pred_label, ignore_mask))
            loss_D = loss_D / args.iter_size / 2
            loss_D.backward()
            loss_D_value += loss_D.data.cpu().numpy()[0]

            # train with gt
            # get gt labels
            try:
                _, batch = next(trainloader_gt_iter)
            except:
                trainloader_gt_iter = enumerate(trainloader_gt)
                _, batch = next(trainloader_gt_iter)

            _, labels_gt, _, _ = batch
            D_gt_v = Variable(one_hot(labels_gt)).cuda(args.gpu)
            # D_gt_v = Variable(one_hot(labels_gt)).cpu()
            ignore_mask_gt = (labels_gt.numpy() == 255)

            D_out = interp(model_D(D_gt_v))
            loss_D = bce_loss(D_out, make_D_label(gt_label, ignore_mask_gt))
            loss_D = loss_D / args.iter_size / 2
            loss_D.backward()
            loss_D_value += loss_D.data.cpu().numpy()[0]

        optimizer.step()
        optimizer_D.step()

        print('exp = {}'.format(args.snapshot_dir))
        print(
            'iter = {0:8d}/{1:8d}, loss_seg = {2:.3f}, loss_adv_p = {3:.3f}, loss_D = {4:.3f}, loss_semi = {5:.3f}'.format(
                i_iter, args.num_steps, loss_seg_value, loss_adv_pred_value, loss_D_value, loss_semi_value))

        if i_iter >= args.num_steps - 1:
            print('save model ...')
            torch.save(model.state_dict(), osp.join(args.snapshot_dir, 'VOC_' + str(args.num_steps) + '.pth'))
            torch.save(model_D.state_dict(), osp.join(args.snapshot_dir, 'VOC_' + str(args.num_steps) + '_D.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            torch.save(model.state_dict(), osp.join(args.snapshot_dir, 'VOC_' + str(i_iter) + '.pth'))
            torch.save(model_D.state_dict(), osp.join(args.snapshot_dir, 'VOC_' + str(i_iter) + '_D.pth'))

    end = timeit.default_timer()
    print(end - start, 'seconds')
Exemplo n.º 17
0
def main():
    """Create the model and start the training."""

    device = torch.device("cuda" if not args.cpu else "cpu")
    cudnn.benchmark = True
    cudnn.enabled = True

    w, h = map(int, args.input_size.split(','))
    input_size = (w, h)

    w, h = map(int, args.input_size_target.split(','))
    input_size_target = (w, h)

    Iter = 0
    bestIoU = 0

    # Create network
    # init G
    if args.model == 'DeepLab':
        model = DeeplabMultiFeature(num_classes=args.num_classes)
        if args.restore_from[:4] == 'http':
            saved_state_dict = model_zoo.load_url(args.restore_from)
        else:
            saved_state_dict = torch.load(args.restore_from)
        if args.continue_train:
            if list(saved_state_dict.keys())[0].split('.')[0] == 'module':
                for key in saved_state_dict.keys():
                    saved_state_dict['.'.join(
                        key.split('.')[1:])] = saved_state_dict.pop(key)
            model.load_state_dict(saved_state_dict)
        else:
            new_params = model.state_dict().copy()
            for i in saved_state_dict:
                i_parts = i.split('.')
                if not args.num_classes == 19 or not i_parts[1] == 'layer5':
                    new_params['.'.join(i_parts[1:])] = saved_state_dict[i]
            model.load_state_dict(new_params)

    # init D
    model_D = FCDiscriminator(num_classes=args.num_classes).to(device)

    if args.continue_train:
        model_weights_path = args.restore_from
        temp = model_weights_path.split('.')
        temp[-2] = temp[-2] + '_D'
        model_D_weights_path = '.'.join(temp)
        model_D.load_state_dict(torch.load(model_D_weights_path))
        temp = model_weights_path.split('.')
        temp = temp[-2][-9:]
        Iter = int(temp.split('_')[1]) + 1

    model.train()
    model.to(device)

    model_D.train()
    model_D.to(device)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    # init data loader
    if args.data_dir.split('/')[-1] == 'gta5_deeplab':
        trainset = GTA5DataSet(args.data_dir,
                               args.data_list,
                               max_iters=args.num_steps * args.iter_size *
                               args.batch_size,
                               crop_size=input_size,
                               scale=args.random_scale,
                               mirror=args.random_mirror,
                               mean=IMG_MEAN)
    elif args.data_dir.split('/')[-1] == 'syn_deeplab':
        trainset = synthiaDataSet(args.data_dir,
                                  args.data_list,
                                  max_iters=args.num_steps * args.iter_size *
                                  args.batch_size,
                                  crop_size=input_size,
                                  scale=args.random_scale,
                                  mirror=args.random_mirror,
                                  mean=IMG_MEAN)

    trainloader = data.DataLoader(trainset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  pin_memory=True)
    trainloader_iter = enumerate(trainloader)

    targetloader = data.DataLoader(cityscapesDataSet(
        args.data_dir_target,
        args.data_list_target,
        max_iters=args.num_steps * args.iter_size * args.batch_size,
        crop_size=input_size_target,
        scale=False,
        mirror=args.random_mirror,
        mean=IMG_MEAN,
        set=args.set),
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   pin_memory=True)
    targetloader_iter = enumerate(targetloader)

    # init optimizer
    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    optimizer.zero_grad()

    optimizer_D = optim.Adam(model_D.parameters(),
                             lr=args.learning_rate_D,
                             betas=(0.9, 0.99))
    optimizer_D.zero_grad()

    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level="O2",
                                      keep_batchnorm_fp32=True,
                                      loss_scale="dynamic")

    model_D, optimizer_D = amp.initialize(model_D,
                                          optimizer_D,
                                          opt_level="O2",
                                          keep_batchnorm_fp32=True,
                                          loss_scale="dynamic")

    # init loss
    bce_loss = torch.nn.BCEWithLogitsLoss()
    seg_loss = torch.nn.CrossEntropyLoss(ignore_index=255)
    L1_loss = torch.nn.L1Loss(reduction='none')

    interp = nn.Upsample(size=(input_size[1], input_size[0]),
                         mode='bilinear',
                         align_corners=True)
    interp_target = nn.Upsample(size=(input_size_target[1],
                                      input_size_target[0]),
                                mode='bilinear',
                                align_corners=True)
    test_interp = nn.Upsample(size=(1024, 2048),
                              mode='bilinear',
                              align_corners=True)

    # labels for adversarial training
    source_label = 0
    target_label = 1

    # init prototype
    num_prototype = args.num_prototype
    num_ins = args.num_prototype * 10
    src_cls_features = torch.zeros([len(BG_LABEL), num_prototype, 2048],
                                   dtype=torch.float32).to(device)
    src_cls_ptr = np.zeros(len(BG_LABEL), dtype=np.uint64)
    src_ins_features = torch.zeros([len(FG_LABEL), num_ins, 2048],
                                   dtype=torch.float32).to(device)
    src_ins_ptr = np.zeros(len(FG_LABEL), dtype=np.uint64)

    # set up tensor board
    if args.tensorboard:
        if not os.path.exists(args.log_dir):
            os.makedirs(args.log_dir)
        writer = SummaryWriter(args.log_dir)

    # start training
    for i_iter in range(Iter, args.num_steps):

        loss_seg_value = 0
        loss_adv_target_value = 0
        loss_D_value = 0
        loss_cls_value = 0
        loss_ins_value = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)

        optimizer_D.zero_grad()
        adjust_learning_rate_D(optimizer_D, i_iter)

        for sub_i in range(args.iter_size):

            # train G

            # don't accumulate grads in D

            for param in model_D.parameters():
                param.requires_grad = False

            # train with source

            _, batch = trainloader_iter.__next__()

            images, labels, _, _ = batch
            images = images.to(device)
            labels = labels.long().to(device)

            src_feature, pred = model(images)
            pred_softmax = F.softmax(pred, dim=1)
            pred_idx = torch.argmax(pred_softmax, dim=1)

            right_label = F.interpolate(labels.unsqueeze(0).float(),
                                        (pred_idx.size(1), pred_idx.size(2)),
                                        mode='nearest').squeeze(0).long()
            right_label[right_label != pred_idx] = 255

            for ii in range(len(BG_LABEL)):
                cls_idx = BG_LABEL[ii]
                mask = right_label == cls_idx
                if torch.sum(mask) == 0:
                    continue
                feature = global_avg_pool(src_feature, mask.float())
                if cls_idx != torch.argmax(
                        torch.squeeze(model.layer6(
                            feature.half()).float())).item():
                    continue
                src_cls_features[ii,
                                 int(src_cls_ptr[ii] %
                                     num_prototype), :] = torch.squeeze(
                                         feature).clone().detach()
                src_cls_ptr[ii] += 1

            seg_ins = seg_label(right_label.squeeze())
            for ii in range(len(FG_LABEL)):
                cls_idx = FG_LABEL[ii]
                segmask, pixelnum = seg_ins[ii]
                if len(pixelnum) == 0:
                    continue
                sortmax = np.argsort(pixelnum)[::-1]
                for i in range(min(10, len(sortmax))):
                    mask = segmask == (sortmax[i] + 1)
                    feature = global_avg_pool(src_feature, mask.float())
                    if cls_idx != torch.argmax(
                            torch.squeeze(
                                model.layer6(feature.half()).float())).item():
                        continue
                    src_ins_features[ii, int(src_ins_ptr[ii] %
                                             num_ins), :] = torch.squeeze(
                                                 feature).clone().detach()
                    src_ins_ptr[ii] += 1

            pred = interp(pred)
            loss_seg = seg_loss(pred, labels)
            loss = loss_seg

            # proper normalization
            loss = loss / args.iter_size
            amp_backward(loss, optimizer)
            loss_seg_value += loss_seg.item() / args.iter_size

            # train with target

            _, batch = targetloader_iter.__next__()
            images, _, _ = batch
            images = images.to(device)

            trg_feature, pred_target = model(images)

            pred_target_softmax = F.softmax(pred_target, dim=1)
            pred_target_idx = torch.argmax(pred_target_softmax, dim=1)

            loss_cls = torch.zeros(1).to(device)
            loss_ins = torch.zeros(1).to(device)
            if i_iter > 0:
                for ii in range(len(BG_LABEL)):
                    cls_idx = BG_LABEL[ii]
                    if src_cls_ptr[ii] / num_prototype <= 1:
                        continue
                    mask = pred_target_idx == cls_idx
                    feature = global_avg_pool(trg_feature, mask.float())
                    if cls_idx != torch.argmax(
                            torch.squeeze(
                                model.layer6(feature.half()).float())).item():
                        continue
                    ext_feature = feature.squeeze().expand(num_prototype, 2048)
                    loss_cls += torch.min(
                        torch.sum(L1_loss(ext_feature,
                                          src_cls_features[ii, :, :]),
                                  dim=1) / 2048.)

                seg_ins = seg_label(pred_target_idx.squeeze())
                for ii in range(len(FG_LABEL)):
                    cls_idx = FG_LABEL[ii]
                    if src_ins_ptr[ii] / num_ins <= 1:
                        continue
                    segmask, pixelnum = seg_ins[ii]
                    if len(pixelnum) == 0:
                        continue
                    sortmax = np.argsort(pixelnum)[::-1]
                    for i in range(min(10, len(sortmax))):
                        mask = segmask == (sortmax[i] + 1)
                        feature = global_avg_pool(trg_feature, mask.float())
                        feature = feature.squeeze().expand(num_ins, 2048)
                        loss_ins += torch.min(
                            torch.sum(L1_loss(feature,
                                              src_ins_features[ii, :, :]),
                                      dim=1) / 2048.) / min(10, len(sortmax))

            pred_target = interp_target(pred_target)

            D_out = model_D(F.softmax(pred_target, dim=1))
            loss_adv_target = bce_loss(
                D_out,
                torch.FloatTensor(
                    D_out.data.size()).fill_(source_label).to(device))

            loss = args.lambda_adv_target * loss_adv_target + args.lambda_adv_cls * loss_cls + args.lambda_adv_ins * loss_ins
            loss = loss / args.iter_size
            amp_backward(loss, optimizer)
            loss_adv_target_value += loss_adv_target.item() / args.iter_size

            # train D

            # bring back requires_grad

            for param in model_D.parameters():
                param.requires_grad = True

            # train with source
            pred = pred.detach()
            D_out = model_D(F.softmax(pred, dim=1))

            loss_D = bce_loss(
                D_out,
                torch.FloatTensor(
                    D_out.data.size()).fill_(source_label).to(device))
            loss_D = loss_D / args.iter_size / 2
            amp_backward(loss_D, optimizer_D)
            loss_D_value += loss_D.item()

            # train with target
            pred_target = pred_target.detach()
            D_out = model_D(F.softmax(pred_target, dim=1))

            loss_D = bce_loss(
                D_out,
                torch.FloatTensor(
                    D_out.data.size()).fill_(target_label).to(device))
            loss_D = loss_D / args.iter_size / 2
            amp_backward(loss_D, optimizer_D)
            loss_D_value += loss_D.item()

        optimizer.step()
        optimizer_D.step()

        if args.tensorboard:
            scalar_info = {
                'loss_seg': loss_seg_value,
                'loss_adv_target': loss_adv_target_value,
                'loss_D': loss_D_value,
            }

            if i_iter % 10 == 0:
                for key, val in scalar_info.items():
                    writer.add_scalar(key, val, i_iter)

        print('exp = {}'.format(args.snapshot_dir))
        print(
            'iter = {0:8d}/{1:8d}, loss_seg = {2:.3f}, loss_adv = {3:.3f} loss_D = {4:.3f} loss_cls = {5:.3f} loss_ins = {6:.3f}'
            .format(i_iter, args.num_steps, loss_seg_value,
                    loss_adv_target_value, loss_D_value, loss_cls.item(),
                    loss_ins.item()))

        if i_iter >= args.num_steps_stop - 1:
            print('save model ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir,
                         'GTA5_' + str(args.num_steps_stop) + '.pth'))
            torch.save(
                model_D.state_dict(),
                osp.join(args.snapshot_dir,
                         'GTA5_' + str(args.num_steps_stop) + '_D.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            if not os.path.exists(args.save):
                os.makedirs(args.save)
            testloader = data.DataLoader(cityscapesDataSet(
                args.data_dir_target,
                args.data_list_target_test,
                crop_size=(1024, 512),
                mean=IMG_MEAN,
                scale=False,
                mirror=False,
                set='val'),
                                         batch_size=1,
                                         shuffle=False,
                                         pin_memory=True)
            model.eval()
            for index, batch in enumerate(testloader):
                image, _, name = batch
                with torch.no_grad():
                    output1, output2 = model(Variable(image).to(device))
                output = test_interp(output2).cpu().data[0].numpy()
                output = output.transpose(1, 2, 0)
                output = np.asarray(np.argmax(output, axis=2), dtype=np.uint8)
                output = Image.fromarray(output)
                name = name[0].split('/')[-1]
                output.save('%s/%s' % (args.save, name))
            mIoUs = compute_mIoU(osp.join(args.data_dir_target, 'gtFine/val'),
                                 args.save, 'dataset/cityscapes_list')
            mIoU = round(np.nanmean(mIoUs) * 100, 2)
            if mIoU > bestIoU:
                bestIoU = mIoU
                torch.save(model.state_dict(),
                           osp.join(args.snapshot_dir, 'BestGTA5.pth'))
                torch.save(model_D.state_dict(),
                           osp.join(args.snapshot_dir, 'BestGTA5_D.pth'))
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '.pth'))
            torch.save(
                model_D.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D.pth'))
            model.train()

    if args.tensorboard:
        writer.close()
def main():

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    cudnn.enabled = True
    gpu = args.gpu

    # create network
    model = DeeplabMulti(num_classes=args.num_classes)
    #model = Res_Deeplab(num_classes=args.num_classes)

    # load pretrained parameters
    if args.restore_from[:4] == 'http':
        saved_state_dict = model_zoo.load_url(args.restore_from)
    else:
        saved_state_dict = torch.load(args.restore_from, map_location='cuda:0')

    # only copy the params that exist in current model (caffe-like)
    new_params = model.state_dict().copy()
    for name, param in new_params.items():
        print(name)
        if name in saved_state_dict and param.size(
        ) == saved_state_dict[name].size():
            new_params[name].copy_(saved_state_dict[name])
            print('copy {}'.format(name))
    model.load_state_dict(new_params)

    model.train()
    model.cuda(args.gpu)
    #summary(model,(3,7,7))

    cudnn.benchmark = True

    # init D
    model_D = FCDiscriminator(num_classes=args.num_classes)
    if args.restore_from_D is not None:
        model_D.load_state_dict(torch.load(args.restore_from_D))
    model_D.train()
    model_D.cuda(args.gpu)
    #summary(model_D, (21,321,321))
    #quit()

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    train_dataset = cityscapesDataSet(max_iters=args.num_steps *
                                      args.iter_size * args.batch_size,
                                      scale=args.random_scale)
    train_dataset_size = len(train_dataset)
    train_gt_dataset = cityscapesDataSet(max_iters=args.num_steps *
                                         args.iter_size * args.batch_size,
                                         scale=args.random_scale)

    if args.partial_data is None:
        trainloader = data.DataLoader(train_dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=args.num_workers,
                                      pin_memory=True)

        trainloader_gt = data.DataLoader(train_gt_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=5,
                                         pin_memory=True)
    else:
        # sample partial data
        partial_size = int(args.partial_data * train_dataset_size)

        if args.partial_id is not None:
            train_ids = pickle.load(open(args.partial_id))
            print('loading train ids from {}'.format(args.partial_id))
        else:
            train_ids = list(range(train_dataset_size))
            np.random.shuffle(train_ids)

        pickle.dump(train_ids,
                    open(osp.join(args.snapshot_dir, 'train_id.pkl'), 'wb'))

        train_sampler = data.sampler.SubsetRandomSampler(
            train_ids[:partial_size])
        train_remain_sampler = data.sampler.SubsetRandomSampler(
            train_ids[partial_size:])
        train_gt_sampler = data.sampler.SubsetRandomSampler(
            train_ids[:partial_size])

        trainloader = data.DataLoader(train_dataset,
                                      sampler=train_sampler,
                                      batch_size=args.batch_size,
                                      shuffle=False,
                                      num_workers=args.num_workers,
                                      pin_memory=True)
        trainloader_remain = data.DataLoader(train_dataset,
                                             sampler=train_remain_sampler,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.num_workers,
                                             pin_memory=True)
        trainloader_gt = data.DataLoader(train_gt_dataset,
                                         sampler=train_gt_sampler,
                                         batch_size=args.batch_size,
                                         shuffle=False,
                                         num_workers=args.num_workers,
                                         pin_memory=True)
        trainloader_remain_iter = enumerate(trainloader)

    trainloader_iter = enumerate(trainloader)
    trainloader_gt_iter = enumerate(trainloader_gt)

    # implement model.optim_parameters(args) to handle different models' lr setting

    # optimizer for segmentation network
    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    optimizer.zero_grad()

    # optimizer for discriminator network
    optimizer_D = optim.Adam(model_D.parameters(),
                             lr=args.learning_rate_D,
                             betas=(0.9, 0.99))
    optimizer_D.zero_grad()

    # loss/ bilinear upsampling
    bce_loss = BCEWithLogitsLoss2d()
    interp = nn.Upsample(size=(input_size[1], input_size[0]), mode='bilinear')

    if version.parse(torch.__version__) >= version.parse('0.4.0'):
        interp = nn.Upsample(size=(input_size[1], input_size[0]),
                             mode='bilinear',
                             align_corners=True)
    else:
        interp = nn.Upsample(size=(input_size[1], input_size[0]),
                             mode='bilinear')

    # labels for adversarial training
    pred_label = 0
    gt_label = 1

    for i_iter in range(args.num_steps):

        loss_seg_value = 0
        loss_adv_pred_value = 0
        loss_D_value = 0
        loss_semi_value = 0
        loss_semi_adv_value = 0
        loss_laplacian = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)
        optimizer_D.zero_grad()
        adjust_learning_rate_D(optimizer_D, i_iter)

        for sub_i in range(args.iter_size):

            # train G

            # don't accumulate grads in D
            for param in model_D.parameters():
                param.requires_grad = False

            # do semi first
            if (args.lambda_semi > 0 or args.lambda_semi_adv > 0
                ) and i_iter >= args.semi_start_adv:
                try:
                    _, batch = trainloader_remain_iter.__next__()
                except:
                    trainloader_remain_iter = enumerate(trainloader)
                    _, batch = trainloader_remain_iter.__next__()

                # only access to img
                images, _, _, _ = batch
                images = Variable(images).cuda(args.gpu)

                try:
                    pred = interp(model(images))
                except RuntimeError as exception:
                    if "out of memory" in str(exception):
                        print("WARNING: out of memory")
                        if hasattr(torch.cuda, 'empty_cache'):
                            torch.cuda.empty_cache()
                    else:
                        raise exception

                pred_remain = pred.detach()

                D_out = interp(model_D(F.softmax(pred)))
                D_out_sigmoid = F.sigmoid(D_out).data.cpu().numpy().squeeze(
                    axis=1)

                ignore_mask_remain = np.zeros(D_out_sigmoid.shape).astype(
                    np.bool)

                loss_semi_adv = args.lambda_semi_adv * bce_loss(
                    D_out, make_D_label(gt_label, ignore_mask_remain))
                loss_semi_adv = loss_semi_adv / args.iter_size

                #loss_semi_adv.backward()
                loss_semi_adv_value += loss_semi_adv.data.cpu().numpy(
                ) / args.lambda_semi_adv

                if args.lambda_semi <= 0 or i_iter < args.semi_start:
                    loss_semi_adv.backward()
                    loss_semi_value = 0
                else:
                    # produce ignore mask
                    semi_ignore_mask = (D_out_sigmoid < args.mask_T)

                    semi_gt = pred.data.cpu().numpy().argmax(axis=1)
                    semi_gt[semi_ignore_mask] = 255

                    semi_ratio = 1.0 - float(
                        semi_ignore_mask.sum()) / semi_ignore_mask.size
                    print('semi ratio: {:.4f}'.format(semi_ratio))

                    if semi_ratio == 0.0:
                        loss_semi_value += 0
                    else:
                        semi_gt = torch.FloatTensor(semi_gt)

                        loss_semi = args.lambda_semi * loss_calc(
                            pred, semi_gt, args.gpu)
                        loss_semi = loss_semi / args.iter_size
                        loss_semi_value += loss_semi.data.cpu().numpy(
                        ) / args.lambda_semi
                        loss_semi += loss_semi_adv
                        loss_semi.backward()

            else:
                loss_semi = None
                loss_semi_adv = None

            # train with source

            try:
                _, batch = trainloader_iter.__next__()
            except:
                trainloader_iter = enumerate(trainloader)
                _, batch = trainloader_iter.__next__()

            images, labels, _, _ = batch
            images = Variable(images).cuda(args.gpu)
            ignore_mask = (labels.numpy() == 255)

            try:
                pred = interp(model(images))
            except RuntimeError as exception:
                if "out of memory" in str(exception):
                    print("WARNING: out of memory")
                    if hasattr(torch.cuda, 'empty_cache'):
                        torch.cuda.empty_cache()
                else:
                    raise exception
            for i in range(1):
                imagess = torch.zeros(1280, 720).cuda()
                for j in range(19):
                    try:
                        imagess += pred[i, j, :, :].reshape(1280, 720)
                    except IndexError:
                        pass
                try:
                    label = labels[i, :, :].reshape(1280, 720).cuda()
                except IndexError:
                    pass
                imagess = torch.from_numpy(
                    cv2.Laplacian(imagess.cpu().detach().numpy(), -1)).cuda()
                labell = torch.from_numpy(
                    cv2.Laplacian(label.cpu().detach().numpy(), -1)).cuda()
                imagess = imagess.reshape(1, 1, 1280, 720)
                labell = labell.reshape(1, 1, 1280, 720)
                l = bce_loss(imagess, labell)

            loss_laplacian = l
            loss_seg = loss_calc(pred, labels, args.gpu)

            D_out = interp(model_D(F.softmax(pred)))

            loss_adv_pred = bce_loss(D_out,
                                     make_D_label(gt_label, ignore_mask))

            loss = loss_seg + args.lambda_adv_pred * loss_adv_pred - loss_laplacian

            # proper normalization
            loss = loss / args.iter_size
            loss.backward()
            loss_seg_value += loss_seg.data.cpu().numpy() / args.iter_size
            loss_adv_pred_value += loss_adv_pred.data.cpu().numpy(
            ) / args.iter_size

            # train D

            # bring back requires_grad
            for param in model_D.parameters():
                param.requires_grad = True

            # train with pred
            pred = pred.detach()

            if args.D_remain:
                pred = torch.cat((pred, pred_remain), 0)
                ignore_mask = np.concatenate((ignore_mask, ignore_mask_remain),
                                             axis=0)

            D_out = interp(model_D(F.softmax(pred)))
            loss_D = bce_loss(D_out, make_D_label(pred_label, ignore_mask))
            loss_D = loss_D / args.iter_size / 2
            loss_D.backward()
            loss_D_value += loss_D.data.cpu().numpy()

            # train with gt
            # get gt labels
            try:
                _, batch = trainloader_gt_iter.__next__()
            except:
                trainloader_gt_iter = enumerate(trainloader_gt)
                _, batch = trainloader_gt_iter.__next__()

            _, labels_gt, _, _ = batch
            D_gt_v = Variable(one_hot(labels_gt)).cuda(args.gpu)
            ignore_mask_gt = (labels_gt.numpy() == 255)

            D_out = interp(model_D(D_gt_v))
            loss_D = bce_loss(D_out, make_D_label(gt_label, ignore_mask_gt))
            loss_D = loss_D / args.iter_size / 2
            loss_D.backward()
            loss_D_value += loss_D.data.cpu().numpy()

        optimizer.step()
        optimizer_D.step()

        print('exp = {}'.format(args.snapshot_dir))
        print(
            'iter = {0:8d}/{1:8d}, loss_seg = {2:.3f}, loss_adv_p = {3:.3f}, loss_D = {4:.3f}, loss_semi = {5:.3f}, loss_semi_adv = {6:.3f}, loss_laplacian = {7:.3f}'
            .format(i_iter, args.num_steps, loss_seg_value,
                    loss_adv_pred_value, loss_D_value, loss_semi_value,
                    loss_semi_adv_value, loss_laplacian))

        if i_iter >= args.num_steps - 1:
            print('save model ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir,
                         'CITY_' + str(args.num_steps) + '.pth'))
            torch.save(
                model_D.state_dict(),
                osp.join(args.snapshot_dir,
                         'CITY_' + str(args.num_steps) + '_D.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir, 'CITY_' + str(i_iter) + '.pth'))
            torch.save(
                model_D.state_dict(),
                osp.join(args.snapshot_dir, 'CITY_' + str(i_iter) + '_D.pth'))
            #torch.cuda.empty_cache()

    end = timeit.default_timer()
    print(end - start, 'seconds')
def main():
    """Create the model and start the training."""

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    h, w = map(int, args.input_size_target.split(','))
    input_size_target = (h, w)

    cudnn.enabled = True
    gpu = args.gpu

    # Create network
    if args.model == 'DeepLab':
        model = Res_Deeplab(num_classes=args.num_classes)
        if args.restore_from[:4] == 'http':
            saved_state_dict = model_zoo.load_url(args.restore_from)
        else:
            saved_state_dict = torch.load(args.restore_from)

        # new_params = model.state_dict().copy()
        # for i in saved_state_dict:
        #     # Scale.layer5.conv2d_list.3.weight
        #     i_parts = i.split('.')
        #     # print i_parts
        #     if not args.num_classes == 19 or not i_parts[1] == 'layer5':
        #         new_params['.'.join(i_parts[1:])] = saved_state_dict[i]
        #         # print i_parts
        # model.load_state_dict(new_params)
        model.load_state_dict(saved_state_dict)

    model.train()
    model.cuda(args.gpu)

    cudnn.benchmark = True

    # init D
    model_D1 = FCDiscriminator(num_classes=args.num_classes)
    model_D2 = FCDiscriminator(num_classes=args.num_classes)

    # # load discriminator params
    saved_state_dict_D1 = torch.load(D1_RESTORE_FROM)
    saved_state_dict_D2 = torch.load(D2_RESTORE_FROM)
    model_D1.load_state_dict(saved_state_dict_D1)
    model_D2.load_state_dict(saved_state_dict_D2)

    model_D1.train()
    model_D1.cuda(args.gpu)

    model_D2.train()
    model_D2.cuda(args.gpu)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    trainloader = data.DataLoader(GTA5DataSet(args.data_dir,
                                              args.data_list,
                                              max_iters=args.num_steps *
                                              args.iter_size * args.batch_size,
                                              crop_size=input_size,
                                              scale=args.random_scale,
                                              mirror=args.random_mirror,
                                              mean=IMG_MEAN),
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  pin_memory=True)

    trainloader_iter = enumerate(trainloader)

    targetloader = data.DataLoader(cityscapesDataSet(
        args.data_dir_target,
        args.data_list_target,
        max_iters=args.num_steps * args.iter_size * args.batch_size,
        crop_size=input_size_target,
        scale=False,
        mirror=args.random_mirror,
        mean=CITY_IMG_MEAN,
        set=args.set),
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   pin_memory=True)

    targetloader_iter = enumerate(targetloader)

    # implement model.optim_parameters(args) to handle different models' lr setting

    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    optimizer_D1 = optim.Adam(model_D1.parameters(),
                              lr=args.learning_rate_D,
                              betas=(0.7, 0.99))
    optimizer_D2 = optim.Adam(model_D2.parameters(),
                              lr=args.learning_rate_D,
                              betas=(0.7, 0.99))

    opti_state_dict = torch.load(OPTI_RESTORE_FROM)
    opti_state_dict_d1 = torch.load(OPTI_D1_RESTORE_FROM)
    opti_state_dict_d2 = torch.load(OPTI_D2_RESTORE_FROM)
    optimizer.load_state_dict(opti_state_dict)
    optimizer_D1.load_state_dict(opti_state_dict_d1)
    optimizer_D1.load_state_dict(opti_state_dict_d2)

    optimizer.zero_grad()
    optimizer_D1.zero_grad()
    optimizer_D2.zero_grad()

    bce_loss = torch.nn.BCEWithLogitsLoss()

    interp = nn.Upsample(size=(input_size[1], input_size[0]),
                         mode='bilinear',
                         align_corners=True)
    interp_target = nn.Upsample(size=(input_size_target[1],
                                      input_size_target[0]),
                                mode='bilinear',
                                align_corners=True)

    # labels for adversarial training
    source_label = 1
    target_label = 0
    mIoUs = []
    i_iters = []

    for i_iter in range(args.num_steps):
        if i_iter <= iter_start:
            continue

        loss_seg_value1 = 0
        loss_adv_target_value1 = 0
        loss_D_value1 = 0

        loss_seg_value2 = 0
        loss_adv_target_value2 = 0
        loss_D_value2 = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)

        optimizer_D1.zero_grad()
        optimizer_D2.zero_grad()
        adjust_learning_rate_D(optimizer_D1, i_iter)
        adjust_learning_rate_D(optimizer_D2, i_iter)

        for sub_i in range(args.iter_size):

            # train G

            # don't accumulate grads in D
            for param in model_D1.parameters():
                param.requires_grad = False

            for param in model_D2.parameters():
                param.requires_grad = False

            # train with source

            _, batch = trainloader_iter.__next__()
            images, labels, _, _ = batch
            images = Variable(images).cuda(args.gpu)

            pred1, pred2 = model(images)
            pred1 = interp(pred1)
            pred2 = interp(pred2)

            loss_seg1 = loss_calc(pred1, labels, args.gpu)
            loss_seg2 = loss_calc(pred2, labels, args.gpu)
            loss = loss_seg2 + args.lambda_seg * loss_seg1

            # proper normalization
            loss = loss / args.iter_size
            loss.backward()
            loss_seg_value1 += loss_seg1.data.cpu().numpy() / args.iter_size
            loss_seg_value2 += loss_seg2.data.cpu().numpy() / args.iter_size

            # train with target

            _, batch = targetloader_iter.__next__()
            images, _, name = batch
            images = Variable(images).cuda(args.gpu)

            pred_target1, pred_target2 = model(images)
            pred_target1 = interp_target(pred_target1)
            pred_target2 = interp_target(pred_target2)

            D_out1 = model_D1(F.softmax(pred_target1, dim=1))
            D_out2 = model_D2(F.softmax(pred_target2, dim=1))

            loss_adv_target1 = bce_loss(
                D_out1,
                Variable(
                    torch.FloatTensor(
                        D_out1.data.size()).fill_(source_label)).cuda(
                            args.gpu))

            loss_adv_target2 = bce_loss(
                D_out2,
                Variable(
                    torch.FloatTensor(
                        D_out2.data.size()).fill_(source_label)).cuda(
                            args.gpu))

            loss = args.lambda_adv_target1 * loss_adv_target1 + args.lambda_adv_target2 * loss_adv_target2
            loss = loss / args.iter_size
            loss.backward()
            loss_adv_target_value1 += loss_adv_target1.data.cpu().numpy(
            ) / args.iter_size
            loss_adv_target_value2 += loss_adv_target2.data.cpu().numpy(
            ) / args.iter_size

            # train D

            # bring back requires_grad
            for param in model_D1.parameters():
                param.requires_grad = True

            for param in model_D2.parameters():
                param.requires_grad = True

            # train with source
            pred1 = pred1.detach()
            pred2 = pred2.detach()

            D_out1 = model_D1(F.softmax(pred1, dim=1))
            D_out2 = model_D2(F.softmax(pred2, dim=1))

            weight_s = float(D_out2.mean().data.cpu().numpy())

            loss_D1 = bce_loss(
                D_out1,
                Variable(
                    torch.FloatTensor(
                        D_out1.data.size()).fill_(source_label)).cuda(
                            args.gpu))

            loss_D2 = bce_loss(
                D_out2,
                Variable(
                    torch.FloatTensor(
                        D_out2.data.size()).fill_(source_label)).cuda(
                            args.gpu))

            loss_D1 = loss_D1 / args.iter_size / 2
            loss_D2 = loss_D2 / args.iter_size / 2

            loss_D1.backward()
            loss_D2.backward()

            loss_D_value1 += loss_D1.data.cpu().numpy()
            loss_D_value2 += loss_D2.data.cpu().numpy()

            # train with target
            pred_target1 = pred_target1.detach()
            pred_target2 = pred_target2.detach()

            D_out1 = model_D1(F.softmax(pred_target1, dim=1))
            D_out2 = model_D2(F.softmax(pred_target2, dim=1))

            weight_t = float(D_out2.mean().data.cpu().numpy())
            # if weight_b>0.5 and i_iter>500:
            #     confidence_map = interp(D_out2).cpu().data[0][0].numpy()
            #     name = name[0].split('/')[-1]
            #     confidence_map=255*confidence_map
            #     confidence_output=Image.fromarray(confidence_map.astype(np.uint8))
            #     confidence_output.save('./result/confid_map/%s.png' % (name.split('.')[0]))
            #     zq=1
            print(weight_s, weight_t)

            loss_D1 = bce_loss(
                D_out1,
                Variable(
                    torch.FloatTensor(
                        D_out1.data.size()).fill_(target_label)).cuda(
                            args.gpu))

            loss_D2 = bce_loss(
                D_out2,
                Variable(
                    torch.FloatTensor(
                        D_out2.data.size()).fill_(target_label)).cuda(
                            args.gpu))

            loss_D1 = loss_D1 / args.iter_size / 2
            loss_D2 = loss_D2 / args.iter_size / 2

            loss_D1.backward()
            loss_D2.backward()

            loss_D_value1 += loss_D1.data.cpu().numpy()
            loss_D_value2 += loss_D2.data.cpu().numpy()

        optimizer.step()
        optimizer_D1.step()
        optimizer_D2.step()

        # print('exp = {}'.format(args.snapshot_dir))
        print(
            'iter = {0:8d}/{1:8d}, loss_seg1 = {2:.3f} loss_seg2 = {3:.3f} loss_adv1 = {4:.3f}, loss_adv2 = {5:.3f} loss_D1 = {6:.3f} loss_D2 = {7:.3f}'
            .format(i_iter, args.num_steps, loss_seg_value1, loss_seg_value2,
                    loss_adv_target_value1, loss_adv_target_value2,
                    loss_D_value1, loss_D_value2))

        if i_iter >= args.num_steps_stop - 1:
            print('save model ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir,
                         'GTA5_' + str(args.num_steps) + '.pth'))
            torch.save(
                model_D1.state_dict(),
                osp.join(args.snapshot_dir,
                         'GTA5_' + str(args.num_steps) + '_D1.pth'))
            torch.save(
                model_D2.state_dict(),
                osp.join(args.snapshot_dir,
                         'GTA5_' + str(args.num_steps) + '_D2.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '.pth'))
            torch.save(
                model_D1.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D1.pth'))
            torch.save(
                model_D2.state_dict(),
                osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D2.pth'))
            torch.save(
                optimizer.state_dict(),
                osp.join(args.snapshot_dir,
                         'GTA5_' + str(i_iter) + '_optimizer.pth'))
            torch.save(
                optimizer_D1.state_dict(),
                osp.join(args.snapshot_dir,
                         'GTA5_' + str(i_iter) + '_optimizer_D1.pth'))
            torch.save(
                optimizer_D2.state_dict(),
                osp.join(args.snapshot_dir,
                         'GTA5_' + str(i_iter) + '_optimizer_D2.pth'))
            show_pred_sv_dir = pre_sv_dir.format(i_iter)
            mIoU = show_val(model.state_dict(), show_pred_sv_dir)
            mIoUs.append(str(round(np.nanmean(mIoU) * 100, 2)))
            i_iters.append(i_iter)
            print_i = 0
            for miou in mIoUs:
                print('i{0}: {1}'.format(i_iters[print_i], miou))
                print_i = print_i + 1
Exemplo n.º 20
0
def main():
    """Create the model and start the evaluation process."""

    args = get_arguments()

    if not os.path.exists(args.save):
        os.makedirs(args.save)

    gpu0 = args.gpu

    model = Res_Deeplab(num_classes=args.num_classes)

    if args.restore_from[:4] == 'http':
        saved_state_dict = model_zoo.load_url(args.restore_from)
    else:
        saved_state_dict = torch.load(args.restore_from)
    model.load_state_dict(saved_state_dict)

    model.eval()
    model.cuda(gpu0)

    #===========load discriminator model====begin===
    model_d2 = FCDiscriminator(num_classes=args.num_classes)
    d2_state_dict = torch.load(args.dis_restore_from)
    model_d2.load_state_dict(d2_state_dict)
    model_d2.eval()
    model_d2.cuda(gpu0)

    #===========load discriminator model====end===
    testloader = data.DataLoader(cityscapesDataSet(args.data_dir,
                                                   args.data_list,
                                                   crop_size=(1024, 512),
                                                   mean=IMG_MEAN,
                                                   scale=False,
                                                   mirror=False,
                                                   set=args.set),
                                 batch_size=1,
                                 shuffle=False,
                                 pin_memory=True)

    interp = nn.Upsample(size=(1024, 2048),
                         mode='bilinear',
                         align_corners=True)
    # interp_target = nn.Upsample(size=(input_size_target[1], input_size_target[0]), mode='bilinear')

    out_values = []
    fine_out_values = []
    retrain_list = []
    file = open(CITYS_RETRAIN_TXT, 'w')
    for index, batch in enumerate(testloader):
        if index % 20 == 0:
            print('%d processd of %d' % (index, len(testloader)))
        image, _, name = batch
        output1, output2 = model(Variable(image, volatile=True).cuda(gpu0))
        ini_output = interp(output2)
        d2_out1 = model_d2(F.softmax(ini_output,
                                     dim=1))  #.cpu().data[0].numpy()
        out_valu = d2_out1.mean()
        out_valu_img = np.array([[name[0]], out_valu.cpu().data.numpy()])

        out_values.extend(out_valu_img)
        if out_valu.cpu().data.numpy() > 0.64:
            fine_out_valu_img = np.array([[name[0]],
                                          out_valu.cpu().data.numpy()])
            fine_out_values.extend(fine_out_valu_img)

            file.write(name[0] + '\n')
            output = interp(output2).cpu().data[0].numpy()

            output = output.transpose(1, 2, 0)
            output = np.asarray(np.argmax(output, axis=2), dtype=np.uint8)

            output_col = colorize_mask(output)
            name = name[0].split('/')[-1]
            # output_col.save('%s/%s_color.png' % (args.save, name.split('.')[0]))
            output_col.save('%s/%s.png' % (args.save, name.split('.')[0]))
            # print('its confidence value is %f' % out_valu)

            # plt.imshow(output_col)
            # plt.title(str(out_valu))
            # plt.show()

            # output = Image.fromarray(output)
            # output.save('%s/%s' % (args.save, name))

    out_values = np.array(out_values)

    np.save(CITYS_VALUES_SV_PATH, out_values)
    np.save(CITYS_FINE_VALUES_SV_PATH, fine_out_values)

    file.close()
Exemplo n.º 21
0
def main():

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    cudnn.enabled = True
    gpu = args.gpu

    # create network
    model = Res_Deeplab(num_classes=args.num_classes)

    # load pretrained parameters
    if args.restore_from[:4] == 'http':
        saved_state_dict = model_zoo.load_url(args.restore_from)
    else:
        saved_state_dict = torch.load(args.restore_from)

    # only copy the params that exist in current model (caffe-like)
    new_params = model.state_dict().copy()
    for name, param in new_params.items():
        print(name)
        if name in saved_state_dict and param.size(
        ) == saved_state_dict[name].size():
            new_params[name].copy_(saved_state_dict[name])
            print('copy {}'.format(name))
    model.load_state_dict(new_params)

    model.train()
    model.cuda(args.gpu)

    cudnn.benchmark = True

    # init D
    model_D = FCDiscriminator(num_classes=args.num_classes)
    if args.restore_from_D is not None:
        model_D.load_state_dict(torch.load(args.restore_from_D))
    model_D.train()
    model_D.cuda(args.gpu)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    train_dataset = VOCDataSet(args.data_dir,
                               args.data_list,
                               crop_size=input_size,
                               scale=args.random_scale,
                               mirror=args.random_mirror,
                               mean=IMG_MEAN)

    train_dataset_size = len(train_dataset)

    train_gt_dataset = VOCGTDataSet(args.data_dir,
                                    args.data_list,
                                    crop_size=input_size,
                                    scale=args.random_scale,
                                    mirror=args.random_mirror,
                                    mean=IMG_MEAN)

    if args.partial_data is None:
        trainloader = data.DataLoader(train_dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=16,
                                      pin_memory=True)

        trainloader_gt = data.DataLoader(train_gt_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=16,
                                         pin_memory=True)
    else:
        #sample partial data
        partial_size = int(args.partial_data * train_dataset_size)

        if args.partial_id is not None:
            train_ids = pickle.load(open(args.partial_id))
            print('loading train ids from {}'.format(args.partial_id))
        else:
            train_ids = np.arange(train_dataset_size)
            np.random.shuffle(train_ids)

        pickle.dump(train_ids,
                    open(osp.join(args.snapshot_dir, 'train_id.pkl'), 'wb'))

        train_sampler_all = data.sampler.SubsetRandomSampler(train_ids)
        train_gt_sampler_all = data.sampler.SubsetRandomSampler(train_ids)
        train_sampler = data.sampler.SubsetRandomSampler(
            train_ids[:partial_size])
        train_remain_sampler = data.sampler.SubsetRandomSampler(
            train_ids[partial_size:])
        train_gt_sampler = data.sampler.SubsetRandomSampler(
            train_ids[:partial_size])

        trainloader_all = data.DataLoader(train_dataset,
                                          batch_size=args.batch_size,
                                          sampler=train_sampler_all,
                                          num_workers=16,
                                          pin_memory=True)
        trainloader_gt_all = data.DataLoader(train_gt_dataset,
                                             batch_size=args.batch_size,
                                             sampler=train_gt_sampler_all,
                                             num_workers=16,
                                             pin_memory=True)
        trainloader = data.DataLoader(train_dataset,
                                      batch_size=args.batch_size,
                                      sampler=train_sampler,
                                      num_workers=16,
                                      pin_memory=True)
        trainloader_remain = data.DataLoader(train_dataset,
                                             batch_size=args.batch_size,
                                             sampler=train_remain_sampler,
                                             num_workers=16,
                                             pin_memory=True)
        trainloader_gt = data.DataLoader(train_gt_dataset,
                                         batch_size=args.batch_size,
                                         sampler=train_gt_sampler,
                                         num_workers=16,
                                         pin_memory=True)

        trainloader_remain_iter = iter(trainloader_remain)

    trainloader_all_iter = iter(trainloader_all)
    trainloader_iter = iter(trainloader)
    trainloader_gt_iter = iter(trainloader_gt)

    # implement model.optim_parameters(args) to handle different models' lr setting

    # optimizer for segmentation network
    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    optimizer.zero_grad()

    # optimizer for discriminator network
    optimizer_D = optim.Adam(model_D.parameters(),
                             lr=args.learning_rate_D,
                             betas=(0.9, 0.99))
    optimizer_D.zero_grad()

    # loss/ bilinear upsampling
    bce_loss = BCEWithLogitsLoss2d()
    interp = nn.Upsample(size=(input_size[1], input_size[0]), mode='bilinear')

    # labels for adversarial training
    pred_label = 0
    gt_label = 1

    #y_real_, y_fake_ = Variable(torch.ones(args.batch_size, 1).cuda()), Variable(torch.zeros(args.batch_size, 1).cuda())

    for i_iter in range(args.num_steps):

        loss_seg_value = 0
        loss_adv_pred_value = 0
        loss_D_value = 0
        loss_fm_value = 0
        loss_value = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)
        optimizer_D.zero_grad()
        adjust_learning_rate_D(optimizer_D, i_iter)

        for sub_i in range(args.iter_size):

            # train G

            # don't accumulate grads in D
            for param in model_D.parameters():
                param.requires_grad = False

            # train with source

            try:
                batch = next(trainloader_iter)
            except:
                trainloader_iter = iter(trainloader)
                batch = next(trainloader_iter)

            images, labels, _, _ = batch
            images = Variable(images).cuda(args.gpu)
            #ignore_mask = (labels.numpy() == 255)
            pred = interp(model(images))

            loss_seg = loss_calc(pred, labels, args.gpu)
            loss_seg.backward()
            loss_seg_value += loss_seg.data.cpu().numpy()[0] / args.iter_size

            if i_iter >= args.adv_start:

                #fm loss calc
                try:
                    batch = next(trainloader_all_iter)
                except:
                    trainloader_iter = iter(trainloader_all)
                    batch = next(trainloader_all_iter)

                images, labels, _, _ = batch
                images = Variable(images).cuda(args.gpu)
                #ignore_mask = (labels.numpy() == 255)
                pred = interp(model(images))

                _, D_out_y_pred = model_D(F.softmax(pred))

                trainloader_gt_iter = iter(trainloader_gt)
                batch = next(trainloader_gt_iter)

                _, labels_gt, _, _ = batch
                D_gt_v = Variable(one_hot(labels_gt)).cuda(args.gpu)
                #ignore_mask_gt = (labels_gt.numpy() == 255)

                _, D_out_y_gt = model_D(D_gt_v)

                fm_loss = torch.mean(
                    torch.abs(
                        torch.mean(D_out_y_gt, 0) -
                        torch.mean(D_out_y_pred, 0)))

                loss = loss_seg + args.lambda_fm * fm_loss

                # proper normalization
                fm_loss.backward()
                #loss_seg_value += loss_seg.data.cpu().numpy()[0]/args.iter_size
                loss_fm_value += fm_loss.data.cpu().numpy()[0] / args.iter_size
                loss_value += loss.data.cpu().numpy()[0] / args.iter_size

                # train D

                # bring back requires_grad
                for param in model_D.parameters():
                    param.requires_grad = True

                # train with pred
                pred = pred.detach()

                D_out_z, _ = model_D(F.softmax(pred))
                y_fake_ = Variable(torch.zeros(D_out_z.size(0), 1).cuda())
                loss_D_fake = criterion(D_out_z, y_fake_)

                # train with gt
                # get gt labels
                _, labels_gt, _, _ = batch
                D_gt_v = Variable(one_hot(labels_gt)).cuda(args.gpu)
                #ignore_mask_gt = (labels_gt.numpy() == 255)

                D_out_z_gt, _ = model_D(D_gt_v)
                #D_out = interp(D_out_x)

                y_real_ = Variable(torch.ones(D_out_z_gt.size(0), 1).cuda())

                loss_D_real = criterion(D_out_z_gt, y_real_)
                loss_D = loss_D_fake + loss_D_real
                loss_D.backward()
                loss_D_value += loss_D.data.cpu().numpy()[0]

        optimizer.step()
        optimizer_D.step()

        print('exp = {}'.format(args.snapshot_dir))
        print('iter = {0:8d}/{1:8d}, loss_seg = {2:.3f}, loss_D = {3:.3f}'.
              format(i_iter, args.num_steps, loss_seg_value, loss_D_value))
        print('fm_loss: ', loss_fm_value, ' g_loss: ', loss_value)

        if i_iter >= args.num_steps - 1:
            print('save model ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir,
                         'VOC_' + str(args.num_steps) + '.pth'))
            torch.save(
                model_D.state_dict(),
                osp.join(args.snapshot_dir,
                         'VOC_' + str(args.num_steps) + '_D.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir, 'VOC_' + str(i_iter) + '.pth'))
            torch.save(
                model_D.state_dict(),
                osp.join(args.snapshot_dir, 'VOC_' + str(i_iter) + '_D.pth'))

    end = timeit.default_timer()
    print(end - start, 'seconds')
Exemplo n.º 22
0
def train(log_file, arch, dataset, batch_size, iter_size, num_workers,
          partial_data, partial_data_size, partial_id, ignore_label, crop_size,
          eval_crop_size, is_training, learning_rate, learning_rate_d,
          supervised, lambda_adv_pred, lambda_semi, lambda_semi_adv, mask_t,
          semi_start, semi_start_adv, d_remain, momentum, not_restore_last,
          num_steps, power, random_mirror, random_scale, random_seed,
          restore_from, restore_from_d, eval_every, save_snapshot_every,
          snapshot_dir, weight_decay, device):
    settings = locals().copy()

    import cv2
    import torch
    import torch.nn as nn
    from torch.utils import data, model_zoo
    import numpy as np
    import pickle
    import torch.optim as optim
    import torch.nn.functional as F
    import scipy.misc
    import sys
    import os
    import os.path as osp
    import pickle

    from model.deeplab import Res_Deeplab
    from model.unet import unet_resnet50
    from model.deeplabv3 import resnet101_deeplabv3
    from model.discriminator import FCDiscriminator
    from utils.loss import CrossEntropy2d, BCEWithLogitsLoss2d
    from utils.evaluation import EvaluatorIoU
    from dataset.voc_dataset import VOCDataSet
    import logger

    torch_device = torch.device(device)

    import time

    if log_file != '' and log_file != 'none':
        if os.path.exists(log_file):
            print('Log file {} already exists; exiting...'.format(log_file))
            return

    with logger.LogFile(log_file if log_file != 'none' else None):
        if dataset == 'pascal_aug':
            ds = VOCDataSet(augmented_pascal=True)
        elif dataset == 'pascal':
            ds = VOCDataSet(augmented_pascal=False)
        else:
            print('Dataset {} not yet supported'.format(dataset))
            return

        print('Command: {}'.format(sys.argv[0]))
        print('Arguments: {}'.format(' '.join(sys.argv[1:])))
        print('Settings: {}'.format(', '.join([
            '{}={}'.format(k, settings[k])
            for k in sorted(list(settings.keys()))
        ])))

        print('Loaded data')

        def loss_calc(pred, label):
            """
            This function returns cross entropy loss for semantic segmentation
            """
            # out shape batch_size x channels x h x w -> batch_size x channels x h x w
            # label shape h x w x 1 x batch_size  -> batch_size x 1 x h x w
            label = label.long().to(torch_device)
            criterion = CrossEntropy2d()

            return criterion(pred, label)

        def lr_poly(base_lr, iter, max_iter, power):
            return base_lr * ((1 - float(iter) / max_iter)**(power))

        def adjust_learning_rate(optimizer, i_iter):
            lr = lr_poly(learning_rate, i_iter, num_steps, power)
            optimizer.param_groups[0]['lr'] = lr
            if len(optimizer.param_groups) > 1:
                optimizer.param_groups[1]['lr'] = lr * 10

        def adjust_learning_rate_D(optimizer, i_iter):
            lr = lr_poly(learning_rate_d, i_iter, num_steps, power)
            optimizer.param_groups[0]['lr'] = lr
            if len(optimizer.param_groups) > 1:
                optimizer.param_groups[1]['lr'] = lr * 10

        def one_hot(label):
            label = label.numpy()
            one_hot = np.zeros((label.shape[0], ds.num_classes, label.shape[1],
                                label.shape[2]),
                               dtype=label.dtype)
            for i in range(ds.num_classes):
                one_hot[:, i, ...] = (label == i)
            #handle ignore labels
            return torch.tensor(one_hot,
                                dtype=torch.float,
                                device=torch_device)

        def make_D_label(label, ignore_mask):
            ignore_mask = np.expand_dims(ignore_mask, axis=1)
            D_label = np.ones(ignore_mask.shape) * label
            D_label[ignore_mask] = ignore_label
            D_label = torch.tensor(D_label,
                                   dtype=torch.float,
                                   device=torch_device)

            return D_label

        h, w = map(int, eval_crop_size.split(','))
        eval_crop_size = (h, w)

        h, w = map(int, crop_size.split(','))
        crop_size = (h, w)

        # create network
        if arch == 'deeplab2':
            model = Res_Deeplab(num_classes=ds.num_classes)
        elif arch == 'unet_resnet50':
            model = unet_resnet50(num_classes=ds.num_classes)
        elif arch == 'resnet101_deeplabv3':
            model = resnet101_deeplabv3(num_classes=ds.num_classes)
        else:
            print('Architecture {} not supported'.format(arch))
            return

        # load pretrained parameters
        if restore_from[:4] == 'http':
            saved_state_dict = model_zoo.load_url(restore_from)
        else:
            saved_state_dict = torch.load(restore_from)

        # only copy the params that exist in current model (caffe-like)
        new_params = model.state_dict().copy()
        for name, param in new_params.items():
            if name in saved_state_dict and param.size(
            ) == saved_state_dict[name].size():
                new_params[name].copy_(saved_state_dict[name])
        model.load_state_dict(new_params)

        model.train()
        model = model.to(torch_device)

        # init D
        model_D = FCDiscriminator(num_classes=ds.num_classes)
        if restore_from_d is not None:
            model_D.load_state_dict(torch.load(restore_from_d))
        model_D.train()
        model_D = model_D.to(torch_device)

        print('Built model')

        if snapshot_dir is not None:
            if not os.path.exists(snapshot_dir):
                os.makedirs(snapshot_dir)

        ds_train_xy = ds.train_xy(crop_size=crop_size,
                                  scale=random_scale,
                                  mirror=random_mirror,
                                  range01=model.RANGE01,
                                  mean=model.MEAN,
                                  std=model.STD)
        ds_train_y = ds.train_y(crop_size=crop_size,
                                scale=random_scale,
                                mirror=random_mirror,
                                range01=model.RANGE01,
                                mean=model.MEAN,
                                std=model.STD)
        ds_val_xy = ds.val_xy(crop_size=eval_crop_size,
                              scale=False,
                              mirror=False,
                              range01=model.RANGE01,
                              mean=model.MEAN,
                              std=model.STD)

        train_dataset_size = len(ds_train_xy)

        if partial_data_size != -1:
            if partial_data_size > partial_data_size:
                print('partial-data-size > |train|: exiting')
                return

        if partial_data == 1.0 and (partial_data_size == -1 or
                                    partial_data_size == train_dataset_size):
            trainloader = data.DataLoader(ds_train_xy,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=5,
                                          pin_memory=True)

            trainloader_gt = data.DataLoader(ds_train_y,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=5,
                                             pin_memory=True)

            trainloader_remain = None
            print('|train|={}'.format(train_dataset_size))
            print('|val|={}'.format(len(ds_val_xy)))
        else:
            #sample partial data
            if partial_data_size != -1:
                partial_size = partial_data_size
            else:
                partial_size = int(partial_data * train_dataset_size)

            if partial_id is not None:
                train_ids = pickle.load(open(partial_id))
                print('loading train ids from {}'.format(partial_id))
            else:
                rng = np.random.RandomState(random_seed)
                train_ids = list(rng.permutation(train_dataset_size))

            if snapshot_dir is not None:
                pickle.dump(train_ids,
                            open(osp.join(snapshot_dir, 'train_id.pkl'), 'wb'))

            print('|train supervised|={}'.format(partial_size))
            print('|train unsupervised|={}'.format(train_dataset_size -
                                                   partial_size))
            print('|val|={}'.format(len(ds_val_xy)))

            print('supervised={}'.format(list(train_ids[:partial_size])))

            train_sampler = data.sampler.SubsetRandomSampler(
                train_ids[:partial_size])
            train_remain_sampler = data.sampler.SubsetRandomSampler(
                train_ids[partial_size:])
            train_gt_sampler = data.sampler.SubsetRandomSampler(
                train_ids[:partial_size])

            trainloader = data.DataLoader(ds_train_xy,
                                          batch_size=batch_size,
                                          sampler=train_sampler,
                                          num_workers=3,
                                          pin_memory=True)
            trainloader_remain = data.DataLoader(ds_train_xy,
                                                 batch_size=batch_size,
                                                 sampler=train_remain_sampler,
                                                 num_workers=3,
                                                 pin_memory=True)
            trainloader_gt = data.DataLoader(ds_train_y,
                                             batch_size=batch_size,
                                             sampler=train_gt_sampler,
                                             num_workers=3,
                                             pin_memory=True)

            trainloader_remain_iter = enumerate(trainloader_remain)

        testloader = data.DataLoader(ds_val_xy,
                                     batch_size=1,
                                     shuffle=False,
                                     pin_memory=True)

        print('Data loaders ready')

        trainloader_iter = enumerate(trainloader)
        trainloader_gt_iter = enumerate(trainloader_gt)

        # implement model.optim_parameters(args) to handle different models' lr setting

        # optimizer for segmentation network
        optimizer = optim.SGD(model.optim_parameters(learning_rate),
                              lr=learning_rate,
                              momentum=momentum,
                              weight_decay=weight_decay)
        optimizer.zero_grad()

        # optimizer for discriminator network
        optimizer_D = optim.Adam(model_D.parameters(),
                                 lr=learning_rate_d,
                                 betas=(0.9, 0.99))
        optimizer_D.zero_grad()

        # loss/ bilinear upsampling
        bce_loss = BCEWithLogitsLoss2d()

        print('Built optimizer')

        # labels for adversarial training
        pred_label = 0
        gt_label = 1

        loss_seg_value = 0
        loss_adv_pred_value = 0
        loss_D_value = 0
        loss_semi_mask_accum = 0
        loss_semi_value = 0
        loss_semi_adv_value = 0

        t1 = time.time()

        print('Training for {} steps...'.format(num_steps))
        for i_iter in range(num_steps + 1):

            model.train()
            model.freeze_batchnorm()

            optimizer.zero_grad()
            adjust_learning_rate(optimizer, i_iter)
            optimizer_D.zero_grad()
            adjust_learning_rate_D(optimizer_D, i_iter)

            for sub_i in range(iter_size):

                # train G

                if not supervised:
                    # don't accumulate grads in D
                    for param in model_D.parameters():
                        param.requires_grad = False

                # do semi first
                if not supervised and (lambda_semi > 0 or lambda_semi_adv > 0 ) and i_iter >= semi_start_adv and \
                        trainloader_remain is not None:
                    try:
                        _, batch = next(trainloader_remain_iter)
                    except:
                        trainloader_remain_iter = enumerate(trainloader_remain)
                        _, batch = next(trainloader_remain_iter)

                    # only access to img
                    images, _, _, _ = batch
                    images = images.float().to(torch_device)

                    pred = model(images)
                    pred_remain = pred.detach()

                    D_out = model_D(F.softmax(pred, dim=1))
                    D_out_sigmoid = F.sigmoid(
                        D_out).data.cpu().numpy().squeeze(axis=1)

                    ignore_mask_remain = np.zeros(D_out_sigmoid.shape).astype(
                        np.bool)

                    loss_semi_adv = lambda_semi_adv * bce_loss(
                        D_out, make_D_label(gt_label, ignore_mask_remain))
                    loss_semi_adv = loss_semi_adv / iter_size

                    #loss_semi_adv.backward()
                    loss_semi_adv_value += float(
                        loss_semi_adv) / lambda_semi_adv

                    if lambda_semi <= 0 or i_iter < semi_start:
                        loss_semi_adv.backward()
                        loss_semi_value = 0
                    else:
                        # produce ignore mask
                        semi_ignore_mask = (D_out_sigmoid < mask_t)

                        semi_gt = pred.data.cpu().numpy().argmax(axis=1)
                        semi_gt[semi_ignore_mask] = ignore_label

                        semi_ratio = 1.0 - float(
                            semi_ignore_mask.sum()) / semi_ignore_mask.size

                        loss_semi_mask_accum += float(semi_ratio)

                        if semi_ratio == 0.0:
                            loss_semi_value += 0
                        else:
                            semi_gt = torch.FloatTensor(semi_gt)

                            loss_semi = lambda_semi * loss_calc(pred, semi_gt)
                            loss_semi = loss_semi / iter_size
                            loss_semi_value += float(loss_semi) / lambda_semi
                            loss_semi += loss_semi_adv
                            loss_semi.backward()

                else:
                    loss_semi = None
                    loss_semi_adv = None

                # train with source

                try:
                    _, batch = next(trainloader_iter)
                except:
                    trainloader_iter = enumerate(trainloader)
                    _, batch = next(trainloader_iter)

                images, labels, _, _ = batch
                images = images.float().to(torch_device)
                ignore_mask = (labels.numpy() == ignore_label)
                pred = model(images)

                loss_seg = loss_calc(pred, labels)

                if supervised:
                    loss = loss_seg
                else:
                    D_out = model_D(F.softmax(pred, dim=1))

                    loss_adv_pred = bce_loss(
                        D_out, make_D_label(gt_label, ignore_mask))

                    loss = loss_seg + lambda_adv_pred * loss_adv_pred
                    loss_adv_pred_value += float(loss_adv_pred) / iter_size

                # proper normalization
                loss = loss / iter_size
                loss.backward()
                loss_seg_value += float(loss_seg) / iter_size

                if not supervised:
                    # train D

                    # bring back requires_grad
                    for param in model_D.parameters():
                        param.requires_grad = True

                    # train with pred
                    pred = pred.detach()

                    if d_remain:
                        pred = torch.cat((pred, pred_remain), 0)
                        ignore_mask = np.concatenate(
                            (ignore_mask, ignore_mask_remain), axis=0)

                    D_out = model_D(F.softmax(pred, dim=1))
                    loss_D = bce_loss(D_out,
                                      make_D_label(pred_label, ignore_mask))
                    loss_D = loss_D / iter_size / 2
                    loss_D.backward()
                    loss_D_value += float(loss_D)

                    # train with gt
                    # get gt labels
                    try:
                        _, batch = next(trainloader_gt_iter)
                    except:
                        trainloader_gt_iter = enumerate(trainloader_gt)
                        _, batch = next(trainloader_gt_iter)

                    _, labels_gt, _, _ = batch
                    D_gt_v = one_hot(labels_gt)
                    ignore_mask_gt = (labels_gt.numpy() == ignore_label)

                    D_out = model_D(D_gt_v)
                    loss_D = bce_loss(D_out,
                                      make_D_label(gt_label, ignore_mask_gt))
                    loss_D = loss_D / iter_size / 2
                    loss_D.backward()
                    loss_D_value += float(loss_D)

            optimizer.step()
            optimizer_D.step()

            sys.stdout.write('.')
            sys.stdout.flush()

            if i_iter % eval_every == 0 and i_iter != 0:
                model.eval()
                with torch.no_grad():
                    evaluator = EvaluatorIoU(ds.num_classes)
                    for index, batch in enumerate(testloader):
                        image, label, size, name = batch
                        size = size[0].numpy()
                        image = image.float().to(torch_device)
                        output = model(image)
                        output = output.cpu().data[0].numpy()

                        output = output[:, :size[0], :size[1]]
                        gt = np.asarray(label[0].numpy()[:size[0], :size[1]],
                                        dtype=np.int)

                        output = output.transpose(1, 2, 0)
                        output = np.asarray(np.argmax(output, axis=2),
                                            dtype=np.int)

                        evaluator.sample(gt, output, ignore_value=ignore_label)

                        sys.stdout.write('+')
                        sys.stdout.flush()

                per_class_iou = evaluator.score()
                mean_iou = per_class_iou.mean()

                loss_seg_value /= eval_every
                loss_adv_pred_value /= eval_every
                loss_D_value /= eval_every
                loss_semi_mask_accum /= eval_every
                loss_semi_value /= eval_every
                loss_semi_adv_value /= eval_every

                sys.stdout.write('\n')

                t2 = time.time()

                print(
                    'iter = {:8d}/{:8d}, took {:.3f}s, loss_seg = {:.6f}, loss_adv_p = {:.6f}, loss_D = {:.6f}, loss_semi_mask_rate = {:.3%} loss_semi = {:.6f}, loss_semi_adv = {:.3f}'
                    .format(i_iter, num_steps, t2 - t1, loss_seg_value,
                            loss_adv_pred_value, loss_D_value,
                            loss_semi_mask_accum, loss_semi_value,
                            loss_semi_adv_value))

                for i, (class_name,
                        iou) in enumerate(zip(ds.class_names, per_class_iou)):
                    print('class {:2d} {:12} IU {:.2f}'.format(
                        i, class_name, iou))

                print('meanIOU: ' + str(mean_iou) + '\n')

                loss_seg_value = 0
                loss_adv_pred_value = 0
                loss_D_value = 0
                loss_semi_value = 0
                loss_semi_mask_accum = 0
                loss_semi_adv_value = 0

                t1 = t2

            if snapshot_dir is not None and i_iter % save_snapshot_every == 0 and i_iter != 0:
                print('taking snapshot ...')
                torch.save(
                    model.state_dict(),
                    osp.join(snapshot_dir, 'VOC_' + str(i_iter) + '.pth'))
                torch.save(
                    model_D.state_dict(),
                    osp.join(snapshot_dir, 'VOC_' + str(i_iter) + '_D.pth'))

        if snapshot_dir is not None:
            print('save model ...')
            torch.save(
                model.state_dict(),
                osp.join(snapshot_dir, 'VOC_' + str(num_steps) + '.pth'))
            torch.save(
                model_D.state_dict(),
                osp.join(snapshot_dir, 'VOC_' + str(num_steps) + '_D.pth'))
Exemplo n.º 23
0
def main():

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    cudnn.enabled = True


    # create network
    model = Res_Deeplab(num_classes=args.num_classes)

    # load pretrained parameters
    if args.restore_from[:4] == 'http' :
        saved_state_dict = model_zoo.load_url(args.restore_from)
    else:
        saved_state_dict = torch.load(args.restore_from)

    # only copy the params that exist in currendt model (caffe-like)
    new_params = model.state_dict().copy()
    for name, param in new_params.items():
        print (name)
        if name in saved_state_dict and param.size() == saved_state_dict[name].size():
            new_params[name].copy_(saved_state_dict[name])
            print('copy {}'.format(name))

    model.load_state_dict(new_params)


    model.train()


    model=nn.DataParallel(model)
    model.cuda()


    cudnn.benchmark = True

    # init D
    model_D = FCDiscriminator(num_classes=args.num_classes)
    if args.restore_from_D is not None:
        model_D.load_state_dict(torch.load(args.restore_from_D))

    model_D = nn.DataParallel(model_D)
    model_D.train()
    model_D.cuda()


    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)


    train_dataset = VOCDataSet(args.data_dir, args.data_list, crop_size=input_size,
                    scale=args.random_scale, mirror=args.random_mirror, mean=IMG_MEAN)

    train_dataset_size = len(train_dataset)

    train_gt_dataset = VOCGTDataSet(args.data_dir, args.data_list, crop_size=input_size,
                       scale=args.random_scale, mirror=args.random_mirror, mean=IMG_MEAN)

    if args.partial_data is None:
        trainloader = data.DataLoader(train_dataset,
                        batch_size=args.batch_size, shuffle=True, num_workers=5, pin_memory=True)

        trainloader_gt = data.DataLoader(train_gt_dataset,
                        batch_size=args.batch_size, shuffle=True, num_workers=5, pin_memory=True)
    else:
        #sample partial data
        partial_size = int(args.partial_data * train_dataset_size)

        if args.partial_id is not None:
            train_ids = pickle.load(open(args.partial_id))
            print('loading train ids from {}'.format(args.partial_id))
        else:
            train_ids = list(range(train_dataset_size))
            np.random.shuffle(train_ids)

        pickle.dump(train_ids, open(osp.join(args.snapshot_dir, 'train_id.pkl'), 'wb'))

        train_sampler = data.sampler.SubsetRandomSampler(train_ids[:partial_size])
        train_remain_sampler = data.sampler.SubsetRandomSampler(train_ids[partial_size:])
        train_gt_sampler = data.sampler.SubsetRandomSampler(train_ids[:partial_size])

        trainloader = data.DataLoader(train_dataset,
                        batch_size=args.batch_size, sampler=train_sampler, num_workers=3, pin_memory=True)
        trainloader_remain = data.DataLoader(train_dataset,
                        batch_size=args.batch_size, sampler=train_remain_sampler, num_workers=3, pin_memory=True)
        trainloader_gt = data.DataLoader(train_gt_dataset,
                        batch_size=args.batch_size, sampler=train_gt_sampler, num_workers=3, pin_memory=True)

        trainloader_remain_iter = enumerate(trainloader_remain)


    trainloader_iter = enumerate(trainloader)
    trainloader_gt_iter = enumerate(trainloader_gt)


    # implement model.optim_parameters(args) to handle different models' lr setting

    # optimizer for segmentation network
    optimizer = optim.SGD(model.module.optim_parameters(args),
                lr=args.learning_rate, momentum=args.momentum,weight_decay=args.weight_decay)
    optimizer.zero_grad()

    # optimizer for discriminator network
    optimizer_D = optim.Adam(model_D.parameters(), lr=args.learning_rate_D, betas=(0.9,0.99))
    optimizer_D.zero_grad()

    # loss/ bilinear upsampling
    bce_loss = BCEWithLogitsLoss2d()
    interp = nn.Upsample(size=(input_size[1], input_size[0]), mode='bilinear')

    if version.parse(torch.__version__) >= version.parse('0.4.0'):
        interp = nn.Upsample(size=(input_size[1], input_size[0]), mode='bilinear', align_corners=True)
    else:
        interp = nn.Upsample(size=(input_size[1], input_size[0]), mode='bilinear')


    # labels for adversarial training
    pred_label = 0
    gt_label = 1


    for i_iter in range(args.num_steps):

        loss_seg_value = 0
        loss_adv_pred_value = 0
        loss_D_value = 0
        loss_semi_value = 0
        loss_semi_adv_value = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)
        optimizer_D.zero_grad()
        adjust_learning_rate_D(optimizer_D, i_iter)

        for sub_i in range(args.iter_size):


            # train G

            # don't accumulate grads in D
            for param in model_D.parameters():
                param.requires_grad = False

            # do semi first
            if (args.lambda_semi > 0 or args.lambda_semi_adv > 0 ) and i_iter >= args.semi_start_adv :
                try:
                    _, batch = trainloader_remain_iter.next()
                except:
                    trainloader_remain_iter = enumerate(trainloader_remain)
                    _, batch = trainloader_remain_iter.next()

                # only access to img
                images, _, _, _ = batch
                images = Variable(images).cuda()


                pred = interp(model(images))
                pred_remain = pred.detach()

                mask1=F.softmax(pred,dim=1).data.cpu().numpy()

                id2 = np.argmax(mask1, axis=1)#10, 321, 321)


                D_out = interp(model_D(F.softmax(pred,dim=1)))
                D_out_sigmoid = F.sigmoid(D_out).data.cpu().numpy().squeeze(axis=1)


                ignore_mask_remain = np.zeros(D_out_sigmoid.shape).astype(np.bool)

                loss_semi_adv = args.lambda_semi_adv * bce_loss(D_out, make_D_label(gt_label, ignore_mask_remain))
                loss_semi_adv = loss_semi_adv/args.iter_size

                #loss_semi_adv.backward()
                loss_semi_adv_value += loss_semi_adv.data.cpu().numpy()[0]/args.lambda_semi_adv

                if args.lambda_semi <= 0 or i_iter < args.semi_start:
                    loss_semi_adv.backward()
                    loss_semi_value = 0
                else:
                    # produce ignore mask
                    semi_ignore_mask = (D_out_sigmoid < args.mask_T)
                    #print semi_ignore_mask.shape 10,321,321

                    map2 = np.zeros([pred.size()[0], id2.shape[1], id2.shape[2]])
                    for k in  range(pred.size()[0]):
                        for i in range(id2.shape[1]):
                            for j in range(id2.shape[2]):
                               map2[k][i][j] = mask1[k][id2[k][i][j]][i][j]


                    semi_ignore_mask = (map2 <  0.999999)
                    semi_gt = pred.data.cpu().numpy().argmax(axis=1)
                    semi_gt[semi_ignore_mask] = 255

                    semi_ratio = 1.0 - float(semi_ignore_mask.sum())/semi_ignore_mask.size
                    print('semi ratio: {:.4f}'.format(semi_ratio))

                    if semi_ratio == 0.0:
                        loss_semi_value += 0
                    else:
                        semi_gt = torch.FloatTensor(semi_gt)

                        loss_semi = args.lambda_semi * loss_calc(pred, semi_gt)
                        loss_semi = loss_semi/args.iter_size
                        loss_semi_value += loss_semi.data.cpu().numpy()[0]/args.lambda_semi
                        loss_semi += loss_semi_adv
                        loss_semi.backward()

            else:
                loss_semi = None
                loss_semi_adv = None

            # train with source

            try:
                _, batch = trainloader_iter.next()
            except:
                trainloader_iter = enumerate(trainloader)
                _, batch = trainloader_iter.next()

            images, labels, _, _ = batch
            images = Variable(images).cuda()
            ignore_mask = (labels.numpy() == 255)
            pred = interp(model(images))

            loss_seg = loss_calc(pred, labels)

            D_out = interp(model_D(F.softmax(pred,dim=1)))

            loss_adv_pred = bce_loss(D_out, make_D_label(gt_label, ignore_mask))

            loss = loss_seg + args.lambda_adv_pred * loss_adv_pred

            # proper normalization
            loss = loss/args.iter_size
            loss.backward()
            loss_seg_value += loss_seg.data.cpu().numpy()[0]/args.iter_size
            loss_adv_pred_value += loss_adv_pred.data.cpu().numpy()[0]/args.iter_size


            # train D

            # bring back requires_grad
            for param in model_D.parameters():
                param.requires_grad = True

            # train with pred
            pred = pred.detach()

            if args.D_remain:
                pred = torch.cat((pred, pred_remain), 0)
                ignore_mask = np.concatenate((ignore_mask,ignore_mask_remain), axis = 0)


            D_out = interp(model_D(F.softmax(pred,dim=1)))
            loss_D = bce_loss(D_out, make_D_label(pred_label, ignore_mask))
            loss_D = loss_D/args.iter_size/2
            loss_D.backward()
            loss_D_value += loss_D.data.cpu().numpy()[0]


            # train with gt
            # get gt labels
            try:
                _, batch = trainloader_gt_iter.next()
            except:
                trainloader_gt_iter = enumerate(trainloader_gt)
                _, batch = trainloader_gt_iter.next()

            _, labels_gt, _, _ = batch
            D_gt_v = Variable(one_hot(labels_gt)).cuda()
            ignore_mask_gt = (labels_gt.numpy() == 255)

            D_out = interp(model_D(D_gt_v))
            loss_D = bce_loss(D_out, make_D_label(gt_label, ignore_mask_gt))
            loss_D = loss_D/args.iter_size/2
            loss_D.backward()
            loss_D_value += loss_D.data.cpu().numpy()[0]



        optimizer.step()
        optimizer_D.step()

        print('exp = {}'.format(args.snapshot_dir))
        print('iter = {0:8d}/{1:8d}, loss_seg = {2:.3f}, loss_adv_p = {3:.3f}, loss_D = {4:.3f}, loss_semi = {5:.3f}, loss_semi_adv = {6:.3f}'.format(i_iter, args.num_steps, loss_seg_value, loss_adv_pred_value, loss_D_value, loss_semi_value, loss_semi_adv_value))

        if i_iter >= args.num_steps-1:
            print( 'save model ...')
            torch.save(model.state_dict(),osp.join(args.snapshot_dir, 'VOC_'+os.path.abspath(__file__).split('/')[-1].split('.')[0]+'_'+str(args.num_steps)+'.pth'))
            torch.save(model_D.state_dict(),osp.join(args.snapshot_dir, 'VOC_'+os.path.abspath(__file__).split('/')[-1].split('.')[0]+'_'+str(args.num_steps)+'_D.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter!=0:
            print ('taking snapshot ...')
            torch.save(model.state_dict(),osp.join(args.snapshot_dir, 'VOC_'+os.path.abspath(__file__).split('/')[-1].split('.')[0]+'_'+str(i_iter)+'.pth'))
            torch.save(model_D.state_dict(),osp.join(args.snapshot_dir, 'VOC_'+os.path.abspath(__file__).split('/')[-1].split('.')[0]+'_'+str(i_iter)+'_D.pth'))

    end = timeit.default_timer()
    print(end-start,'seconds')
Exemplo n.º 24
0
def main():
    h, w = list(map(int, args.input_size.split(',')))   # 321, 321
    input_size = (h, w)

    cudnn.enabled = True

    # create network
    model = Res_Deeplab(num_classes=args.num_classes)   # num_classes = 21

    # load pretrained parameters
    if args.restore_from[:4] == 'http':
        saved_state_dict = model_zoo.load_url(args.restore_from)
    else:
        saved_state_dict = torch.load(args.restore_from)

    # only copy the params that exist in current model (caffe-like)
    new_params = model.state_dict().copy()
    for name, param in list(new_params.items()):
        # print(name)
        if name in saved_state_dict and param.size() == saved_state_dict[name].size():
            new_params[name].copy_(saved_state_dict[name])
            # print(('copy {}'.format(name)))
    model.load_state_dict(new_params)

    model.train()
    model.cuda(args.gpu)

    cudnn.benchmark = True

    # init D
    model_D = FCDiscriminator(num_classes=args.num_classes)   # num_classes = 21,全卷积判别模型
    if args.restore_from_D is not None:
        model_D.load_state_dict(torch.load(args.restore_from_D))

    model_D.train()
    model_D.cuda(args.gpu)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)
    if not os.path.exists('logs/'):
        os.makedirs('logs/')
    now_time = datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
    log_file = 'logs/' + now_time + '.txt'
    file = open(log_file, 'w')      # 保存loss

    train_dataset = VOCDataSet(args.data_dir, args.data_list, args.label_list, crop_size=input_size,
                               scale=args.random_scale, mirror=args.random_mirror, mean=IMG_MEAN)

    train_dataset_size = len(train_dataset)

    train_gt_dataset = VOCGTDataSet(args.data_dir, args.data_list, args.label_list, crop_size=input_size,
                                    scale=args.random_scale, mirror=args.random_mirror, mean=IMG_MEAN)

    if args.partial_data is None:   # 使用全部数据
        trainloader = data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True,  # batch_size = 10
                                      num_workers=5, pin_memory=True)

        trainloader_gt = data.DataLoader(train_gt_dataset, batch_size=args.batch_size, shuffle=True,
                                         num_workers=5, pin_memory=True)
    else:
        # sample partial data 部分数据
        partial_size = int(args.partial_data * train_dataset_size)

        if args.partial_id is not None:
            train_ids = pickle.load(open(args.partial_id))
            print(('loading train ids from {}'.format(args.partial_id)))
        else:
            train_ids = list(range(train_dataset_size))
            np.random.shuffle(train_ids)

        pickle.dump(train_ids, open(osp.join(args.snapshot_dir, 'train_id.pkl'), 'wb'))    # 将train_ids写入train_id.pkl

        train_sampler = data.sampler.SubsetRandomSampler(train_ids[:partial_size])
        train_remain_sampler = data.sampler.SubsetRandomSampler(train_ids[partial_size:])
        train_gt_sampler = data.sampler.SubsetRandomSampler(train_ids[:partial_size])

        trainloader = data.DataLoader(train_dataset,    # 数据集中采样输入
                        batch_size=args.batch_size, sampler=train_sampler, num_workers=3, pin_memory=True)
        trainloader_remain = data.DataLoader(train_dataset,
                        batch_size=args.batch_size, sampler=train_remain_sampler, num_workers=3, pin_memory=True)
        trainloader_gt = data.DataLoader(train_gt_dataset,
                        batch_size=args.batch_size, sampler=train_gt_sampler, num_workers=3, pin_memory=True)

        trainloader_remain_iter = enumerate(trainloader_remain)

    trainloader_iter = enumerate(trainloader)
    trainloader_gt_iter = enumerate(trainloader_gt)

    # implement model.optim_parameters(args) to handle different models' lr setting

    # optimizer for segmentation network
    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay)
    optimizer.zero_grad()

    # optimizer for discriminator network
    optimizer_D = optim.Adam(model_D.parameters(), lr=args.learning_rate_D, betas=(0.9, 0.99))
    optimizer_D.zero_grad()

    # loss/ bilinear upsampling
    bce_loss = BCEWithLogitsLoss2d()
    # interp = nn.Upsample(size=(input_size[1], input_size[0]), mode='bilinear')

    if version.parse(torch.__version__) >= version.parse('0.4.0'):
        interp = nn.Upsample(size=(input_size[1], input_size[0]), mode='bilinear', align_corners=True)
    else:
        interp = nn.Upsample(size=(input_size[1], input_size[0]), mode='bilinear')

    # labels for adversarial training
    pred_label = 0
    gt_label = 1
    best_loss = 1
    best_epoch = 0

    for i_iter in range(args.num_steps):    # num_steps = 20000
        loss_seg_value = 0
        loss_adv_pred_value = 0
        loss_D_value = 0
        loss_semi_value = 0
        loss_semi_adv_value = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)
        optimizer_D.zero_grad()
        adjust_learning_rate_D(optimizer_D, i_iter)

        for sub_i in range(args.iter_size):     # iter_size = 1
            # train G

            # don't accumulate grads in D
            for param in model_D.parameters():
                param.requires_grad = False

            # do semi first
            if (args.lambda_semi > 0 or args.lambda_semi_adv > 0) and i_iter >= args.semi_start_adv:
                try:
                    _, batch = next(trainloader_remain_iter)
                except:
                    trainloader_remain_iter = enumerate(trainloader_remain)
                    _, batch = next(trainloader_remain_iter)

                # only access to img 无标签数据
                images, _, _, _, _ = batch
                images = Variable(images).cuda(args.gpu)

                pred = interp(model(images))
                pred_remain = pred.detach()     # 返回一个新的Variable,不具有grade

                D_out = interp(model_D(F.softmax(pred)))
                D_out_sigmoid = F.sigmoid(D_out).data.cpu().numpy().squeeze(axis=1)

                ignore_mask_remain = np.zeros(D_out_sigmoid.shape).astype(np.bool)

                loss_semi_adv = args.lambda_semi_adv * bce_loss(D_out, make_D_label(gt_label, ignore_mask_remain))
                loss_semi_adv = loss_semi_adv/args.iter_size

                # loss_semi_adv.backward()

                # print('bug,', loss_semi_adv.data.cpu().numpy())

                # loss_semi_adv_value += loss_semi_adv.data.cpu().numpy()[0]/args.lambda_semi_adv
                loss_semi_adv_value += loss_semi_adv.data.cpu().numpy()/args.lambda_semi_adv

                if args.lambda_semi <= 0 or i_iter < args.semi_start:
                    loss_semi_adv.backward()
                    loss_semi_value = 0
                else:
                    # produce ignore mask
                    semi_ignore_mask = (D_out_sigmoid < args.mask_T)    # mask_T = 0.2,阈值

                    semi_gt = pred.data.cpu().numpy().argmax(axis=1)    # 返回维度为1上的最大值的下标
                    semi_gt[semi_ignore_mask] = 255

                    semi_ratio = 1.0 - float(semi_ignore_mask.sum())/semi_ignore_mask.size  # 被忽略的点占的比重
                    print(('semi ratio: {:.4f}'.format(semi_ratio)))

                    if semi_ratio == 0.0:
                        loss_semi_value += 0
                    else:
                        semi_gt = torch.FloatTensor(semi_gt)

                        loss_semi = args.lambda_semi * loss_calc(pred, semi_gt, args.gpu)
                        loss_semi = loss_semi/args.iter_size
                        # loss_semi_value += loss_semi.data.cpu().numpy()[0]/args.lambda_semi
                        loss_semi_value += loss_semi.data.cpu().numpy()/args.lambda_semi
                        loss_semi += loss_semi_adv
                        loss_semi.backward()

            else:
                loss_semi = None
                loss_semi_adv = None

            # train with source

            try:
                _, batch = next(trainloader_iter)
            except:
                trainloader_iter = enumerate(trainloader)
                _, batch = next(trainloader_iter)

            images, labels, _, _, _ = batch    # 有标签数据
            images = Variable(images).cuda(args.gpu)
            ignore_mask = (labels.numpy() == 255)
            pred = interp(model(images))    # interp上采样

            loss_seg = loss_calc(pred, labels, args.gpu)    # 语义分割的cross entropy loss
            # loss_seg_NLL = loss_NLL(pred, labels, args.gpu)     # 语义分割的NLLLoss

            D_out = interp(model_D(F.softmax(pred)))    # 得到判别模型输出的判别图

            loss_adv_pred = bce_loss(D_out, make_D_label(gt_label, ignore_mask))

            loss = loss_seg + args.lambda_adv_pred * loss_adv_pred

            # proper normalization
            loss = loss/args.iter_size
            loss.backward()

            # loss_seg_value += loss_seg.data.cpu().numpy()[0]/args.iter_size
            # loss_adv_pred_value += loss_adv_pred.data.cpu().numpy()[0]/args.iter_size

            loss_seg_value += loss_seg.data.cpu().numpy()/args.iter_size
            # loss_seg_value += loss_seg_NLL.data.cpu().numpy()/args.iter_size
            loss_adv_pred_value += loss_adv_pred.data.cpu().numpy()/args.iter_size

            # train D

            # bring back requires_grad
            for param in model_D.parameters():
                param.requires_grad = True

            # train with pred
            pred = pred.detach()

            if args.D_remain:
                pred = torch.cat((pred, pred_remain), 0)
                ignore_mask = np.concatenate((ignore_mask,ignore_mask_remain), axis=0)

            D_out = interp(model_D(F.softmax(pred)))
            loss_D = bce_loss(D_out, make_D_label(pred_label, ignore_mask))
            loss_D = loss_D/args.iter_size/2
            loss_D.backward()
            # loss_D_value += loss_D.data.cpu().numpy()[0]
            loss_D_value += loss_D.data.cpu().numpy()

            # train with gt
            # get gt labels
            try:
                _, batch = next(trainloader_gt_iter)
            except:
                trainloader_gt_iter = enumerate(trainloader_gt)
                _, batch = next(trainloader_gt_iter)

            _, labels_gt, _, _, _ = batch
            D_gt_v = Variable(one_hot(labels_gt)).cuda(args.gpu)    # 每个类别一张label图,batch * class * h * w
            ignore_mask_gt = (labels_gt.numpy() == 255)

            D_out = interp(model_D(D_gt_v))     # ground_truth输入判别模型
            loss_D = bce_loss(D_out, make_D_label(gt_label, ignore_mask_gt))
            loss_D = loss_D/args.iter_size/2
            loss_D.backward()
            # loss_D_value += loss_D.data.cpu().numpy()[0]
            loss_D_value += loss_D.data.cpu().numpy()

        optimizer.step()
        optimizer_D.step()

        print(('exp = {}'.format(args.snapshot_dir)))
        print(('iter = {0:8d}/{1:8d}, loss_seg = {2:.3f}, loss_adv_p = {3:.3f}, loss_D = {4:.3f}, '
               'loss_semi = {5:.3f}, loss_semi_adv = {6:.3f}'.format(i_iter, args.num_steps, loss_seg_value,
                                                                     loss_adv_pred_value, loss_D_value, loss_semi_value,
                                                                     loss_semi_adv_value)))

        file.write('{0} {1} {2} {3} {4}\n'.format(loss_seg_value, loss_adv_pred_value, loss_D_value, loss_semi_value,
                                                loss_semi_adv_value))

        if loss_seg_value < best_loss:      # 保存最优模型,删除次优模型
            # print('loss:', loss_seg_value, 'best:', best_loss)
            torch.save(model.state_dict(), osp.join(args.snapshot_dir,
                                                    'VOC_epoch_{0}_seg_loss_{1}.pth'.format(i_iter+1, loss_seg_value)))
            torch.save(model_D.state_dict(), osp.join(args.snapshot_dir,
                                                      'VOC_epoch_{0}_seg_loss_{1}_D.pth'.format(i_iter+1, loss_seg_value)))
            delete_models(best_epoch + 1, best_loss)
            best_loss = loss_seg_value
            best_epoch = i_iter

        if i_iter >= args.num_steps-1:  # num_step = 20000
            print('save model ...')
            torch.save(model.state_dict(), osp.join(args.snapshot_dir, 'VOC_'+str(args.num_steps)+'.pth'))
            torch.save(model_D.state_dict(), osp.join(args.snapshot_dir, 'VOC_'+str(args.num_steps)+'_D.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:  # save_pred_every = 5000
            print('taking snapshot ...')
            torch.save(model.state_dict(),osp.join(args.snapshot_dir, 'VOC_'+str(i_iter)+'.pth'))
            torch.save(model_D.state_dict(),osp.join(args.snapshot_dir, 'VOC_'+str(i_iter)+'_D.pth'))

    end = timeit.default_timer()
    print(end-start, 'seconds')
    file.close()