Example #1
def main():

    env_name = 'CartPole-v0'
    env = gym.make(env_name)
    action_space = env.action_space.n
    observation_space = env.observation_space.low.shape
    # set logger
    logging.config.fileConfig('./log/log.conf')
    logger = logging.getLogger(__name__)
    logger.info('START')

    # set network model
    shared_model = A3CFFSoftmaxFFF(observation_space, action_space)
    # set optimizer
    opt = RMSpropAsync(lr=LEARNING_RATE, alpha=0.99, eps=RMSPROP_EPS)
    opt.setup(shared_model)
    opt.add_hook(chainer.optimizer.GradientClipping(40))

    writer = SummaryWriter('results/' + datetime.datetime.now().strftime('%B%d  %H:%M:%S'))
    state = env.reset()
    state = chainer.Variable(np.expand_dims(np.array(state).astype(np.float32), axis=0))
    pi, v = shared_model.get_pi_and_v(state)
    writer.add_graph([pi, v])
    writer.close()

    async_train(env_name, shared_model, opt, phi)

    logger.info('END')
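
Example #1 only writes the computation graph before training starts; scalar metrics are presumably logged elsewhere. As a minimal, self-contained sketch of the tensorboardX scalar-logging pattern (the episode loop and reward values below are placeholders, not part of the original script):

from tensorboardX import SummaryWriter

writer = SummaryWriter('results/a3c_scalars')
episode_rewards = [10.0, 12.5, 30.0]  # placeholder returns; real values would come from the agent
for episode, reward in enumerate(episode_rewards):
    # One point per episode; global_step gives the x-axis position in TensorBoard.
    writer.add_scalar('reward/episode_reward', reward, global_step=episode)
writer.close()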
Example #2
def main():
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    CarSet = CarDataSet(ROOT, TRAIN, MASK)
    # split train val 
    # train_idx, valid_idx = augmented_train_valid_split(CarSet, test_size = 0.15,shuffle = True ,random_seed=args.seed)
    # train_sampler = SubsetRandomSampler(train_idx)
    # val_samper = SubsetRandomSampler(valid_idx)
    
    train_loader = DataLoader(CarSet,
                            #   sampler=train_sampler,
                              shuffle=True,
                              batch_size=args.batch_size,
                              **kwargs)
    # val_loader = DataLoader(CarSet,
    #                         sampler=val_samper,
    #                         batch_size=2,
    #                         **kwargs)
    model = uNet(NUM_CLASS)
    if args.cuda:
        model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999))
    writer = SummaryWriter('logs/' + datetime.now().strftime('%B-%d'))
    best_loss = 1e+5
    iters = 0
    # resume training
    if args.resume:
        model, optimizer, args.start_epoch, best_loss, iters = resume(args.resume, model)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_lr(optimizer, epoch, decay=5)
        t1 = time.time()
        loss, iters = train(epoch,
                            model,
                            optimizer,
                            train_loader,
                            writer,
                            iters)
        is_best = loss < best_loss
        best_loss = min(best_loss, loss)
        state = {
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer,
            'loss': best_loss,
            'iters': iters,
        }
        save_checkpoint(state, is_best)
    writer.close()
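
Example #2 depends on the project-specific helpers save_checkpoint and resume. A rough sketch of what save_checkpoint is commonly expected to do is shown below; this is an assumption about the helper, not the repository's actual code, and the filenames are made up:

import shutil
import torch

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    # Always persist the latest state; keep an extra copy whenever it is the best so far.
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')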
Example #3
            weights_file_path = os.path.join(snapshot_dir, weights_file_name)
            torch.save(model.state_dict(), weights_file_path)

            # TODO: Maybe delete the older snapshots?

        #
        # ---------------------------------------------------------------------
        # REDUCE THE LEARNING RATE IF APPROPRIATE
        # ---------------------------------------------------------------------

        scheduler.step(val_loss/n_minibatches_validation)

    # -------------------------------------------------------------------------

    print('Finished Training!')
    writer.close()

    # Save the trained model
    print('Saving model...', end=' ')
    weights_file = ('./weights/spectrograms_weights_{}_{}.net'.
                    format(distances, sample_size))
    torch.save(model.state_dict(), weights_file)
    print('Done!')

    #
    # -------------------------------------------------------------------------
    # MAKE PREDICTIONS ON THE TEST SET
    # -------------------------------------------------------------------------

    print('Start making predictions on the test sample...', end=' ')
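
The fragment above steps its scheduler with a validation metric (scheduler.step(val_loss/n_minibatches_validation)), which matches the interface of torch.optim.lr_scheduler.ReduceLROnPlateau. A minimal sketch of such a setup follows; the model, optimizer, and scheduler arguments are stand-ins, since the original construction is not shown:

import torch

model = torch.nn.Linear(4, 2)                                # stand-in model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# Reduce the LR by half once the monitored validation loss stops improving.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                       factor=0.5, patience=3)

val_loss = 0.42                                              # placeholder validation loss
scheduler.step(val_loss)                                     # same call pattern as above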
Example #4
def train():
    data_augmentation = DataAugmentationTransform_old(translation_range=(0.0, 0.15),
                                                      rotation_range=10,
                                                      zoom_range=(0.8, 1.0),
                                                      flip_p=0.5,
                                                      brightness_range=(-0.2, 0.2),
                                                      gamma_range=(0.5, 1.5),
                                                      saturation_range=(-0.3, 0.3))
    loader_train = CityscapesLoader(base_data_folder, split='train', is_transform=True, img_size=image_shape, transforms=None)
    trainloader = data.DataLoader(loader_train, batch_size=batch_size, num_workers=4, shuffle=True, pin_memory=True)
    if overlay_during_training:
        loader_test = CityscapesLoader(base_data_folder, split='test', is_transform=True, img_size=image_shape)
        test_loader = data.DataLoader(loader_test, batch_size=batch_size, num_workers=4, shuffle=False, pin_memory=True)
    if check_validation:
        loader_val = CityscapesLoader(base_data_folder, split='val', is_transform=True, img_size=image_shape)
        valloader = data.DataLoader(loader_val, batch_size=batch_size, num_workers=4, shuffle=False, pin_memory=True)
    model = get_model('fcn1s',num_classes)

    writer = SummaryWriter()

    if resume:
        print("Resuming From ",resume_filename)
        checkpoint = torch.load(resume_filename)
        model.load_state_dict(checkpoint['state_dict'])
        #starting_epoch = checkpoint['epoch']
        #optimizer.load_state_dict(checkpoint['optimizer'])

    for param in model.parameters():
        param.requires_grad = True

    if freeze_layers:
        print("Freezing VGG layers")
        for param in model.conv_block1.parameters():
            param.requires_grad = False
        for param in model.conv_block2.parameters():
            param.requires_grad = False
        for param in model.conv_block3.parameters():
            param.requires_grad = False
        for param in model.conv_block4.parameters():
            param.requires_grad = False
        for param in model.conv_block5.parameters():
            param.requires_grad = False

    if torch.cuda.is_available():
        print("Using GPU")
        model.cuda(0)
    else:
        print("Using CPU")

    model.train()

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    if opt == "SGD":
        optimizer = torch.optim.SGD(parameters, lr=l_rate, momentum=0.9, weight_decay=5e-4)
    elif opt == "Adam":
        optimizer = torch.optim.Adam(parameters, lr=l_rate, weight_decay=5e-4)

    best_metric = 0
    old_file = ""
    for epoch in range(starting_epoch, epochs):
        train_acc = 0
        train_IoU = 0
        train_loss = 0
        train_count = 0

        print("\nEpoch: ",epoch)

        if overlay_during_training and epoch % 5  == 0:
            test_img = loader_test[67]
            test_img = test_img.unsqueeze(0)
            model.eval()
            test_pred = model(Variable(test_img.cuda(0), requires_grad=True))
            test_img = Variable(test_img.cuda(0), requires_grad=True)
            overlay_images(test_img, test_pred, epoch, '67_')
            writer.add_graph(model, test_pred)
            del test_pred
            del test_img

            test_img = loader_test[88]
            test_img = test_img.unsqueeze(0)
            test_pred = model(Variable(test_img.cuda(0), requires_grad=True))
            test_img = Variable(test_img.cuda(0), requires_grad=True)
            overlay_images(test_img, test_pred, epoch, '88_')
            del test_pred
            del test_img

            test_img = loader_test[175]
            test_img = test_img.unsqueeze(0)
            test_pred = model(Variable(test_img.cuda(0), requires_grad=True))
            test_img = Variable(test_img.cuda(0), requires_grad=True)
            overlay_images(test_img, test_pred, epoch, '175_')
            del test_pred
            del test_img

        model.train()
        with tqdm.tqdm(trainloader, ncols=100) as t:
            for i, (images, labels) in enumerate(t):
                if torch.cuda.is_available():
                    images = Variable(images.cuda(0))
                    labels = Variable(labels.cuda(0))
                else:
                    images = Variable(images)
                    labels = Variable(labels)

                iter = len(trainloader) * epoch + i
                if poly_lr:
                    poly_lr_scheduler(optimizer, l_rate, iter, lr_decay_iter=10)

                optimizer.zero_grad()
                outputs = model(images)

                loss = cross_entropy2d(outputs, labels, ignore_index=255)

                loss.backward()
                optimizer.step()

                #print("%8.2f %%  ->  Loss: %8.6f " % (i / len(trainloader) * 100, loss.data[0]), end='\r')
                t.set_description('Loss: %8.6f' % loss.data[0])
                t.update(1)

                train_loss = train_loss + loss.data[0]
                acc, IoU = accuracy_IoU(outputs,labels, np.array(range(num_classes)))
                train_acc = train_acc + acc
                train_IoU = train_IoU + IoU.mean()
                train_count = train_count + 1

                del outputs
                del loss
                del images
                del labels


        train_acc = train_acc / train_count
        train_IoU = train_IoU / train_count
        train_loss = train_loss / train_count
        print("\nTrain Accuracy: ", train_acc)
        print("Train Loss: ", train_loss)
        print("Train IoU: ", train_IoU, "\n")
        writer.add_scalar('Train Accuracy', train_acc, epoch)
        writer.add_scalar('Train IoU', train_IoU, epoch)
        writer.add_scalar('Train Loss', train_loss, epoch)

        if check_validation:
            #VALIDATION!!!
            val_acc = 0
            val_IoU = 0
            val_loss = 0
            val_count = 0
            model.eval()
            for i, (images, labels) in enumerate(valloader):
                if torch.cuda.is_available():
                    images = Variable(images.cuda(0))
                    labels = Variable(labels.cuda(0))
                else:
                    images = Variable(images)
                    labels = Variable(labels)
                iter = len(trainloader) * epoch + i
                #poly_lr_scheduler(optimizer, l_rate, iter)

                outputs = model(images)

                loss = cross_entropy2d(outputs, labels, ignore_index=255)

                val_loss = val_loss + loss.data[0]
                acc, IoU = accuracy_IoU(outputs,labels, np.array(range(num_classes)))
                val_acc = val_acc + acc
                val_IoU = val_IoU + IoU.mean()
                val_count = val_count + 1

                del outputs
                del loss
                del images
                del labels
            val_acc = val_acc / val_count
            val_IoU = val_IoU / val_count
            val_loss = val_loss / val_count
            print("\nVal Accuracy: ", val_acc)
            print("Val Loss: ", val_loss)
            print("Val IoU: ", val_IoU, "\n")
            writer.add_scalar('Val Accuracy', val_acc, epoch)
            writer.add_scalar('Val IoU', val_IoU, epoch)
            writer.add_scalar('Val Loss', val_loss, epoch)

        save_metric = train_IoU
        if check_validation:
            save_metric = val_IoU

        if best_metric < save_metric:
            best_metric = save_metric
            print("New Best IoU!")
            if save:
                torch.save({
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                 base_save_folder + "/checkpoint_" + str(epoch) + "_" + str(save_metric) + ".pth.tar")
                print("Model Saves As " + base_save_folder + "/checkpoint_" + str(epoch) + "_" + str(save_metric) + ".pth.tar")
                if os.path.isfile(old_file):
                    os.remove(old_file)
                old_file = base_save_folder + "/checkpoint_" + str(epoch) + "_" + str(save_metric) + ".pth.tar"

        print("Best IoU So Far: ", best_metric)

    writer.close()
    print("End Of Training")
Example #5
def train():

    loader_train = CityscapesLoader('/home/cattaneod/CITYSCAPES_crop/',
                                    split='train',
                                    is_transform=True,
                                    img_size=None,
                                    transforms=data_augmentation)
    trainloader = data.DataLoader(loader_train,
                                  batch_size=batch_size,
                                  num_workers=num_workers,
                                  shuffle=True,
                                  pin_memory=True)
    loader_test = CityscapesLoader(base_data_folder,
                                   split='test',
                                   is_transform=True,
                                   img_size=None,
                                   transforms=data_augmentation)
    test_loader = data.DataLoader(loader_test,
                                  batch_size=batch_size,
                                  num_workers=num_workers,
                                  shuffle=False,
                                  pin_memory=True)
    loader_val = CityscapesLoader(base_data_folder,
                                  split='val',
                                  is_transform=True,
                                  img_size=image_shape,
                                  return_original=True)
    valloader = data.DataLoader(loader_val,
                                batch_size=batch_size,
                                num_workers=num_workers,
                                shuffle=False,
                                pin_memory=True)

    model = deeplab_resnet_DUC.Res_Deeplab_DUC(num_classes)

    if TBWriter:
        writer = SummaryWriter()
    '''
    if resume:
        print("Loading from: ", resume_filename)
        saved_state_dict = torch.load(resume_filename)
        if num_classes != 21:
            for i in saved_state_dict:
                # Scale.layer5.conv2d_list.3.weight
                i_parts = i.split('.')
                if i_parts[1] == 'layer5':
                    saved_state_dict[i] = model.state_dict()[i]

        model.load_state_dict(saved_state_dict)
    '''

    if torch.cuda.is_available():
        print("Using GPU")
        model.cuda(0)
    else:
        print("Using CPU")

    model.train()

    if opt == "SGD":
        optimizer = torch.optim.SGD([{
            'params': get_1x_lr_params_NOscale(model),
            'lr': l_rate
        }, {
            'params': get_10x_lr_params(model),
            'lr': 10 * l_rate
        }],
                                    lr=l_rate,
                                    momentum=0.9,
                                    weight_decay=5e-4)
    elif opt == "Adam":
        optimizer = torch.optim.Adam([{
            'params': get_1x_lr_params_NOscale(model),
            'lr': 0 * l_rate
        }, {
            'params': get_10x_lr_params(model),
            'lr': 10 * l_rate
        }],
                                     lr=l_rate,
                                     weight_decay=5e-4)

    if resume:
        print("Resuming From ", resume_filename)
        checkpoint = torch.load(resume_filename)
        saved_state_dict = checkpoint['state_dict']
        if reset_layer5:
            for i in model.state_dict():
                # Scale.layer5.conv2d_list.3.weight
                i_parts = i.split('.')
                if i not in saved_state_dict or i_parts[1] == 'layer5':
                    saved_state_dict[i] = model.state_dict()[i]
        model.load_state_dict(saved_state_dict)
        starting_epoch = checkpoint['epoch'] + 1
        if poly_lr:
            lr_ = poly_lr2(l_rate,
                           len(trainloader) * starting_epoch,
                           lr_decay_iter=1,
                           max_iter=len(trainloader) * epochs)
            if lr_:
                if opt == "SGD":
                    optimizer = torch.optim.SGD(
                        [{
                            'params': get_1x_lr_params_NOscale(model),
                            'lr': lr_
                        }, {
                            'params': get_10x_lr_params(model),
                            'lr': 10 * lr_
                        }],
                        lr=lr_,
                        momentum=0.9,
                        weight_decay=5e-4)
                elif opt == "Adam":
                    optimizer = torch.optim.Adam(
                        [{
                            'params': get_1x_lr_params_NOscale(model),
                            'lr': 0 * lr_
                        }, {
                            'params': get_10x_lr_params(model),
                            'lr': 10 * lr_
                        }],
                        lr=lr_,
                        weight_decay=5e-4)

    best_metric = 0
    old_file = ""
    train_acc = AverageMeter()
    train_IoU = AverageMeter()
    train_loss = AverageMeter()
    for epoch in range(starting_epoch, epochs):
        train_acc.reset()
        train_IoU.reset()
        train_loss.reset()
        train_cfmatrix = np.zeros((num_classes, num_classes))

        print("\nEpoch: ", epoch)

        if overlay_during_training and epoch % 1 == 0:
            for i in range(15):
                print("Overlaying image ", i)
                names, original_img, test_img, _ = loader_val[i]
                test_img = test_img.unsqueeze(0)
                original_img = original_img.unsqueeze(0)
                original_img = Variable(original_img.cuda())
                model.eval()
                test_pred = model(
                    Variable(test_img.cuda(0), requires_grad=True))
                test_img = Variable(test_img.cuda(0), requires_grad=True)
                #if TBWriter and i==0:
                #    writer.add_graph(model, test_pred)
                test_pred = F.upsample_bilinear(test_pred, (1024, 2048))
                overlay_images(names,
                               original_img,
                               test_pred,
                               epoch,
                               str(i) + '_',
                               convert_id=False)
                del test_pred
                del test_img

        model.train()
        optimizer.zero_grad()
        with tqdm.tqdm(trainloader, ncols=150) as t:
            lr_ = l_rate
            for i, (images, labels) in enumerate(t):
                if torch.cuda.is_available():
                    images = Variable(images.cuda(0))
                    labels = Variable(labels.cuda(0))
                else:
                    images = Variable(images)
                    labels = Variable(labels)

                iter = len(trainloader) * epoch + i

                outputs = model(images)
                #g = make_dot(outputs)
                #g.save('./t.dot')

                loss = misc.cross_entropy2d(outputs, labels, ignore_index=255)
                loss = loss / update_batches

                loss.backward()

                t.set_description('Loss: %8.4f - LR = %f' %
                                  (update_batches * loss.data[0], lr_))

                train_loss.update(update_batches * loss.data[0])
                acc, IoU, cf_matrix = accuracy_IoU(
                    outputs, labels, np.array(range(num_classes)))
                if acc is not None:
                    train_acc.update(acc)
                    train_IoU.update(np.nanmean(IoU))
                    train_cfmatrix = train_cfmatrix + cf_matrix

                if i % update_batches == 0:
                    optimizer.step()
                    if poly_lr:
                        lr_ = poly_lr2(l_rate,
                                       iter,
                                       lr_decay_iter=1,
                                       max_iter=len(trainloader) * epochs)
                        if lr_:
                            t.set_description(
                                'Step: %8.4f - LR = %f' %
                                (update_batches * loss.data[0], lr_))
                            if opt == "SGD":
                                optimizer = torch.optim.SGD(
                                    [{
                                        'params':
                                        get_1x_lr_params_NOscale(model),
                                        'lr': lr_
                                    }, {
                                        'params': get_10x_lr_params(model),
                                        'lr': 10 * lr_
                                    }],
                                    lr=lr_,
                                    momentum=0.9,
                                    weight_decay=5e-4)
                            elif opt == "Adam":
                                optimizer = torch.optim.Adam(
                                    [{
                                        'params':
                                        get_1x_lr_params_NOscale(model),
                                        'lr': 0 * lr_
                                    }, {
                                        'params': get_10x_lr_params(model),
                                        'lr': 10 * lr_
                                    }],
                                    lr=lr_,
                                    weight_decay=5e-4)

                    #print("%8.2f %%  ->  Loss: %8.6f " % (i / len(trainloader) * 100, loss.data[0]), end='\r')
                    optimizer.zero_grad()

                if i > 0 and i % TBUpdate == 0 and TBWriter:
                    writer.add_scalar('Train Accuracy', train_acc.avg, iter)
                    writer.add_scalar('Train IoU', train_IoU.avg, iter)
                    writer.add_scalar('Train Loss', train_loss.avg, iter)

                del outputs
                del loss
                del images
                del labels

                t.update(1)

        rows = train_cfmatrix.sum(axis=1)
        cols = train_cfmatrix.sum(axis=0)
        IoU = np.ndarray(train_cfmatrix.shape[0])
        for i in range(train_cfmatrix.shape[0]):
            if rows[i] + cols[i] > 0.:
                IoU[i] = train_cfmatrix[i][i] / (rows[i] + cols[i] -
                                                 train_cfmatrix[i][i])
            else:
                IoU[i] = np.nan
        print("\nTrain Accuracy: ", train_acc.avg)
        print("Train Loss: ", train_loss.avg)
        print("Micro IoU: ", train_IoU.avg, "\n")
        print("Macro IoU: ", np.nanmean(IoU), "\n")

        if check_validation:
            #VALIDATION!!!
            val_acc = AverageMeter()
            val_IoU = AverageMeter()
            val_loss = AverageMeter()
            val_cfmatrix = np.zeros((num_classes, num_classes))
            model.eval()
            for i, (images, labels) in enumerate(valloader):
                if torch.cuda.is_available():
                    images = Variable(images.cuda(0))
                    labels = Variable(labels.cuda(0))
                else:
                    images = Variable(images)
                    labels = Variable(labels)
                iter = len(trainloader) * epoch + i
                #poly_lr_scheduler(optimizer, l_rate, iter)

                outputs = model(images)

                loss = cross_entropy2d(outputs, labels, ignore_index=255)

                val_loss.update(loss.data[0])
                acc, IoU, cf_matrix = accuracy_IoU(
                    outputs, labels, np.array(range(num_classes)))
                if acc is not None:
                    val_acc.update(acc)
                    val_IoU.update(np.nanmean(IoU))
                    val_cfmatrix = val_cfmatrix + cf_matrix

                del outputs
                del loss
                del images
                del labels
            print("\nVal Accuracy: ", val_acc.avg)
            print("Val Loss: ", val_loss.avg)
            print("Val IoU: ", val_IoU.avg, "\n")
            if TBWriter:
                writer.add_scalar('Val Accuracy', val_acc.avg, epoch)
                writer.add_scalar('Val IoU', val_IoU.avg, epoch)
                writer.add_scalar('Val Loss', val_loss.avg, epoch)

        save_metric = train_IoU.avg
        if check_validation:
            save_metric = val_IoU.avg

        if best_metric < save_metric:
            best_metric = save_metric
            print("New Best IoU!")
            if save:
                torch.save(
                    {
                        'epoch': epoch,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                    }, base_save_folder + "/checkpoint_" + str(epoch) + "_" +
                    str(save_metric) + ".pth.tar")
                print("Model Saves As " + base_save_folder + "/checkpoint_" +
                      str(epoch) + "_" + str(save_metric) + ".pth.tar")
                if os.path.isfile(old_file):
                    os.remove(old_file)
                old_file = base_save_folder + "/checkpoint_" + str(
                    epoch) + "_" + str(save_metric) + ".pth.tar"

        print("Best IoU So Far: ", best_metric)

    if TBWriter:
        writer.close()
    print("End Of Training")
Example #6
def main():
    global args
    args = parser.parse_args()

    # Data preprocessing.
    print('==> Preparing data......')
    assert (args.dataset == 'cifar10' or args.dataset
            == 'cifar100'), "Only cifar10 and cifar100 datasets are supported"
    if args.dataset == 'cifar10':
        print('To train and eval on cifar10 dataset......')
        num_classes = 10
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean_cifar10, std_cifar10),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean_cifar10, std_cifar10),
        ])
        train_set = torchvision.datasets.CIFAR10(root='./data',
                                                 train=True,
                                                 download=True,
                                                 transform=transform_train)
        train_loader = torch.utils.data.DataLoader(train_set,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=4)

        test_set = torchvision.datasets.CIFAR10(root='./data',
                                                train=False,
                                                download=True,
                                                transform=transform_test)
        test_loader = torch.utils.data.DataLoader(test_set,
                                                  batch_size=100,
                                                  shuffle=False,
                                                  num_workers=4)
    else:
        print('To train and eval on cifar100 dataset......')
        num_classes = 100
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean_cifar100, std_cifar100),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean_cifar100, std_cifar100),
        ])
        train_set = torchvision.datasets.CIFAR100(root='./data',
                                                  train=True,
                                                  download=True,
                                                  transform=transform_train)
        train_loader = torch.utils.data.DataLoader(train_set,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=4)

        test_set = torchvision.datasets.CIFAR100(root='./data',
                                                 train=False,
                                                 download=True,
                                                 transform=transform_test)
        test_loader = torch.utils.data.DataLoader(test_set,
                                                  batch_size=100,
                                                  shuffle=False,
                                                  num_workers=4)

    # Model
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isdir(
            args.ckpt_path), 'Error: checkpoint directory does not exist!'
        checkpoint = torch.load(os.path.join(args.ckpt_path, 'ckpt.t7'))
        model = checkpoint['model']
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
    else:
        print('==> Building model..')
        model = models.__dict__[args.arch](num_classes)
        start_epoch = args.start_epoch

    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # Use GPUs if available.
    if torch.cuda.is_available():
        model.cuda()
        model = torch.nn.DataParallel(model,
                                      device_ids=range(
                                          torch.cuda.device_count()))
        cudnn.benchmark = True

    # Define loss function and optimizer.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          nesterov=args.nesterov,
                          weight_decay=args.weight_decay)

    log_dir = 'logs/' + datetime.now().strftime('%B%d  %H:%M:%S')
    train_writer = SummaryWriter(os.path.join(log_dir, 'train'))
    test_writer = SummaryWriter(os.path.join(log_dir, 'test'))

    # Save argparse commandline to a file.
    with open(os.path.join(log_dir, 'commandline_args.txt'), 'w') as f:
        f.write('\n'.join(sys.argv[1:]))

    if not args.resume:
        best_acc = 0  # best test accuracy (loaded from the checkpoint when resuming)

    for epoch in range(start_epoch, args.epochs):
        # Learning rate schedule.
        lr = adjust_learning_rate(optimizer, epoch + 1)
        train_writer.add_scalar('lr', lr, epoch)

        # Train for one epoch.
        train(train_loader, model, criterion, optimizer, train_writer, epoch)

        # Eval on test set.
        num_iter = (epoch + 1) * len(train_loader)
        acc = eval(test_loader, model, criterion, test_writer, epoch, num_iter)

        # Save checkpoint.
        print('Saving Checkpoint......')
        state = {
            'model': model.module if torch.cuda.is_available() else model,
            'best_acc': best_acc,
            'epoch': epoch,
        }
        if not os.path.isdir(os.path.join(log_dir, 'last_ckpt')):
            os.mkdir(os.path.join(log_dir, 'last_ckpt'))
        torch.save(state, os.path.join(log_dir, 'last_ckpt', 'ckpt.t7'))
        if acc > best_acc:
            best_acc = acc
            if not os.path.isdir(os.path.join(log_dir, 'best_ckpt')):
                os.mkdir(os.path.join(log_dir, 'best_ckpt'))
            torch.save(state, os.path.join(log_dir, 'best_ckpt', 'ckpt.t7'))

        train_writer.add_scalar('best_acc', best_acc, epoch)

    train_writer.close()
    test_writer.close()
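
Example #6 updates the learning rate once per epoch via lr = adjust_learning_rate(optimizer, epoch + 1) and logs the returned value. The schedule itself is not shown; a typical step-decay version is sketched below, with the base rate and milestone epochs as assumptions:

def adjust_learning_rate(optimizer, epoch, base_lr=0.1):
    # Assumed step schedule: divide the base LR by 10 at epochs 150 and 225.
    lr = base_lr
    if epoch >= 150:
        lr *= 0.1
    if epoch >= 225:
        lr *= 0.1
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr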
Example #7
def test_log_scalar_summary():
    logdir = './experiment/scalar'
    writer = SummaryWriter(logdir)
    for i in range(10):
        writer.add_scalar('test_scalar', i+1)
    writer.close()
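
Example #7 calls add_scalar without a global_step, so with tensorboardX all ten values typically land at the same step and the chart collapses to a single point. A small variation that passes the loop index explicitly (assuming SummaryWriter comes from tensorboardX) is:

from tensorboardX import SummaryWriter

def test_log_scalar_summary_with_step():
    writer = SummaryWriter('./experiment/scalar_step')
    for i in range(10):
        # Explicit global_step so each value gets its own x-axis position.
        writer.add_scalar('test_scalar', i + 1, global_step=i)
    writer.close()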
Example #8
def DCGAN(epoch, noise_size, batch_size, save_period, dataset):

    if dataset == 'MNIST':
        '''location of tensorboard save file'''
        logdir = 'tensorboard/MNIST/'
        summary_writer = SummaryWriter(logdir)
        train_iter, train_data_number = Mnist_Data_Processing(batch_size)  #all

    elif dataset == 'CIFAR10':
        '''location of tensorboard save file'''
        logdir = 'tensorboard/CIFAR10/'
        summary_writer = SummaryWriter(logdir)
        train_iter, train_data_number = Image_Data_Processing(
            batch_size, "CIFAR10")  #class by class

    elif dataset == 'ImageNet':
        '''location of tensorboard save file'''
        logdir = 'tensorboard/IMAGENET/'
        summary_writer = SummaryWriter(logdir)
        train_iter, train_data_number = Image_Data_Processing(
            batch_size, "ImageNet")  #face
    else:
        print "no input data!!!"

    # No need, but must be declared.
    label = mx.nd.zeros((batch_size, ))
    '''Network'''
    generator = Generator()
    discriminator = Discriminator()
    context = mx.gpu(0)
    '''In the code below, the 'inputs_need_grad' parameter in the 'mod.bind' function is very important.'''
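    # inputs_need_grad=True makes the bound discriminator modules return the gradient of
    # the loss w.r.t. their input images via get_input_grads(); that gradient (diff_v) is
    # what modG.backward(diff_v) uses further down to update the generator.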

    # =============module G=============
    modG = mx.mod.Module(symbol=generator,
                         data_names=['noise'],
                         label_names=None,
                         context=context)
    modG.bind(data_shapes=[('noise', (batch_size, noise_size, 1, 1))],
              label_shapes=None,
              for_training=True)

    if dataset == 'MNIST':
        try:
            # load the saved modG data
            modG.load_params("MNIST_Weights/modG-10.params")
        except:
            pass

    if dataset == 'CIFAR10':
        try:
            # load the saved modG data
            modG.load_params("CIFAR10_Weights/modG-300.params")
        except:
            pass

    if dataset == 'ImageNet':
        try:
            #pass
            # load the saved modG data
            modG.load_params("ImageNet_Weights/modG-1000.params")
        except:
            pass

    modG.init_params(initializer=mx.initializer.Normal(sigma=0.02))
    modG.init_optimizer(optimizer='adam',
                        optimizer_params={
                            'learning_rate': 0.0002,
                            'beta1': 0.5
                        })

    # =============module discriminator[0],discriminator[1]=============
    modD_0 = mx.mod.Module(symbol=discriminator[0],
                           data_names=['data'],
                           label_names=None,
                           context=context)
    modD_0.bind(data_shapes=train_iter.provide_data,
                label_shapes=None,
                for_training=True,
                inputs_need_grad=True)

    if dataset == 'MNIST':
        try:
            # load the saved modG data
            modD_0.load_params("MNIST_Weights/modD_0-10.params")
        except:
            pass
    if dataset == 'CIFAR10':
        try:
            # load the saved modG data
            modD_0.load_params("CIFAR10_Weights/modD_0-200.params")
        except:
            pass

    if dataset == 'ImageNet':
        #pass
        try:
            # load the saved modG data
            modD_0.load_params("ImageNet_Weights/modD_0-1000.params")
        except:
            pass

    modD_0.init_params(initializer=mx.initializer.Normal(sigma=0.02))
    modD_0.init_optimizer(optimizer='adam',
                          optimizer_params={
                              'learning_rate': 0.0002,
                              'beta1': 0.5
                          })
    """
    Parameters
    shared_module : Module
        Default is `None`. This is used in bucketing. When not `None`, the shared module
        essentially corresponds to a different bucket -- a module with different symbol
        but with the same sets of parameters (e.g. unrolled RNNs with different lengths).

    Here, to share the Discriminator parameters, we must use shared_module=modD_0
    """
    modD_1 = mx.mod.Module(symbol=discriminator[1],
                           data_names=['data'],
                           label_names=None,
                           context=context)
    modD_1.bind(data_shapes=train_iter.provide_data,
                label_shapes=None,
                for_training=True,
                inputs_need_grad=True,
                shared_module=modD_0)

    # =============generate image=============
    column_size = 10
    row_size = 10
    test_mod = mx.mod.Module(symbol=generator,
                             data_names=['noise'],
                             label_names=None,
                             context=context)
    test_mod.bind(data_shapes=[
        mx.io.DataDesc(name='noise',
                       shape=(column_size * row_size, noise_size, 1, 1))
    ],
                  label_shapes=None,
                  shared_module=modG,
                  for_training=False,
                  grad_req='null')
    '''############Although not required, the following code should be declared.#################'''
    '''make evaluation method 1 - Using existing ones.
        metrics = {
        'acc': Accuracy,
        'accuracy': Accuracy,
        'ce': CrossEntropy,
        'f1': F1,
        'mae': MAE,
        'mse': MSE,
        'rmse': RMSE,
        'top_k_accuracy': TopKAccuracy
    }'''

    metric = mx.metric.create(['acc', 'mse'])
    '''make evaluation method 2 - Making new things.'''
    '''
    Custom evaluation metric that takes a NDArray function.
    Parameters:
    •feval (callable(label, pred)) – Customized evaluation function.
    •name (str, optional) – The name of the metric.
    •allow_extra_outputs (bool) – If true, the prediction outputs can have extra outputs.
    This is useful in RNN, where the states are also produced in outputs for forwarding.
    '''
    def zero(label, pred):
        return 0

    null = mx.metric.CustomMetric(zero)

    ####################################training loop############################################
    # =============train===============
    for epoch in xrange(1, epoch + 1, 1):
        Max_cost_0 = 0
        Max_cost_1 = 0
        Min_cost = 0
        total_batch_number = np.ceil(train_data_number / (batch_size * 1.0))
        train_iter.reset()
        for batch in train_iter:

            noise = mx.random.uniform(low=-1.0,
                                      high=1.0,
                                      shape=(batch_size, noise_size, 1, 1),
                                      ctx=context)
            modG.forward(data_batch=mx.io.DataBatch(data=[noise], label=None),
                         is_train=True)
            modG_output = modG.get_outputs()

            ################################updating only parameters related to modD.########################################
            # update discriminator on noise data
            '''MAX : modD_1 : cost : (-mx.symbol.log(1-discriminator2))  - noise data Discriminator update , bigger and bigger -> smaller and smaller discriminator2'''

            modD_1.forward(data_batch=mx.io.DataBatch(data=modG_output,
                                                      label=None),
                           is_train=True)
            '''Max_Cost of noise data Discriminator'''
            Max_cost_1 += modD_1.get_outputs()[0].asnumpy().astype(np.float32)
            modD_1.backward()
            modD_1.update()

            # updating discriminator on real data
            '''MAX : modD_0 : cost: (-mx.symbol.log(discriminator2)) real data Discriminator update , bigger and bigger discriminator2'''
            modD_0.forward(data_batch=batch, is_train=True)
            '''Max_Cost of real data Discriminator'''
            Max_cost_0 += modD_0.get_outputs()[0].asnumpy().astype(np.float32)
            modD_0.backward()
            modD_0.update()

            ################################updating only parameters related to modG.########################################
            # update generator on noise data
            '''MIN : modD_0 : cost : (-mx.symbol.log(discriminator2)) - noise data Discriminator update  , bigger and bigger discriminator2'''
            modD_0.forward(data_batch=mx.io.DataBatch(data=modG_output,
                                                      label=None),
                           is_train=True)
            modD_0.backward()
            '''Max_Cost of noise data Generator'''
            Min_cost += modD_0.get_outputs()[0].asnumpy().astype(np.float32)

            diff_v = modD_0.get_input_grads()
            modG.backward(diff_v)
            modG.update()
        '''tensorboard part'''
        Max_C = ((Max_cost_0 + Max_cost_1) / total_batch_number * 1.0).mean()
        Min_C = (Min_cost / total_batch_number * 1.0).mean()

        arg_params, aux_params = modG.get_params()
        #write scalar values

        summary_writer.add_scalar(name="Max_cost",
                                  scalar_value=Max_C,
                                  global_step=epoch)
        summary_writer.add_scalar(name="Min_cost",
                                  scalar_value=Min_C,
                                  global_step=epoch)

        #write matrix values

        summary_writer.add_histogram(
            name="g1_weight", values=arg_params["g1_weight"].asnumpy().ravel())
        summary_writer.add_histogram(
            name="g2_weight", values=arg_params["g2_weight"].asnumpy().ravel())
        summary_writer.add_histogram(
            name="g3_weight", values=arg_params["g3_weight"].asnumpy().ravel())
        summary_writer.add_histogram(
            name="g4_weight", values=arg_params["g4_weight"].asnumpy().ravel())
        summary_writer.add_histogram(
            name="g5_weight", values=arg_params["g5_weight"].asnumpy().ravel())

        # cost print
        print "epoch : {}".format(epoch)
        print "Max Discriminator Cost : {}".format(Max_C)
        print "Min Generator Cost : {}".format(Min_C)

        #Save the data
        if epoch % save_period == 0:

            # write image values
            generate_image = modG_output[0][0].asnumpy()  # only one image
            generate_image = (generate_image + 1.0) * 127.5
            '''
            Args:
            tag: A name for the generated node. Will also serve as a series name in
            TensorBoard.
            tensor: A 3-D `uint8` or `float32` `Tensor` of shape `[height, width,
            channels]` where `channels` is 1, 3, or 4.
            '''
            generate_image = generate_image.astype(
                np.uint8)  # only uint8 is handled here; this should be improved
            summary_writer.add_image(
                tag='generate_image_epoch_{}'.format(epoch),
                img_tensor=generate_image.transpose(1, 2, 0))

            print('Saving weights')
            if dataset == "MNIST":
                modG.save_params("MNIST_Weights/modG-{}.params".format(epoch))
                modD_0.save_params(
                    "MNIST_Weights/modD_0-{}.params".format(epoch))
            elif dataset == "CIFAR10":
                modG.save_params(
                    "CIFAR10_Weights/modG-{}.params".format(epoch))
                modD_0.save_params(
                    "CIFAR10_Weights/modD_0-{}.params".format(epoch))
            elif dataset == 'ImageNet':
                modG.save_params(
                    "ImageNet_Weights/modG-{}.params".format(epoch))
                modD_0.save_params(
                    "ImageNet_Weights/modD_0-{}.params".format(epoch))
            '''test_method-2'''
            test = mx.random.uniform(low=-1.0,
                                     high=1.0,
                                     shape=(column_size * row_size, noise_size,
                                            1, 1),
                                     ctx=context)
            test_mod.forward(
                data_batch=mx.io.DataBatch(data=[test], label=None))
            result = test_mod.get_outputs()[0]
            result = result.asnumpy()
            '''range adjustment: -1 ~ 1  ->  0 ~ 2  ->  0 ~ 255'''
            # result = np.clip((result + 1.0) * (255.0 / 2.0), 0, 255).astype(np.uint8)
            result = ((result + 1.0) * 127.5).astype(np.uint8)
            '''Convert the image size to 4 times'''
            result = np.asarray([[
                cv2.resize(i, None, fx=2, fy=2, interpolation=cv2.INTER_AREA)
                for i in im
            ] for im in result])

            result = result.transpose((0, 2, 3, 1))
            '''visualization'''
            fig, ax = plt.subplots(row_size,
                                   column_size,
                                   figsize=(column_size, row_size))
            fig.suptitle('generator')
            for j in xrange(row_size):
                for i in xrange(column_size):
                    ax[j][i].set_axis_off()
                    if dataset == "MNIST":
                        ax[j][i].imshow(result[i + j * column_size],
                                        cmap='gray')
                    elif dataset == "CIFAR10":
                        ax[j][i].imshow(result[i + j * column_size])
                    elif dataset == 'ImageNet':
                        ax[j][i].imshow(result[i + j * column_size])

            if dataset == "MNIST":
                fig.savefig(
                    "Generate_Image/DCGAN_MNIST_Epoch_{}.png".format(epoch))
            elif dataset == "CIFAR10":
                fig.savefig(
                    "Generate_Image/DCGAN_CIFAR10_Epoch_{}.png".format(epoch))
            elif dataset == 'ImageNet':
                fig.savefig(
                    "Generate_Image/DCGAN_ImageNet_Epoch_{}.png".format(epoch))

            plt.close(fig)

    print "Optimization complete."
    '''tensorboard_part'''
    summary_writer.close()

    #################################Generating Image####################################
    '''load method1 - load the training mod.get_params() directly'''
    #arg_params, aux_params = mod.get_params()
    '''Annotate only when running test data. and Uncomment only if it is 'load method2' '''
    #test_mod.set_params(arg_params=arg_params, aux_params=aux_params)
    '''test_method-1'''
    '''
    noise = noise_iter.next()
    test_mod.forward(noise, is_train=False)
    result = test_mod.get_outputs()[0]
    result = result.asnumpy()
    print np.shape(result)
    '''
    '''load method2 - using the shared_module'''
    """
    Parameters
    shared_module : Module
        Default is `None`. This is used in bucketing. When not `None`, the shared module
        essentially corresponds to a different bucket -- a module with different symbol
        but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
    """
    '''test_method-2'''
    test = mx.random.uniform(low=-1.0,
                             high=1.0,
                             shape=(column_size * row_size, noise_size, 1, 1),
                             ctx=context)
    test_mod.forward(data_batch=mx.io.DataBatch(data=[test], label=None))
    result = test_mod.get_outputs()[0]
    result = result.asnumpy()
    '''range adjustment: -1 ~ 1  ->  0 ~ 2  ->  0 ~ 255'''
    #result = np.clip((result + 1.0) * (255.0 / 2.0), 0, 255).astype(np.uint8)
    result = ((result + 1.0) * 127.5).astype(np.uint8)
    '''Convert the image size to 4 times'''
    result = np.asarray([[
        cv2.resize(i, None, fx=2, fy=2, interpolation=cv2.INTER_AREA)
        for i in im
    ] for im in result])

    result = result.transpose((0, 2, 3, 1))
    '''visualization'''
    fig, ax = plt.subplots(row_size,
                           column_size,
                           figsize=(column_size, row_size))
    fig.suptitle('generator')
    for j in xrange(row_size):
        for i in xrange(column_size):
            ax[j][i].set_axis_off()
            if dataset == "MNIST":
                ax[j][i].imshow(result[i + j * column_size], cmap='gray')
            elif dataset == "CIFAR10":
                ax[j][i].imshow(result[i + j * column_size])
            elif dataset == 'ImageNet':
                ax[j][i].imshow(result[i + j * column_size])

    if dataset == "MNIST":
        fig.savefig("Generate_Image/DCGAN_MNIST_Final.png")
    elif dataset == "CIFAR10":
        fig.savefig("Generate_Image/DCGAN_CIFAR10_Final.png")
    elif dataset == 'ImageNet':
        fig.savefig("Generate_Image/DCGAN_ImageNet_Final.png")

    plt.show()
Example #9
def train():
    if use_weights:
        weight = torch.ones(num_classes)
        '''
        #The following weights are taken from https://github.com/Eromera/erfnet_pytorch/blob/master/train/main.py
        weight[0] = 2.8149201869965
        weight[1] = 6.9850029945374
        weight[2] = 3.7890393733978
        weight[3] = 9.9428062438965
        weight[4] = 9.7702074050903
        weight[5] = 9.5110931396484
        weight[6] = 10.311357498169
        weight[7] = 10.026463508606
        weight[8] = 4.6323022842407
        weight[9] = 9.5608062744141
        weight[10] = 7.8698215484619
        weight[11] = 9.5168733596802
        weight[12] = 10.373730659485
        weight[13] = 6.6616044044495
        weight[14] = 10.260489463806
        weight[15] = 10.287888526917
        weight[16] = 10.289801597595
        weight[17] = 10.405355453491
        weight[18] = 10.138095855713
        '''

        #The following weights are calculated using calculate_weights.py (hist.median() / hist)
        weight[0] = 0.0238
        weight[1] = 0.1540
        weight[2] = 0.0447
        weight[3] = 1.3481
        weight[4] = 1.0000
        weight[5] = 0.7090
        weight[6] = 4.6042
        weight[7] = 1.6716
        weight[8] = 0.0622
        weight[9] = 0.7796
        weight[10] = 0.3195
        weight[11] = 0.6157
        weight[12] = 5.2630
        weight[13] = 0.1177
        weight[14] = 3.0565
        weight[15] = 3.2344
        weight[16] = 3.4215
        weight[17] = 8.1690
        weight[18] = 1.9417

    else:
        weight = None

    loader_train = CityscapesLoader2(base_data_folder,
                                     split='train',
                                     img_size=None,
                                     transforms=data_augmentation_train)
    trainloader = data.DataLoader(loader_train,
                                  batch_size=batch_size,
                                  num_workers=num_workers,
                                  shuffle=True,
                                  pin_memory=True)
    #loader_test = CityscapesLoader2(base_data_folder, split='test', is_transform=True, img_size=None, transforms=data_augmentation)
    #test_loader = data.DataLoader(loader_test, batch_size=batch_size, num_workers=num_workers, shuffle=False, pin_memory=True)
    loader_val = CityscapesLoader2(base_data_folder,
                                   split='val',
                                   img_size=None,
                                   transforms=data_augmentation_val)
    valloader = data.DataLoader(loader_val,
                                batch_size=batch_size,
                                num_workers=num_workers,
                                shuffle=False,
                                pin_memory=True)

    model = psp_net.PSPNet(num_classes)

    if TBWriter:
        writer = SummaryWriter('./runs/PSP2/')
    '''
    if resume:
        print("Loading from: ", resume_filename)
        saved_state_dict = torch.load(resume_filename)
        if num_classes != 21:
            for i in saved_state_dict:
                # Scale.layer5.conv2d_list.3.weight
                i_parts = i.split('.')
                if i_parts[1] == 'layer5':
                    saved_state_dict[i] = model.state_dict()[i]

        model.load_state_dict(saved_state_dict)
    '''

    if torch.cuda.is_available():
        print("Using GPU")
        model.cuda(0)
        if use_weights:
            weight = weight.cuda()
    else:
        print("Using CPU")

    model.train()

    if opt == "SGD":
        optimizer = torch.optim.SGD([{
            'params': [
                param for name, param in model.named_parameters()
                if name[-4:] == 'bias'
            ],
            'lr':
            2 * l_rate
        }, {
            'params': [
                param for name, param in model.named_parameters()
                if name[-4:] != 'bias'
            ],
            'lr':
            l_rate,
            'weight_decay':
            weight_decay
        }],
                                    momentum=0.9)
    elif opt == "Adam":
        optimizer = torch.optim.Adam([{
            'params': [
                param for name, param in model.named_parameters()
                if name[-4:] == 'bias'
            ],
            'lr':
            2 * l_rate
        }, {
            'params': [
                param for name, param in model.named_parameters()
                if name[-4:] != 'bias'
            ],
            'lr':
            l_rate,
            'weight_decay':
            weight_decay
        }])

    if resume:
        print("Resuming From ", resume_filename)
        checkpoint = torch.load(resume_filename)
        saved_state_dict = checkpoint['state_dict']
        starting_epoch = checkpoint['epoch']
        starting_iteration = int(checkpoint['iter'] % 35700 / batch_size)
        print("Startin epoch: " + str(starting_epoch) + ", starting iter: ",
              str(starting_iteration))
        if poly_lr:
            lr_ = poly_lr2(l_rate,
                           starting_iteration +
                           len(trainloader) * starting_epoch,
                           lr_decay_iter=1,
                           max_iter=len(trainloader) * epochs)
            if lr_:
                optimizer.param_groups[0]['lr'] = 2 * lr_
                optimizer.param_groups[1]['lr'] = lr_
        model.load_state_dict(saved_state_dict)

    best_metric = 0
    old_file = ""
    old_checkpoint = ""
    train_acc = AverageMeter()
    train_IoU = AverageMeter()
    train_loss = AverageMeter()
    local_acc = AverageMeter(moving_average=moving_average)
    local_IoU = AverageMeter(moving_average=moving_average)
    local_loss = AverageMeter(moving_average=moving_average)
    for epoch in range(starting_epoch, epochs):
        train_acc.reset()
        train_IoU.reset()
        train_loss.reset()
        train_cfmatrix = np.zeros((num_classes, num_classes))

        print("\nEpoch: ", epoch)

        if overlay_during_training and epoch % 1 == 0:
            for i in range(15):
                print("Overlaying image ", i)
                test_img, _ = loader_val[i]
                test_img = test_img.unsqueeze(0)
                #original_img = original_img.unsqueeze(0)
                #original_img = Variable(original_img.cuda())
                model.eval()
                test_pred = model(
                    Variable(test_img.cuda(0), requires_grad=True))
                test_img = Variable(test_img.cuda(0), requires_grad=True)
                #if TBWriter and i==0:
                #    writer.add_graph(model, test_pred)
                #test_pred = F.upsample_bilinear(test_pred, (1024, 2048))
                overlay_images('',
                               test_img,
                               test_pred,
                               epoch,
                               str(i) + '_',
                               convert_id=False)
                del test_pred
                del test_img

        model.train()
        optimizer.zero_grad()
        with tqdm.tqdm(trainloader, ncols=150) as t:
            lr_ = l_rate  # default before poly_lr updates it (avoids an undefined lr_ in the progress bar)
            if epoch == starting_epoch:
                t.update(starting_iteration)
            for i, (images, labels) in enumerate(t):
                if torch.cuda.is_available():
                    images = Variable(images).cuda(0)
                    labels = Variable(labels).cuda(0)
                else:
                    images = Variable(images)
                    labels = Variable(labels)

                iteration = len(trainloader) * epoch + i
                processed_image = i * batch_size
                if epoch == starting_epoch:
                    iteration += starting_iteration
                    processed_image += starting_iteration * batch_size

                outputs, aux = model(images)
                #g = make_dot(outputs)
                #g.save('./t.dot')

                main_loss = misc.cross_entropy2d(outputs,
                                                 labels,
                                                 weight=weight,
                                                 ignore_index=255)
                aux_loss = misc.cross_entropy2d(aux, labels, ignore_index=255)

                loss = main_loss + 0.4 * aux_loss
                loss = loss / update_batches

                loss.backward()

                t.set_description('Loss: %8.4f - LR = %f' %
                                  (update_batches * loss.data[0], lr_))

                train_loss.update(update_batches * loss.data[0])
                local_loss.update(update_batches * loss.data[0])
                acc, IoU, cf_matrix = accuracy_IoU(
                    outputs, labels, np.array(range(num_classes)))
                if acc is not None:
                    train_acc.update(acc)
                    train_IoU.update(np.nanmean(IoU))
                    local_acc.update(acc)
                    local_IoU.update(np.nanmean(IoU))
                    train_cfmatrix = train_cfmatrix + cf_matrix

                if i % update_batches == 0:
                    optimizer.step()
                    if poly_lr:
                        lr_ = poly_lr2(l_rate,
                                       iteration,
                                       lr_decay_iter=1,
                                       max_iter=len(trainloader) * epochs)
                        if lr_:
                            t.set_description(
                                'Step: %8.4f - LR = %f' %
                                (update_batches * loss.data[0], lr_))
                            optimizer.param_groups[0]['lr'] = 2 * lr_
                            optimizer.param_groups[1]['lr'] = lr_

                    #print("%8.2f %%  ->  Loss: %8.6f " % (i / len(trainloader) * 100, loss.data[0]), end='\r')
                    optimizer.zero_grad()

                if local_acc.count > 500 and processed_image % TBUpdate == 0 and TBWriter:
                    writer.add_scalar('Train Accuracy', local_acc.avg,
                                      iteration * batch_size)
                    writer.add_scalar('Train IoU', local_IoU.avg,
                                      iteration * batch_size)
                    writer.add_scalar('Train Loss', local_loss.avg,
                                      iteration * batch_size)

                del outputs
                del loss
                del images
                del labels

                if i > 0 and local_acc.count > 500 and processed_image % checkpoint_save == 0:
                    save_name = base_save_folder + "/checkpoint_" + str(
                        epoch) + "_" + str(processed_image) + "_" + str(
                            local_IoU.avg) + ".pth.tar"
                    torch.save(
                        {
                            'epoch': epoch,
                            'iter': processed_image,
                            'state_dict': model.state_dict(),
                            'optimizer': optimizer.state_dict(),
                        }, save_name)
                    print("Model Saved As " + save_name)
                    if os.path.isfile(old_checkpoint):
                        os.remove(old_checkpoint)
                    old_checkpoint = save_name

                t.update(1)

                if (epoch == starting_epoch
                        and i + starting_iteration + 1 == len(trainloader)):
                    break

        rows = train_cfmatrix.sum(axis=1)
        cols = train_cfmatrix.sum(axis=0)
        IoU = np.empty(train_cfmatrix.shape[0])
        for i in range(train_cfmatrix.shape[0]):
            if rows[i] + cols[i] > 0.:
                IoU[i] = train_cfmatrix[i][i] / (rows[i] + cols[i] -
                                                 train_cfmatrix[i][i])
            else:
                IoU[i] = np.nan
        print("\nMicro Accuracy: ", train_acc.avg)
        print("Macro Accuracy: ",
              np.trace(train_cfmatrix) / np.sum(train_cfmatrix))
        print("Micro IoU: ", train_IoU.avg, "\n")
        print("Macro IoU: ", np.nanmean(IoU), "\n")
        print("Train Loss: ", train_loss.avg)

        if check_validation:
            val_IoU = eval(model)
            '''
            #VALIDATION!!!
            val_acc = AverageMeter()
            val_IoU = AverageMeter()
            val_loss = AverageMeter()
            val_cfmatrix = np.zeros((num_classes, num_classes))
            model.eval()
            for i, (images, labels) in enumerate(valloader):
                if torch.cuda.is_available():
                    images = Variable(images.cuda(0))
                    labels = Variable(labels.cuda(0))
                else:
                    images = Variable(images)
                    labels = Variable(labels)
                iteration = len(trainloader) * epoch + i
                #poly_lr_scheduler(optimizer, l_rate, iter)

                outputs = model(images)

                loss = cross_entropy2d(outputs, labels, ignore_index=255)

                val_loss.update(loss.data[0])
                acc, IoU, cf_matrix = accuracy_IoU(outputs,labels, np.array(range(num_classes)))
                if acc is not None:
                    val_acc.update(acc)
                    val_IoU .update(np.nanmean(IoU))
                    val_cfmatrix = val_cfmatrix + cf_matrix

                del outputs
                del loss
                del images
                del labels
            print("\nVal Accuracy: ", val_acc.avg)
            print("Val Loss: ", val_loss.avg)
            print("Val IoU: ", val_IoU.avg, "\n")
            if TBWriter:
                writer.add_scalar('Val Accuracy', val_acc.avg, epoch)
                writer.add_scalar('Val IoU', val_IoU.avg, epoch)
                writer.add_scalar('Val Loss', val_loss.avg, epoch)
            '''

        save_metric = np.nanmean(IoU)
        if check_validation and doIouOrig:
            save_metric = val_IoU

        if best_metric < save_metric:
            best_metric = save_metric
            print("New Best IoU!")
            if save:
                save_name = (base_save_folder + "/checkpoint_" + str(epoch) +
                             "_" + str(save_metric) + ".pth.tar")
                torch.save(
                    {
                        'epoch': epoch + 1,
                        'iter': 0,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                    }, save_name)
                print("Model Saved As " + save_name)
                if os.path.isfile(old_file):
                    os.remove(old_file)
                old_file = save_name

        print("Best IoU So Far: ", best_metric)

    if TBWriter:
        writer.close()
    print("End Of Training")
Exemplo n.º 10
0
def train():
    weight = torch.ones(num_classes)
    # Per-class balancing weights for the 19 Cityscapes training classes; the
    # values differ between encoder-only and full-network training.
    if enc:
        weight[0] = 2.3653597831726
        weight[1] = 4.4237880706787
        weight[2] = 2.9691488742828
        weight[3] = 5.3442072868347
        weight[4] = 5.2983593940735
        weight[5] = 5.2275490760803
        weight[6] = 5.4394111633301
        weight[7] = 5.3659925460815
        weight[8] = 3.4170460700989
        weight[9] = 5.2414722442627
        weight[10] = 4.7376127243042
        weight[11] = 5.2286224365234
        weight[12] = 5.455126285553
        weight[13] = 4.3019247055054
        weight[14] = 5.4264230728149
        weight[15] = 5.4331531524658
        weight[16] = 5.433765411377
        weight[17] = 5.4631009101868
        weight[18] = 5.3947434425354
    else:
        weight[0] = 2.8149201869965
        weight[1] = 6.9850029945374
        weight[2] = 3.7890393733978
        weight[3] = 9.9428062438965
        weight[4] = 9.7702074050903
        weight[5] = 9.5110931396484
        weight[6] = 10.311357498169
        weight[7] = 10.026463508606
        weight[8] = 4.6323022842407
        weight[9] = 9.5608062744141
        weight[10] = 7.8698215484619
        weight[11] = 9.5168733596802
        weight[12] = 10.373730659485
        weight[13] = 6.6616044044495
        weight[14] = 10.260489463806
        weight[15] = 10.287888526917
        weight[16] = 10.289801597595
        weight[17] = 10.405355453491
        weight[18] = 10.138095855713

    #weight[19] = 0

    loader_train = CityscapesLoader2(base_data_folder, split='train',
                                     img_size=image_shape,
                                     transforms=data_augmentation_train)
    trainloader = data.DataLoader(loader_train, batch_size=batch_size,
                                  num_workers=num_workers, shuffle=True,
                                  pin_memory=True)
    #loader_test = CityscapesLoader2(base_data_folder, split='test', is_transform=True, img_size=None, transforms=data_augmentation)
    #test_loader = data.DataLoader(loader_test, batch_size=batch_size, num_workers=num_workers, shuffle=False, pin_memory=True)
    loader_val = CityscapesLoader2(base_data_folder, split='val',
                                   img_size=image_shape,
                                   transforms=data_augmentation_val)
    valloader = data.DataLoader(loader_val, batch_size=batch_size,
                                num_workers=num_workers, shuffle=False,
                                pin_memory=True)

    model = erfnet.ERFNet(num_classes)

    if TBWriter:
        writer = SummaryWriter('./runs/ERF_Fine/')

    '''
    if resume:
        print("Loading from: ", resume_filename)
        saved_state_dict = torch.load(resume_filename)
        if num_classes != 21:
            for i in saved_state_dict:
                # Scale.layer5.conv2d_list.3.weight
                i_parts = i.split('.')
                if i_parts[1] == 'layer5':
                    saved_state_dict[i] = model.state_dict()[i]

        model.load_state_dict(saved_state_dict)
    '''

    if opt == "SGD":
        optimizer = torch.optim.SGD(model.parameters(), l_rate, momentum=0.9, weight_decay=weight_decay)
    elif opt =="Adam":
        optimizer = torch.optim.Adam(model.parameters(), l_rate, (0.9, 0.999), eps=1e-08, weight_decay=weight_decay)

    starting_epoch = 0
    starting_iteration = 0
    lr_ = l_rate
    if resume:
        print("Loading Encoder Weights from: ", encoder_weights)
        checkpoint = torch.load(encoder_weights)
        saved_state_dict = checkpoint['state_dict']
        print("Startin epoch: "+str(starting_epoch)+", starting iter: ",str(starting_iteration))
        if poly_lr:
            lr_ = poly_lr2(l_rate, starting_iteration + len(trainloader) * starting_epoch, lr_decay_iter=1, max_iter=len(trainloader) * epochs)
            if lr_:
                optimizer.param_groups[0]['lr'] = lr_
        if enc:
            model.load_state_dict(saved_state_dict)
        else:
            pretrained_enc = erfnet.ERFNet(num_classes)
            pretrained_enc.load_state_dict(saved_state_dict)
            pretrained_enc = pretrained_enc.encoder
            if freeze_encoder:
                pretrained_enc.eval()
                for param in pretrained_enc.parameters():
                    param.requires_grad = False
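                # NOTE: requires_grad=False stops weight updates, but the
                # later model.train() call puts every submodule (including
                # this encoder) back into training mode, so its batch-norm
                # statistics will still be updated unless eval() is re-applied
                # after each model.train().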
            decoder = model.decoder
            if decoder_weights is not None:
                print("Loading Decoder Weights from: ", decoder_weights)
                checkpoint = torch.load(decoder_weights)
                saved_state_dict = checkpoint['state_dict']
                #starting_epoch = checkpoint['epoch']
                #starting_iteration = int(checkpoint['iter'] % 2975 / batch_size)
                decoder.load_state_dict(saved_state_dict)

            model.encoder = pretrained_enc
            model.decoder = decoder


    if torch.cuda.is_available():
        print("Using GPU")
        model.cuda(0)
        if use_weights:
            weight = weight.cuda()
    else:
        print("Using CPU")

    model.train()

    best_metric = 0
    old_file = ""
    old_checkpoint = ""
    train_acc = AverageMeter()
    train_IoU = AverageMeter()
    train_loss = AverageMeter()
    local_acc = AverageMeter(moving_average=moving_average)
    local_IoU = AverageMeter(moving_average=moving_average)
    local_loss = AverageMeter(moving_average=moving_average)
    mean_time = AverageMeter()
    for epoch in range(starting_epoch, epochs):
        train_acc.reset()
        train_IoU.reset()
        train_loss.reset()
        mean_time.reset()
        train_cfmatrix = np.zeros((num_classes, num_classes))

        print(colors.YELLOW + "========== EPOCH:" + str(epoch) +
              " ==========" + colors.ENDC)

        model.train()
        optimizer.zero_grad()
        with tqdm.tqdm(trainloader, ncols=150) as t:
            if epoch == starting_epoch:
                t.update(starting_iteration)
            for i, (images, labels) in enumerate(t):
                start_time = time.time()
                if torch.cuda.is_available():
                    images = Variable(images).cuda(0)
                    labels = Variable(labels).cuda(0)
                else:
                    images = Variable(images)
                    labels = Variable(labels)

                iteration = len(trainloader) * epoch + i
                processed_image = i * batch_size
                if epoch == starting_epoch:
                    iteration += starting_iteration
                    processed_image += starting_iteration * batch_size

                if enc:
                    outputs = model(images, only_encode=True)
                else:
                    outputs = model(images, only_encode=False)
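                # The enc flag selects ERFNet's two-stage training: with
                # enc=True only the encoder branch is run and trained, and the
                # resulting checkpoint is what the resume branch above loads
                # as encoder_weights before the full encoder-decoder is trained.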
                #g = make_dot(outputs)
                #g.save('./t.dot')

                main_loss = misc.cross_entropy2d(outputs, labels,
                                                 weight=weight,
                                                 ignore_index=255)

                loss = main_loss
                loss = loss / update_batches

                loss.backward()
                mean_time.update(time.time() - start_time)

                t.set_description('Loss: %8.4f - Time: %8.4f - LR: %8.6f' %
                                  (update_batches * loss.data[0],
                                   mean_time.avg / batch_size, lr_))

                train_loss.update(update_batches * loss.data[0])
                local_loss.update(update_batches * loss.data[0])
                if doTrainStats:
                    if doCFMatrixTrain:
                        acc, IoU, cf_matrix = accuracy_IoU_CFMatrix(
                            outputs, labels, np.array(range(num_classes)))
                        # Collapse the per-class IoUs to a single scalar,
                        # ignoring classes absent from this batch.
                        IoU = np.nanmean(IoU)
                    else:
                        acc, IoU = accuracy_IoU(outputs, labels,
                                                np.array(range(num_classes)))

                    if acc is not None:
                        train_acc.update(acc)
                        train_IoU.update(IoU)
                        local_acc.update(acc)
                        local_IoU.update(IoU)
                        if doCFMatrixTrain:
                            train_cfmatrix = train_cfmatrix + cf_matrix

                if i % update_batches == 0:
                    optimizer.step()
                    if poly_lr:
                        lr_ = poly_lr2(l_rate, iteration, lr_decay_iter=1, max_iter=len(trainloader) * epochs)
                        if lr_:
                            t.set_description('Step: %8.4f - Time: %8.4f - LR: %8.6f' % (
                                update_batches * loss.data[0], mean_time.avg / batch_size, lr_))
                            optimizer.param_groups[0]['lr'] = lr_

                    #print("%8.2f %%  ->  Loss: %8.6f " % (i / len(trainloader) * 100, loss.data[0]), end='\r')
                    optimizer.zero_grad()

                if local_loss.count > int(500 / batch_size) and processed_image % TBUpdate == 0 and TBWriter:
                    writer.add_scalar('Train Loss', local_loss.avg, iteration * batch_size)
                    if doTrainStats:
                        writer.add_scalar('Train Accuracy', local_acc.avg, iteration * batch_size)
                        writer.add_scalar('Train IoU', local_IoU.avg, iteration * batch_size)

                del outputs
                del loss
                del images
                del labels

                if (i > 0 and local_loss.count > int(500 / batch_size)
                        and processed_image % checkpoint_save == 0):
                    save_name = (base_save_folder + "/checkpoint_" + str(epoch) +
                                 "_" + str(processed_image) + "_" +
                                 str(local_loss.avg) + ".pth.tar")
                    torch.save({
                        'epoch': epoch,
                        'iter' : processed_image,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                    },
                        save_name)
                    print("\nModel Saved As " + save_name)
                    if os.path.isfile(old_checkpoint):
                        os.remove(old_checkpoint)
                    old_checkpoint = save_name

                t.update(1)

        if doCFMatrixTrain and doTrainStats:
            rows = train_cfmatrix.sum(axis=1)
            cols = train_cfmatrix.sum(axis=0)
            IoU = np.empty(train_cfmatrix.shape[0])
            for i in range(train_cfmatrix.shape[0]):
                if rows[i] + cols[i] > 0.:
                    IoU[i] = train_cfmatrix[i][i] / (rows[i] + cols[i] - train_cfmatrix[i][i])
                else:
                    IoU[i] = np.nan
            print("Macro IoU: ", np.nanmean(IoU), "\n")
            print("Macro Accuracy: ", np.trace(train_cfmatrix) / np.sum(train_cfmatrix))

        if doTrainStats:
            print("\nMicro Accuracy: ", train_acc.avg)
            print("Micro IoU: ", train_IoU.avg, "\n")
        print("Train Loss: ", train_loss.avg)

        if check_validation:
            val_IoU = eval(model)
            if TBWriter:
                writer.add_scalar('Val IoU', val_IoU, epoch)
        if doCFMatrixTrain:
            save_metric = np.nanmean(IoU)
        elif doTrainStats:
            save_metric = train_IoU.avg
        else:
            save_metric = train_loss.avg
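            # NOTE: the training loss is a lower-is-better quantity, so the
            # `best_metric < save_metric` check below would need to be
            # inverted when this fallback is used.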
        if check_validation and doIouOrig:
            save_metric = val_IoU

        if best_metric < save_metric:
            best_metric = save_metric
            print("New Best IoU!")
            if save:
                save_name = (base_save_folder + "/checkpoint_" + str(epoch) +
                             "_" + str(save_metric) + ".pth.tar")
                torch.save({
                    'epoch': epoch + 1,
                    'iter': 0,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }, save_name)
                print("Model Saved As " + save_name)
                if os.path.isfile(old_file):
                    os.remove(old_file)
                old_file = save_name

        print("Best IoU So Far: ", best_metric)

    if TBWriter:
        writer.close()
    print("End Of Training")