Example #1
def test(sess, dataset):
    with sess.as_default():

        input_image = sess.graph.get_tensor_by_name('Image:0')
        decision_out = sess.graph.get_tensor_by_name('decision_out:0')
        mask_out = sess.graph.get_tensor_by_name('mask_out:0')
        DataManager = dataset
        num_step = 0.0
        accuracy = 0.0
        false_account = 0
        iouGen = iouEval()
        for batch in range(DataManager.number_batch):
            img_batch, mask_batch, label_batch, _ = sess.run(
                DataManager.next_batch)
            start = timer()
            b, decision = sess.run([mask_out, decision_out],
                                   feed_dict={input_image: img_batch})

            iouGen.addBatch(mask_batch, b)
            print(decision)
            end = timer()
            if decision[0][0] > 0.5:  # count positive decisions
                false_account += 1
            # print(end-start)
            if decision[0][0] >= 0.5 and label_batch[0][0] == 1:
                step_accuracy = 1
            elif decision[0][0] < 0.5 and label_batch[0][0] == 0:
                step_accuracy = 1
            else:
                step_accuracy = 0
            accuracy = accuracy + step_accuracy
            num_step = num_step + 1

            cv2.imwrite('visualization/{}.png'.format(str(batch)),
                        np.squeeze(b, axis=(0, -1)) * 255)
        iou = iouGen.getIoU()
        accuracy /= num_step
        print("accuracy: {}		iou:{}".format(accuracy, iou))

        return accuracy
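
A hedged usage sketch for Example #1: it assumes TensorFlow 1.x, a frozen graph exported to a .pb file that contains the tensors named 'Image:0', 'decision_out:0' and 'mask_out:0', and a dataset object exposing number_batch and next_batch as used above. The file name and helper below are illustrative, not part of the original code.

import tensorflow as tf

def load_frozen_graph(pb_path):
    # Parse a serialized GraphDef and import it into a fresh graph.
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(pb_path, 'rb') as f:
        graph_def.ParseFromString(f.read())
    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')
    return graph

# graph = load_frozen_graph('frozen_model.pb')   # hypothetical path
# with tf.Session(graph=graph) as sess:
#     accuracy = test(sess, dataset)             # dataset built elsewhere
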
Example #2
    def valid_segmentation(self):

        with self.sess.as_default():
            self.logger.info('start validating segmentation')
            print('start validating segmentation')
            DataManager = self.DataManager_valid
            total_loss = 0.0
            num_step = 0.0
            accuracy = 0.0
            valIoU = iouEval(self.__Param["batch_size"])

            for batch in range(DataManager.number_batch):
                img_batch, mask_batch, label_batch, _ = self.sess.run(
                    DataManager.next_batch)

                a, b, total_loss_value_batch = self.sess.run(
                    [
                        self.model.mask, self.model.mask_out,
                        self.model.segmentation_loss
                    ],
                    feed_dict={
                        self.model.Image: img_batch,
                        self.model.mask: mask_batch,
                        self.model.label: label_batch,
                        self.model.is_training_seg: TRAIN_MODE_IN_VALID,
                        self.model.is_training_dec: TRAIN_MODE_IN_VALID
                    })
                # self.visualization(img_batch, label_pixel_batch,mask_batch, file_name_batch,save_dir=visualization_dir)
                valIoU.addBatch(a, b)

                # total_loss = (total_loss*(num_step)+ total_loss_value_batch)/(num_step+1)
                num_step = num_step + 1
                total_loss += total_loss_value_batch
            total_loss /= num_step
            self.logger.info(" validation loss = {}".format(total_loss))
            val_iou = valIoU.getIoU()
            return total_loss, val_iou
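
Examples #1 and #2 accumulate IoU through an iouEval object with addBatch()/getIoU(). The real class is defined elsewhere in the repository; the following is only a minimal sketch of a binary-mask accumulator with the same interface, assuming masks and predictions are arrays whose values can be thresholded at 0.5.

import numpy as np

class BinaryIoU:
    """Running intersection-over-union for batches of binary masks (sketch only)."""

    def __init__(self, threshold=0.5):
        self.threshold = threshold
        self.intersection = 0.0
        self.union = 0.0

    def addBatch(self, mask, pred):
        # Threshold both arrays and accumulate intersection and union pixel counts.
        m = np.asarray(mask) > self.threshold
        p = np.asarray(pred) > self.threshold
        self.intersection += np.logical_and(m, p).sum()
        self.union += np.logical_or(m, p).sum()

    def getIoU(self):
        # Guard against an empty union (no positive pixels seen at all).
        return self.intersection / max(self.union, 1.0)
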
Example #3
def train(args, model, enc=False):
    best_acc = 0

    #TODO: calculate weights by processing the dataset histogram (for now they are set by hand from the torch values)
    #create a loader to run over all images, build a histogram of labels, then derive the weight array via class balancing (see the class-weight sketch after this example)

    weight = torch.ones(NUM_CLASSES)
    if (enc):
        weight[0] = 2.3653597831726	
        weight[1] = 4.4237880706787	
        weight[2] = 2.9691488742828	
        weight[3] = 5.3442072868347	
        weight[4] = 5.2983593940735	
        weight[5] = 5.2275490760803	
        weight[6] = 5.4394111633301	
        weight[7] = 5.3659925460815	
        weight[8] = 3.4170460700989	
        weight[9] = 5.2414722442627	
        weight[10] = 4.7376127243042	
        weight[11] = 5.2286224365234	
        weight[12] = 5.455126285553	
        weight[13] = 4.3019247055054	
        weight[14] = 5.4264230728149	
        weight[15] = 5.4331531524658	
        weight[16] = 5.433765411377	
        weight[17] = 5.4631009101868	
        weight[18] = 5.3947434425354
    else:
        weight[0] = 2.8149201869965	
        weight[1] = 6.9850029945374	
        weight[2] = 3.7890393733978	
        weight[3] = 9.9428062438965	
        weight[4] = 9.7702074050903	
        weight[5] = 9.5110931396484	
        weight[6] = 10.311357498169	
        weight[7] = 10.026463508606	
        weight[8] = 4.6323022842407	
        weight[9] = 9.5608062744141	
        weight[10] = 7.8698215484619	
        weight[11] = 9.5168733596802	
        weight[12] = 10.373730659485	
        weight[13] = 6.6616044044495	
        weight[14] = 10.260489463806	
        weight[15] = 10.287888526917	
        weight[16] = 10.289801597595	
        weight[17] = 10.405355453491	
        weight[18] = 10.138095855713	

    weight[19] = 0

    assert os.path.exists(args.datadir), "Error: datadir (dataset directory) could not be loaded"

    co_transform = MyCoTransform(enc, augment=True, height=args.height)#1024)
    co_transform_val = MyCoTransform(enc, augment=False, height=args.height)#1024)
    dataset_train = cityscapes(args.datadir, co_transform, 'train',50)
    dataset_val = cityscapes(args.datadir, co_transform_val, 'val',100)
    print(len(dataset_train))
    loader = DataLoader(dataset_train, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=True)
    loader_val = DataLoader(dataset_val, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=False)
#     print(list(enumerate(loader)))
    if args.cuda:
        weight = weight.cuda()
    criterion = CrossEntropyLoss2d(weight)

    savedir = f'../save/{args.savedir}'

    if (enc):
        automated_log_path = savedir + "/automated_log_encoder.txt"
        modeltxtpath = savedir + "/model_encoder.txt"
    else:
        automated_log_path = savedir + "/automated_log.txt"
        modeltxtpath = savedir + "/model.txt"    

    if (not os.path.exists(automated_log_path)):    #don't add the header line if the file already exists
        with open(automated_log_path, "a") as myfile:
            myfile.write("Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tTest-IoU\t\tlearningRate")

    with open(modeltxtpath, "w") as myfile:
        myfile.write(str(model))


    #TODO: reduce memory in first gpu: https://discuss.pytorch.org/t/multi-gpu-training-memory-usage-in-balance/4163/4        #https://github.com/pytorch/pytorch/issues/1893

    #optimizer = Adam(model.parameters(), 5e-4, (0.9, 0.999),  eps=1e-08, weight_decay=2e-4)     ## scheduler 1
    optimizer = Adam(model.parameters(), 5e-4, (0.9, 0.999),  eps=1e-08, weight_decay=1e-4)      ## scheduler 2

    start_epoch = 1
    if args.resume:
        #Must load weights, optimizer, epoch and best value. 
        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth.tar'
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth.tar'

        assert os.path.exists(filenameCheckpoint), "Error: resume option was used but checkpoint was not found in folder"
        checkpoint = torch.load(filenameCheckpoint)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        best_acc = checkpoint['best_acc']
        print("=> Loaded checkpoint at epoch {})".format(checkpoint['epoch']))

    #scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5) # set up scheduler     ## scheduler 1
    lambda1 = lambda epoch: pow((1-((epoch-1)/args.num_epochs)),0.9)  ## scheduler 2
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)                             ## scheduler 2

    if args.visualize and args.steps_plot > 0:
        board = Dashboard(args.port)

    for epoch in range(start_epoch, args.num_epochs+1):
        print("----- TRAINING - EPOCH", epoch, "-----")

        scheduler.step(epoch)    ## scheduler 2

        epoch_loss = []
        time_train = []
     
        doIouTrain = args.iouTrain   
        doIouVal =  args.iouVal      

        if (doIouTrain):
            iouEvalTrain = iouEval(NUM_CLASSES)

        usedLr = 0
        for param_group in optimizer.param_groups:
            print("LEARNING RATE: ", param_group['lr'])
            usedLr = float(param_group['lr'])

        model.train()
        #print("this is me!!!!!")
        #print(len(loader))
        for step, (images, labels) in enumerate(loader):

            start_time = time.time()
            #print("this is also m")
            #print (labels.size())
            #print (np.unique(labels.numpy()))
            #print("labels: ", np.unique(labels[0].numpy()))
            #labels = torch.ones(4, 1, 512, 1024).long()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs, only_encode=enc)

            #print("targets", np.unique(targets[:, 0].cpu().data.numpy()))
            #print("This is me on traget")
            #print(np.min(targets.cpu().detach().numpy()))
            #print("This is me after target")
            optimizer.zero_grad()
            loss = criterion(outputs, targets[:, 0])
            #print("This is me on loss")
            #print(loss)
            #print("This is me after loss")
            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.cpu().detach().numpy().item())
            time_train.append(time.time() - start_time)

            if (doIouTrain):
                #start_time_iou = time.time()
                iouEvalTrain.addBatch(outputs.max(1)[1].unsqueeze(1).data, targets.data)
                #print ("Time to add confusion matrix: ", time.time() - start_time_iou)      

            #print(outputs.size())
            if args.visualize and args.steps_plot > 0 and step % args.steps_plot == 0:
                start_time_plot = time.time()
                image = inputs[0].cpu().data
                #image[0] = image[0] * .229 + .485
                #image[1] = image[1] * .224 + .456
                #image[2] = image[2] * .225 + .406
                #print("output", np.unique(outputs[0].cpu().max(0)[1].data.numpy()))
                board.image(image, f'input (epoch: {epoch}, step: {step})')
                if isinstance(outputs, list):   #merge gpu tensors
                    board.image(color_transform(outputs[0][0].cpu().max(0)[1].data.unsqueeze(0)),
                    f'output (epoch: {epoch}, step: {step})')
                else:
                    board.image(color_transform(outputs[0].cpu().max(0)[1].data.unsqueeze(0)),
                    f'output (epoch: {epoch}, step: {step})')
                board.image(color_transform(targets[0].cpu().data),
                    f'target (epoch: {epoch}, step: {step})')
                print ("Time to paint images: ", time.time() - start_time_plot)
            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print(f'loss: {average:0.4} (epoch: {epoch}, step: {step})', 
                        "// Avg time/img: %.4f s" % (sum(time_train) / len(time_train) / args.batch_size))

            
        average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)
        
        iouTrain = 0
        if (doIouTrain):
            iouTrain, iou_classes = iouEvalTrain.getIoU()
            iouStr = getColorEntry(iouTrain)+'{:0.2f}'.format(iouTrain*100) + '\033[0m'
            print ("EPOCH IoU on TRAIN set: ", iouStr, "%")  

        #Validate on 500 val images after each epoch of training
        print("----- VALIDATING - EPOCH", epoch, "-----")
        model.eval()
        epoch_loss_val = []
        time_val = []

        if (doIouVal):
            iouEvalVal = iouEval(NUM_CLASSES)

        for step, (images, labels) in enumerate(loader_val):
            start_time = time.time()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(images, volatile=True)    #volatile skips storing intermediates for backward during eval (deprecated; newer PyTorch uses torch.no_grad())
            targets = Variable(labels, volatile=True)
            outputs = model(inputs, only_encode=enc) 

            loss = criterion(outputs, targets[:, 0])
            epoch_loss_val.append(loss.cpu().detach().numpy().item())
            time_val.append(time.time() - start_time)


            #Add batch to calculate TP, FP and FN for iou estimation
            if (doIouVal):
                #start_time_iou = time.time()
                iouEvalVal.addBatch(outputs.max(1)[1].unsqueeze(1).data, targets.data)
                #print ("Time to add confusion matrix: ", time.time() - start_time_iou)

            if args.visualize and args.steps_plot > 0 and step % args.steps_plot == 0:
                start_time_plot = time.time()
                image = inputs[0].cpu().data
                board.image(image, f'VAL input (epoch: {epoch}, step: {step})')
                if isinstance(outputs, list):   #merge gpu tensors
                    board.image(color_transform(outputs[0][0].cpu().max(0)[1].data.unsqueeze(0)),
                    f'VAL output (epoch: {epoch}, step: {step})')
                else:
                    board.image(color_transform(outputs[0].cpu().max(0)[1].data.unsqueeze(0)),
                    f'VAL output (epoch: {epoch}, step: {step})')
                board.image(color_transform(targets[0].cpu().data),
                    f'VAL target (epoch: {epoch}, step: {step})')
                print ("Time to paint images: ", time.time() - start_time_plot)
            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss_val) / len(epoch_loss_val)
                print(f'VAL loss: {average:0.4} (epoch: {epoch}, step: {step})', 
                        "// Avg time/img: %.4f s" % (sum(time_val) / len(time_val) / args.batch_size))
                       

        average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)
        #scheduler.step(average_epoch_loss_val, epoch)  ## scheduler 1   # update lr if needed

        iouVal = 0
        if (doIouVal):
            iouVal, iou_classes = iouEvalVal.getIoU()
            iouStr = getColorEntry(iouVal)+'{:0.2f}'.format(iouVal*100) + '\033[0m'
            print ("EPOCH IoU on VAL set: ", iouStr, "%") 
           

        # remember best valIoU and save checkpoint
        if iouVal == 0:
            current_acc = -average_epoch_loss_val
        else:
            current_acc = iouVal 
        is_best = current_acc > best_acc
        best_acc = max(current_acc, best_acc)
        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth.tar'
            filenameBest = savedir + '/model_best_enc.pth.tar'    
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth.tar'
            filenameBest = savedir + '/model_best.pth.tar'
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': str(model),
            'state_dict': model.state_dict(),
            'best_acc': best_acc,
            'optimizer' : optimizer.state_dict(),
        }, is_best, filenameCheckpoint, filenameBest)

        #SAVE MODEL AFTER EPOCH
        if (enc):
            filename = f'{savedir}/model_encoder-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_encoder_best.pth'
        else:
            filename = f'{savedir}/model-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_best.pth'
        if args.epochs_save > 0 and epoch % args.epochs_save == 0:
            torch.save(model.state_dict(), filename)
            print(f'save: {filename} (epoch: {epoch})')
        if (is_best):
            torch.save(model.state_dict(), filenamebest)
            print(f'save: {filenamebest} (epoch: {epoch})')
            if (not enc):
                with open(savedir + "/best.txt", "w") as myfile:
                    myfile.write("Best epoch is %d, with Val-IoU= %.4f" % (epoch, iouVal))   
            else:
                with open(savedir + "/best_encoder.txt", "w") as myfile:
                    myfile.write("Best epoch is %d, with Val-IoU= %.4f" % (epoch, iouVal))           

        #SAVE TO FILE A ROW WITH THE EPOCH RESULT (train loss, val loss, train IoU, val IoU)
        #Epoch		Train-loss		Test-loss	Train-IoU	Test-IoU		learningRate
        with open(automated_log_path, "a") as myfile:
            myfile.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f" % (epoch, average_epoch_loss_train, average_epoch_loss_val, iouTrain, iouVal, usedLr ))
    
    return(model)   #return model (convenience for encoder-decoder training)
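
A minimal sketch of the class-weighting TODO at the top of Example #3, assuming ENet-style weights w_c = 1 / ln(c + p_c) computed from the label histogram. The constant c = 1.02, the loader protocol (batches of (image, label) with integer class indices) and the helper name are assumptions, not part of the original code.

import torch

def compute_class_weights(loader, num_classes, c=1.02):
    """Accumulate a label histogram over the dataset and derive per-class weights."""
    counts = torch.zeros(num_classes, dtype=torch.float64)
    for _, labels in loader:
        flat = labels.flatten()
        # Keep only valid class indices (drops any ignore label such as 255).
        flat = flat[(flat >= 0) & (flat < num_classes)]
        counts += torch.bincount(flat, minlength=num_classes).double()
    freqs = counts / counts.sum()
    # ENet weighting: rarer classes receive larger weights, bounded by the log term.
    return (1.0 / torch.log(c + freqs)).float()

# weight = compute_class_weights(loader, NUM_CLASSES)   # would replace the hand-set values
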
Example #4
def main():
    best_acc = 0

    co_transform = MyCoTransform(ENCODER_ONLY,
                                 augment=True,
                                 height=IMAGE_HEIGHT)
    co_transform_val = MyCoTransform(ENCODER_ONLY,
                                     augment=False,
                                     height=IMAGE_HEIGHT)

    #train data
    dataset_train = idd_lite(DATA_ROOT, co_transform, 'train')
    print("length of training set: ", len(dataset_train))
    #test data
    dataset_val = idd_lite(DATA_ROOT, co_transform_val, 'val')
    print("length of validation set: ", len(dataset_val))

    # NOTE: PLEASE DON'T CHANGE batch_size and num_workers here. We have limited resources.
    loader_train = DataLoader(dataset_train,
                              num_workers=NUM_WORKERS,
                              batch_size=BATCH_SIZE,
                              shuffle=True)
    loader_val = DataLoader(dataset_val,
                            num_workers=NUM_WORKERS,
                            batch_size=BATCH_SIZE,
                            shuffle=True)
    dataiter = iter(loader_val)
    seven_val_images = []
    for i in range(7):
        (val_image_A, val_image_B, val_image_labels) = next(dataiter)
        seven_val_images.append(
            (val_image_A.to(device), val_image_B.to(device)))
        # cv2.imwrite silently fails if the destination folder does not exist
        os.makedirs(os.path.join(OUTPUT_DIR, str(i)), exist_ok=True)
        cv2.imwrite(
            os.path.join(OUTPUT_DIR, str(i), 'A.tiff'),
            np.rollaxis((val_image_A[0, :, :, :].squeeze().cpu().numpy() *
                         255).astype('uint8'), 0, 3))
        cv2.imwrite(
            os.path.join(OUTPUT_DIR, str(i), 'B.tiff'),
            np.rollaxis((val_image_B[0, :, :, :].squeeze().cpu().numpy() *
                         255).astype('uint8'), 0, 3))
        cv2.imwrite(os.path.join(OUTPUT_DIR, str(i), 'label.tiff'),
                    (val_image_labels[0, :, :, :].squeeze().cpu().numpy()
                     ).astype('uint8'))

    # ## Cross Entropy  Loss ##
    # Negative Log Loss   |Plot of -log(x) vs x
    # - | -
    # ![alt](img/nll.png) | ![alt](img/nll-log.png)
    #
    # The negative log-likelihood is large at small probabilities (it grows towards infinity as the probability approaches zero) and small at large ones. Because the loss is summed over the correct classes, whenever the network assigns high confidence to the correct class the loss is low, and when it assigns low confidence to the correct class the loss is high. A small numerical check follows this example.


    criterion = torch.nn.CrossEntropyLoss()

    #get some random training images
    print("length of training couples: ", len(loader_train))
    print(len(loader_val))
    dataiter = iter(loader_train)
    (images, images1, labels, filename) = next(dataiter)  #ChangedByUs
    # for step, (images, labels) in enumerate(loader_train):
    # plt.figure()
    # plt.imshow(ToPILImage()(images[0].cpu()))
    # plt.figure()
    # plt.imshow(ToPILImage()(Colorize()(labels[0].cpu())))
    # break

    # ## Model ##

    model_file = importlib.import_module('erfnet')
    model = model_file.Net(NUM_CLASSES).to(device)

    # ### Optimizer ###

    # We use adam optimizer. It can be replaced with SGD and other optimizers
    optimizer = Adam(model.parameters(),
                     5e-4, (0.9, 0.999),
                     eps=1e-08,
                     weight_decay=1e-4)
    start_epoch = 1

    print("device used: ", device)

    # ### Training Procedure ###
    softmax = torch.nn.Softmax(dim=1)

    steps_loss = 50
    my_start_time = time.time()
    for epoch in range(start_epoch, NUM_EPOCHS + 1):
        print("----- TRAINING - EPOCH", epoch, "-----")

        epoch_loss = []
        time_train = []

        doIouTrain = IOUTRAIN
        doIouVal = IOUVAL

        if (doIouTrain):
            iouEvalTrain = iouEval(NUM_CLASSES)

        model.train()
        for step, (images, images1, labels,
                   filename) in enumerate(loader_train):  #ChangedByUs
            start_time = time.time()
            # inputs = [images.to(device), images1.to(device)] #ChangedByUs
            inputs = images.to(device)
            inputs1 = images1.to(device)  #ChangedByUs
            targets = labels.to(device)
            targets_orig = targets.clone()
            targets[targets_orig >= 128] = 1  # ChangedByUs
            targets[targets_orig < 128] = 0  # ChangedByUs
            #for x_u in targets.unique():
            #    print(int(x_u), ' appears ', int(torch.stack([(targets==x_u).sum()])), ' times.\n')
            outputs = model([inputs, inputs1], only_encode=ENCODER_ONLY)
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            loss = criterion(outputs, targets[:, 0])
            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.item())
            time_train.append(time.time() - start_time)

            if (doIouTrain):
                #start_time_iou = time.time()
                iouEvalTrain.addBatch(
                    outputs.max(1)[1].unsqueeze(1).data, targets.data)
                #print ("Time to add confusion matrix: ", time.time() - start_time_iou)

            # print statistics
            if steps_loss > 0 and step % steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print(
                    f'loss: {average:0.4} (epoch: {epoch}, step: {step})',
                    "// Avg time/img: %.4f s" %
                    (sum(time_train) / len(time_train) / BATCH_SIZE))

        average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)

        iouTrain = 0
        if (doIouTrain):
            iouTrain, iou_classes = iouEvalTrain.getIoU()
            iouStr = getColorEntry(iouTrain) + '{:0.2f}'.format(
                iouTrain * 100) + '\033[0m'
            print("EPOCH IoU on TRAIN set: ", iouStr, "%")

        #save one image per epoch
        # if USE_CUDA:
        #     first_val_image_A = first_val_image_A.to(device)
        #     first_val_image_B = first_val_image_B.to(device)  # ChangedByUs
        #     first_val_image_labels = first_val_image_labels.to(device)
        #
        # inputs = first_val_image_A.to(device)
        # inputs1 = first_val_image_B.to(device)  # ChangedByUs
        for i in range(len(seven_val_images)):
            outputs_val = model(
                [seven_val_images[i][0], seven_val_images[i][1]],  # already moved to `device` above
                only_encode=ENCODER_ONLY)
            outputs_val = softmax(outputs_val)
            cv2.imwrite(
                os.path.join(OUTPUT_DIR, str(i),
                             'epoch' + str(epoch) + '_output.tiff'),
                (((outputs_val[0, 1, :, :] > 0.5) *
                  255).squeeze().cpu().numpy()).astype('uint8'))

    my_end_time = time.time()
    print(my_end_time - my_start_time)

    print(
        f'loss: {average:0.4} (epoch: {epoch}, step: {step})',
        "// Avg time/img: %.4f s" %
        (sum(time_train) / len(time_train) / BATCH_SIZE))

    # # ### Validation ###
    # #Validate on val images after each epoch of training
    # print("----- VALIDATING - EPOCH", epoch, "-----")
    # model.eval()
    # epoch_loss_val = []
    # time_val = []
    #
    # if (doIouVal):
    #     iouEvalVal = iouEval(NUM_CLASSES)
    #
    # for step, (images, labels) in enumerate(loader_val):
    #     start_time = time.time()
    #
    #     inputs = images.to(device)
    #     targets = labels.to(device)
    #
    #     with torch.no_grad():
    #         outputs = model(inputs, only_encode=ENCODER_ONLY)
    #         #outputs = model(inputs)
    #     loss = criterion(outputs, targets[:, 0])
    #     epoch_loss_val.append(loss.item())
    #     time_val.append(time.time() - start_time)
    #
    #
    #     #Add batch to calculate TP, FP and FN for iou estimation
    #     if (doIouVal):
    #         #start_time_iou = time.time()
    #         iouEvalVal.addBatch(outputs.max(1)[1].unsqueeze(1).data, targets.data)
    #         #print ("Time to add confusion matrix: ", time.time() - start_time_iou)
    #
    #     if steps_loss > 0 and step % steps_loss == 0:
    #         average = sum(epoch_loss_val) / len(epoch_loss_val)
    #         print('VAL loss: {average:',average,'} (epoch: {',epoch,'}, step: {',step,'})',
    #                 "// Avg time/img: %.4f s" % (sum(time_val) / len(time_val) / BATCH_SIZE))
    #
    #
    # average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)
    #
    # iouVal = 0
    # if (doIouVal):
    #
    #     iouVal, iou_classes = iouEvalVal.getIoU()
    #     print(iou_classes)
    #     iouStr = getColorEntry(iouVal)+'{:0.2f}'.format(iouVal*100) + '\033[0m'
    #     print ("EPOCH IoU on VAL set: ", iouStr, "%")

    #
    #  ### Visualizing the Output###
    torch.save(model.state_dict(), r'C:\Users\inbal.tlgip\modelsave.pt')
    # Qualitative Analysis

    ##################### calc iou on test data #####################
    dataset_test = idd_lite(DATA_ROOT, co_transform_val, 'test')
    loader_test = DataLoader(dataset_test,
                             num_workers=NUM_WORKERS,
                             batch_size=BATCH_SIZE,
                             shuffle=True)
    # dataiter = iter(loader_test)
    # (val_image_A, val_image_B, val_image_labels) = dataiter.next()
    for step, (images, images1, labels, filename) in enumerate(loader_test):

        outputs_val = model([images.to(device), images1.to(device)],
                            only_encode=ENCODER_ONLY)
        outputs_val = softmax(outputs_val)
        cv2.imwrite(
            r'D:\Users Data\inbal.tlgip\Project\output_images\test_output/' +
            str(step) + '.tiff',
            (((outputs_val[0, 1, :, :] > 0.5) *
              255).squeeze().cpu().numpy()).astype('uint8'))
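
A small numerical check of the cross-entropy intuition described in the comments of Example #4 (made-up logits, not part of the original script): a confident, correct prediction yields a low loss, a nearly uniform one a much higher loss.

import torch
import torch.nn.functional as F

target = torch.tensor([1])                    # the correct class is index 1
confident = torch.tensor([[0.1, 5.0, 0.1]])   # large logit on the correct class
unsure = torch.tensor([[1.0, 1.1, 1.0]])      # almost uniform logits

print(F.cross_entropy(confident, target))     # ~0.015
print(F.cross_entropy(unsure, target))        # ~1.03
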
Example #5
    def train_segmentation(self):

        with self.sess.as_default():
            self.logger.info('start training segmentation net')

            print('Start training for {} epochs, {} steps per epoch'.format(
                self.__Param["epochs_num"],
                self.DataManager_train.number_batch))
            best_loss = 10000
            for i in range(self.model.step,
                           self.__Param["epochs_num"] + self.model.step):
                trainIoU = iouEval(self.__Param["batch_size"])
                print('Epoch {}:'.format(i))
                with tqdm(total=self.DataManager_train.number_batch) as pbar:
                    # epoch start
                    iter_loss = 0.0
                    num_step = 0.0
                    accuracy = 0.0
                    for batch in range(self.DataManager_train.number_batch):
                        # run_options = tf.RunOptions()
                        # run_metadata = tf.RunMetadata()
                        # batch start

                        # print(self.sess.run(
                        #     self.sess.graph.get_tensor_by_name('segmentation/MixnetBlock_0_bn1/moving_mean:0')))
                        # print(self.sess.run(
                        #     self.sess.graph.get_tensor_by_name('segmentation/MixnetBlock_0_bn1/moving_variance:0')))

                        img_batch, mask_batch, label_batch, _ = self.sess.run(
                            self.DataManager_train.next_batch)

                        a, b, _, loss_value_batch = self.sess.run(
                            [
                                self.model.mask, self.model.mask_out,
                                self.model.optimize_segment,
                                self.model.segmentation_loss
                            ],
                            # self.model.merged],
                            feed_dict={
                                self.model.Image: img_batch,
                                self.model.mask: mask_batch,
                                self.model.label: label_batch,
                                self.model.is_training_seg:
                                TRAIN_MODE_IN_TRAIN,
                                self.model.is_training_dec: False
                            })
                        # options=run_options,
                        # run_metadata=run_metadata)
                        trainIoU.addBatch(a, b)

                        # self.model.train_writer.add_run_metadata(run_metadata, 'step%03d' % batch)
                        # iter_loss = (iter_loss*(num_step)+ loss_value_batch)/(num_step+1)
                        iter_loss += loss_value_batch
                        num_step = num_step + 1
                        pbar.update(1)
                        # self.model.train_writer.add_summary(summary, batch)
                pbar.close()
                iter_loss /= num_step
                train_iou = trainIoU.getIoU()
                self.logger.info(
                    'epoch:[{}], train mode, loss: {}, iou: {}'.format(
                        self.model.step, iter_loss, train_iou))
                # validation
                self.model.step += 1
                # if i % self.__Param["valid_frequency"] == 0 and i>0:
                val_loss, val_iou = self.valid_segmentation()
                print('train_loss:{}, train_iou:{},  val_loss:{}, val_iou:{}'.
                      format(iter_loss, train_iou, val_loss, val_iou))

                # save the model
                if i % self.__Param["save_frequency"] == 0 or i == self.__Param[
                        "epochs_num"] + self.model.step - 1:
                    # if val_loss < best_loss:
                    # best_loss = val_loss
                    # print('reduce loss to {}, saving model at epoch:{}'.format(val_loss, i))
                    self.model.save()
Example #6
def main(args):

    modelpath = args.loadDir + args.loadModel
    weightspath = args.loadDir + args.loadWeights

    print("Loading model: " + modelpath)
    print("Loading weights: " + weightspath)

    model = ERFNet(NUM_CLASSES)

    model = torch.nn.DataParallel(model)
    if (not args.cpu):
        model = model.cuda()

    def load_my_state_dict(
            model, state_dict
    ):  # custom function to load the model when not all state-dict entries match (see the note after this example)
        own_state = model.state_dict()
        for name, param in state_dict.items():
            if name not in own_state:
                print(name, " not loaded")
                continue
            own_state[name].copy_(param)
        return model

    model = load_my_state_dict(model, torch.load(weightspath))
    print("Model and weights LOADED successfully")

    model.eval()

    if (not os.path.exists(args.datadir)):
        print("Error: datadir could not be loaded")

    loader = DataLoader(cityscapes(args.datadir,
                                   input_transform_cityscapes,
                                   target_transform_cityscapes,
                                   subset=args.subset),
                        num_workers=args.num_workers,
                        batch_size=args.batch_size,
                        shuffle=False)

    iouEvalVal = iouEval(NUM_CLASSES)

    start = time.time()

    for step, (images, labels, filename, filenameGt) in enumerate(loader):
        if (not args.cpu):
            images = images.cuda()
            labels = labels.cuda()

        inputs = Variable(images, volatile=True)
        outputs = model(inputs)

        iouEvalVal.addBatch(outputs.max(1)[1].unsqueeze(1).data, labels)

        filenameSave = filename[0].split("leftImg8bit/")[1]

        print(step, filenameSave)

    iouVal, iou_classes = iouEvalVal.getIoU()

    iou_classes_str = []
    for i in range(iou_classes.size(0)):
        iouStr = getColorEntry(iou_classes[i]) + '{:0.2f}'.format(
            iou_classes[i] * 100) + '\033[0m'
        iou_classes_str.append(iouStr)

    print("---------------------------------------")
    print("Took ", time.time() - start, "seconds")
    print("=======================================")
    #print("TOTAL IOU: ", iou * 100, "%")
    print("Per-Class IoU:")
    print(iou_classes_str[0], "Road")
    print(iou_classes_str[1], "sidewalk")
    print(iou_classes_str[2], "building")
    print(iou_classes_str[3], "wall")
    print(iou_classes_str[4], "fence")
    print(iou_classes_str[5], "pole")
    print(iou_classes_str[6], "traffic light")
    print(iou_classes_str[7], "traffic sign")
    print(iou_classes_str[8], "vegetation")
    print(iou_classes_str[9], "terrain")
    print(iou_classes_str[10], "sky")
    print(iou_classes_str[11], "person")
    print(iou_classes_str[12], "rider")
    print(iou_classes_str[13], "car")
    print(iou_classes_str[14], "truck")
    print(iou_classes_str[15], "bus")
    print(iou_classes_str[16], "train")
    print(iou_classes_str[17], "motorcycle")
    print(iou_classes_str[18], "bicycle")
    print("=======================================")
    iouStr = getColorEntry(iouVal) + '{:0.2f}'.format(iouVal * 100) + '\033[0m'
    print("MEAN IoU: ", iouStr, "%")
Example #7
def train(args, model, enc=False):
    best_acc = 0

    #TODO: calculate weights by processing the dataset histogram (for now they are set by hand from the torch values)
    #create a loader to run over all images, build a histogram of labels, then derive the weight array via class balancing (see the class-weight sketch after Example #3)

    weight = torch.ones(NUM_CLASSES)
    if (enc):        
        weight[0] = 4.38133159
        weight[1] = 1.29574148
    else:
        weight[0] = 4.40513628
        weight[1] = 1.293674
        
    if (enc):
        up = torch.nn.Upsample(scale_factor=16, mode='bilinear')
    else:
        up = torch.nn.Upsample(scale_factor=2, mode='bilinear')
        
    if args.cuda:
        up = up.cuda()

    assert os.path.exists(args.datadir), "Error: datadir (dataset directory) could not be loaded"

    co_transform = MyCoTransform(enc, augment=True, height=args.height)#1024)
    co_transform_val = MyCoTransform(enc, augment=False, height=args.height)#1024)
    dataset_train = cityscapes(args.datadir, co_transform, 'train')
    dataset_val = cityscapes(args.datadir, co_transform_val, 'val')

    loader = DataLoader(dataset_train, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=True)
    loader_val = DataLoader(dataset_val, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=False)

    if args.cuda:
        weight = weight.cuda()
  
    if args.weighted:
        criterion = CrossEntropyLoss2d(weight)
    else:            
        criterion = CrossEntropyLoss2d()
        
    print(type(criterion))

    savedir = args.savedir

    if (enc):
        automated_log_path = savedir + "/automated_log_encoder.txt"
        modeltxtpath = savedir + "/model_encoder.txt"
    else:
        automated_log_path = savedir + "/automated_log.txt"
        modeltxtpath = savedir + "/model.txt"    

    if (not os.path.exists(automated_log_path)):    #don't add the header line if the file already exists
        with open(automated_log_path, "a") as myfile:
            myfile.write("Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tTest-IoU\t\tlearningRate")

    with open(modeltxtpath, "w") as myfile:
        myfile.write(str(model))


    #TODO: reduce memory in first gpu: https://discuss.pytorch.org/t/multi-gpu-training-memory-usage-in-balance/4163/4        #https://github.com/pytorch/pytorch/issues/1893

    #optimizer = Adam(model.parameters(), 5e-4, (0.9, 0.999),  eps=1e-08, weight_decay=2e-4)     ## scheduler 1
    optimizer = Adam(model.parameters(), 5e-4, (0.9, 0.999),  eps=1e-08, weight_decay=1e-4)      ## scheduler 2

    start_epoch = 1
    if args.resume:
        #Must load weights, optimizer, epoch and best value. 
        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth.tar'
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth.tar'

        assert os.path.exists(filenameCheckpoint), "Error: resume option was used but checkpoint was not found in folder"
        checkpoint = torch.load(filenameCheckpoint)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        best_acc = checkpoint['best_acc']
        print("=> Loaded checkpoint at epoch {})".format(checkpoint['epoch']))

    #scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5) # set up scheduler     ## scheduler 1
    lambda1 = lambda epoch: pow((1-((epoch-1)/args.num_epochs)),0.9)  ## scheduler 2
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)                             ## scheduler 2

    if args.visualize and args.steps_plot > 0:
        board = Dashboard(args.port)

    for epoch in range(start_epoch, args.num_epochs+1):
        print("----- TRAINING - EPOCH", epoch, "-----")

        scheduler.step(epoch)    ## scheduler 2

        epoch_loss = []
        time_train = []
     
        doIouTrain = args.iouTrain   
        doIouVal =  args.iouVal      

        if (doIouTrain):
            iouEvalTrain = iouEval(NUM_CLASSES, args.ignoreindex)

        usedLr = 0
        for param_group in optimizer.param_groups:
            print("LEARNING RATE: ", param_group['lr'])
            usedLr = float(param_group['lr'])

        model.train()
        for step, (images, labels, images_orig, labels_orig) in enumerate(loader):

            start_time = time.time()
            #print (labels.size())
            #print (np.unique(labels.numpy()))
            #print("labels: ", np.unique(labels[0].numpy()))
            #labels = torch.ones(4, 1, 512, 1024).long()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs, only_encode=enc)

            #print("targets", np.unique(targets[:, 0].cpu().data.numpy()))

            optimizer.zero_grad()
            loss = criterion(outputs, targets[:, 0])
            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.data[0])
            time_train.append(time.time() - start_time)

            if (doIouTrain):
                #start_time_iou = time.time()
                upsampledOutputs = up(outputs)
                iouEvalTrain.addBatch(upsampledOutputs.max(1)[1].unsqueeze(1).data, labels_orig)
                #print ("Time to add confusion matrix: ", time.time() - start_time_iou)      

            #print(outputs.size())
            if args.visualize and args.steps_plot > 0 and step % args.steps_plot == 0:
                start_time_plot = time.time()
                image = inputs[0].cpu().data
                #image[0] = image[0] * .229 + .485
                #image[1] = image[1] * .224 + .456
                #image[2] = image[2] * .225 + .406
                #print("output", np.unique(outputs[0].cpu().max(0)[1].data.numpy()))
                board.image(image, f'input (epoch: {epoch}, step: {step})')
                if isinstance(outputs, list):   #merge gpu tensors
                    board.image(color_transform(outputs[0][0].cpu().max(0)[1].data.unsqueeze(0)),
                    f'output (epoch: {epoch}, step: {step})')
                else:
                    board.image(color_transform(outputs[0].cpu().max(0)[1].data.unsqueeze(0)),
                    f'output (epoch: {epoch}, step: {step})')
                board.image(color_transform(targets[0].cpu().data),
                    f'target (epoch: {epoch}, step: {step})')
                print ("Time to paint images: ", time.time() - start_time_plot)
            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print(f'loss: {average:0.4} (epoch: {epoch}, step: {step})', 
                        "// Avg time/img: %.4f s" % (sum(time_train) / len(time_train) / args.batch_size))

            
        average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)
        
        iouTrain = 0
        if (doIouTrain):
            iouTrain, iou_classes = iouEvalTrain.getIoU()
            iouStr = getColorEntry(iouTrain)+'{:0.2f}'.format(iouTrain*100) + '\033[0m'
            print ("EPOCH IoU on TRAIN set: ", iouStr, "%", iou_classes)  

        #Validate on 500 val images after each epoch of training
        print("----- VALIDATING - EPOCH", epoch, "-----")
        model.eval()
        epoch_loss_val = []
        time_val = []

        if (doIouVal):
            iouEvalVal = iouEval(NUM_CLASSES, args.ignoreindex)

        for step, (images, labels, images_orig, labels_orig) in enumerate(loader_val):
            start_time = time.time()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            inputs = Variable(images, volatile=True)    #volatile skips storing intermediates for backward during eval (deprecated; newer PyTorch uses torch.no_grad())
            targets = Variable(labels, volatile=True)
            outputs = model(inputs, only_encode=enc) 

            loss = criterion(outputs, targets[:, 0])
            epoch_loss_val.append(loss.data[0])
            time_val.append(time.time() - start_time)


            #Add batch to calculate TP, FP and FN for iou estimation
            if (doIouVal):
                #start_time_iou = time.time()
                upsampledOutputs = up(outputs)
                iouEvalVal.addBatch(upsampledOutputs.max(1)[1].unsqueeze(1).data, labels_orig)
                #print ("Time to add confusion matrix: ", time.time() - start_time_iou)

            if args.visualize and args.steps_plot > 0 and step % args.steps_plot == 0:
                start_time_plot = time.time()
                image = inputs[0].cpu().data
                board.image(image, f'VAL input (epoch: {epoch}, step: {step})')
                if isinstance(outputs, list):   #merge gpu tensors
                    board.image(color_transform(outputs[0][0].cpu().max(0)[1].data.unsqueeze(0)),
                    f'VAL output (epoch: {epoch}, step: {step})')
                else:
                    board.image(color_transform(outputs[0].cpu().max(0)[1].data.unsqueeze(0)),
                    f'VAL output (epoch: {epoch}, step: {step})')
                board.image(color_transform(targets[0].cpu().data),
                    f'VAL target (epoch: {epoch}, step: {step})')
                print ("Time to paint images: ", time.time() - start_time_plot)
            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss_val) / len(epoch_loss_val)
                print(f'VAL loss: {average:0.4} (epoch: {epoch}, step: {step})', 
                        "// Avg time/img: %.4f s" % (sum(time_val) / len(time_val) / args.batch_size))
                       

        average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)
        #scheduler.step(average_epoch_loss_val, epoch)  ## scheduler 1   # update lr if needed

        iouVal = 0
        if (doIouVal):
            iouVal, iou_classes = iouEvalVal.getIoU()
            iouStr = getColorEntry(iouVal)+'{:0.2f}'.format(iouVal*100) + '\033[0m'
            print ("EPOCH IoU on VAL set: ", iouStr, "%", iou_classes) 
           

        # remember best valIoU and save checkpoint
        if iouVal == 0:
            current_acc = -average_epoch_loss_val
        else:
            current_acc = iouVal 
        is_best = current_acc > best_acc
        best_acc = max(current_acc, best_acc)
        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth.tar'
            filenameBest = savedir + '/model_best_enc.pth.tar'    
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth.tar'
            filenameBest = savedir + '/model_best.pth.tar'
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': str(model),
            'state_dict': model.state_dict(),
            'best_acc': best_acc,
            'optimizer' : optimizer.state_dict(),
        }, is_best, filenameCheckpoint, filenameBest)

        #SAVE MODEL AFTER EPOCH
        if (enc):
            filename = f'{savedir}/model_encoder-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_encoder_best.pth'
        else:
            filename = f'{savedir}/model-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_best.pth'
        if args.epochs_save > 0 and epoch % args.epochs_save == 0:
            torch.save(model.state_dict(), filename)
            print(f'save: {filename} (epoch: {epoch})')
        if (is_best):
            torch.save(model.state_dict(), filenamebest)
            print(f'save: {filenamebest} (epoch: {epoch})')
            if (not enc):
                with open(savedir + "/best.txt", "w") as myfile:
                    myfile.write("Best epoch is %d, with Val-IoU= %.4f" % (epoch, iouVal))   
            else:
                with open(savedir + "/best_encoder.txt", "w") as myfile:
                    myfile.write("Best epoch is %d, with Val-IoU= %.4f" % (epoch, iouVal))           

        #SAVE TO FILE A ROW WITH THE EPOCH RESULT (train loss, val loss, train IoU, val IoU)
        #Epoch		Train-loss		Test-loss	Train-IoU	Test-IoU		learningRate
        with open(automated_log_path, "a") as myfile:
            myfile.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f" % (epoch, average_epoch_loss_train, average_epoch_loss_val, iouTrain, iouVal, usedLr ))
    
    return(model)   #return model (convenience for encoder-decoder training)
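
The '## scheduler 2' comments in Examples #3, #7 and #8 mark a polynomial learning-rate decay: at epoch e the Adam base learning rate is multiplied by (1 - (e-1)/num_epochs)^0.9. A quick sketch of the multipliers, with num_epochs = 150 chosen only for illustration:

num_epochs = 150
poly = lambda epoch: pow(1 - (epoch - 1) / num_epochs, 0.9)

for e in (1, 50, 100, 150):
    print(e, round(poly(e), 3))   # roughly 1.0, 0.70, 0.379, 0.011
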
Example #8
def train(args, model, enc=False):
    global best_acc

    weight = torch.ones(NUM_CLASSES)
    weight[0] = 121.21
    weight[1] = 947.02
    weight[2] = 151.92
    weight[3] = 428.31
    weight[4] = 25.88
    weight[5] = 235.97
    weight[6] = 885.72
    weight[7] = 911.87
    weight[8] = 307.49
    weight[9] = 204.69
    weight[10] = 813.92
    weight[11] = 5.83
    weight[12] = 34.22
    weight[13] = 453.34
    weight[14] = 346.10
    weight[15] = 250.19
    weight[16] = 119.99
    weight[17] = 75.28
    weight[18] = 76.71
    weight[19] = 8.58
    weight[20] = 281.68
    weight[21] = 924.07
    weight[22] = 3.91
    weight[23] = 7.14
    weight[24] = 88.89
    weight[25] = 59.00
    weight[26] = 126.59
    weight[27] = 0

    assert os.path.exists(
        args.datadir), "Error: datadir (dataset directory) could not be loaded"

    co_transform = MyCoTransform(enc, augment=True, height=args.height)  #1024)
    co_transform_val = MyCoTransform(enc, augment=False,
                                     height=args.height)  #1024)
    dataset_train = cityscapes(args.datadir, co_transform, 'train')
    dataset_val = cityscapes(args.datadir, co_transform_val, 'val')

    loader = DataLoader(dataset_train,
                        num_workers=args.num_workers,
                        batch_size=args.batch_size,
                        shuffle=True)
    loader_val = DataLoader(dataset_val,
                            num_workers=args.num_workers,
                            batch_size=args.batch_size,
                            shuffle=False)

    if args.cuda:
        #criterion =LovaszLoss2d()
        #criterion = CrossEntropyLoss2d(weight.cuda())
        criterion = FocalLoss2d(weight.cuda())
    else:
        #criterion = LovaszLoss2d()
        #criterion = CrossEntropyLoss2d(weight)
        criterion = FocalLoss2d(weight)  # keep the weights on CPU when CUDA is disabled

    print(type(criterion))

    savedir = f'../save/{args.savedir}'

    if (enc):
        automated_log_path = savedir + "/automated_log_encoder.txt"
        modeltxtpath = savedir + "/model_encoder.txt"
    else:
        automated_log_path = savedir + "/automated_log.txt"
        modeltxtpath = savedir + "/model.txt"

    if (not os.path.exists(automated_log_path)
        ):  #don't add the header line if the file already exists
        with open(automated_log_path, "a") as myfile:
            myfile.write(
                "Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tTest-IoU\t\tlearningRate"
            )

    with open(modeltxtpath, "w") as myfile:
        myfile.write(str(model))

    #optimizer = Adam(model.parameters(), 5e-4, (0.9, 0.999),  eps=1e-08, weight_decay=2e-4)     ## scheduler 1
    optimizer = Adam(model.parameters(),
                     1e-4, (0.9, 0.999),
                     eps=1e-08,
                     weight_decay=1e-4)  ## scheduler 2

    start_epoch = 1

    #scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5) # set up scheduler     ## scheduler 1
    lambda1 = lambda epoch: pow(
        (1 - ((epoch - 1) / args.num_epochs)), 0.9)  ## scheduler 2
    scheduler = lr_scheduler.LambdaLR(optimizer,
                                      lr_lambda=lambda1)  ## scheduler 2

    time_train_perepoch = []
    for epoch in range(start_epoch, args.num_epochs + 1):
        print("----- TRAINING - EPOCH", epoch, "-----")
        start_time_perepoch = time.time()

        scheduler.step(epoch)  ## scheduler 2

        epoch_loss = []
        time_train = []

        doIouTrain = args.iouTrain
        doIouVal = args.iouVal

        usedLr = 0
        for param_group in optimizer.param_groups:
            print("LEARNING RATE: ", param_group['lr'])
            usedLr = float(param_group['lr'])

        model.train()
        for step, (images, labels) in enumerate(loader):
            start_time = time.time()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()

            #inputs = images
            #targets= labels
            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs, only_encode=enc)
            optimizer.zero_grad()
            loss = criterion(outputs, targets[:, 0])
            #loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.data[0])
            time_train.append(time.time() - start_time)

            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print(
                    f'loss: {average:0.4} (epoch: {epoch}, step: {step})',
                    "// Avg time/img: %.4f s" %
                    (sum(time_train) / len(time_train) / args.batch_size))

        average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)
        #evalIoU.printConfMatrix(confMatrix, evalIoU.args)

        time_train_perepoch.append(time.time() - start_time_perepoch)
        print("// Time per epoch: %.4f hours" %
              (sum(time_train_perepoch) / len(time_train_perepoch) / 3600.0))

        #Validate on 500 val images after each epoch of training
        print("----- VALIDATING - EPOCH", epoch, "-----")
        model.eval()
        epoch_loss_val = []
        time_val = []

        if (doIouVal):
            iouEvalVal = iouEval(NUM_CLASSES)

        with torch.no_grad():
            for step, (images, labels) in enumerate(loader_val):
                start_time = time.time()
                if args.cuda:
                    images = images.cuda()
                    labels = labels.cuda()

                #inputs =images
                #targets=labels
                inputs = Variable(
                    images, requires_grad=False
                )  # inside torch.no_grad(), so no graph is built for eval
                targets = Variable(labels,
                                   requires_grad=False)  #, volatile=True)
                outputs = model(inputs, only_encode=enc)

                loss = criterion(outputs, targets[:, 0])
                epoch_loss_val.append(loss.data[0])
                time_val.append(time.time() - start_time)

                if (doIouVal):
                    iouEvalVal.addBatch(
                        outputs.max(1)[1].unsqueeze(1).data, targets.data)

                if args.steps_loss > 0 and step % args.steps_loss == 0:
                    average = sum(epoch_loss_val) / len(epoch_loss_val)
                    print(
                        f'VAL loss: {average:0.4} (epoch: {epoch}, step: {step})',
                        "// Avg time/img: %.4f s" %
                        (sum(time_val) / len(time_val) / args.batch_size))

        average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)
        #scheduler.step(average_epoch_loss_val, epoch)  ## scheduler 1   # update lr if needed

        # Calculate IOU scores on class level from matrix
        iouVal = 0
        iouTrain = 0
        if (doIouVal):
            iouVal, iou_classes, accVal, acc_classes = iouEvalVal.getIoU()

            print("pole    : %.6f" % (iou_classes[0] * 100.0), "%\t")
            print("slight  : %.6f" % (iou_classes[1] * 100.0), "%\t")
            print("bboard  : %.6f" % (iou_classes[2] * 100.0), "%\t")
            print("tlight  : %.6f" % (iou_classes[3] * 100.0), "%\t")
            print("car     : %.6f" % (iou_classes[4] * 100.0), "%\t")
            print("truck   : %.6f" % (iou_classes[5] * 100.0), "%\t")
            print("bicycle : %.6f" % (iou_classes[6] * 100.0), "%\t")
            print("motor   : %.6f" % (iou_classes[7] * 100.0), "%\t")
            print("bus     : %.6f" % (iou_classes[8] * 100.0), "%\t")
            print("tsignf  : %.6f" % (iou_classes[9] * 100.0), "%\t")
            print("tsignb  : %.6f" % (iou_classes[10] * 100.0), "%\t")
            print("road    : %.6f" % (iou_classes[11] * 100.0), "%\t")
            print("sidewalk: %.6f" % (iou_classes[12] * 100.0), "%\t")
            print("curbcut : %.6f" % (iou_classes[13] * 100.0), "%\t")
            print("crosspln: %.6f" % (iou_classes[14] * 100.0), "%\t")
            print("bikelane: %.6f" % (iou_classes[15] * 100.0), "%\t")
            print("curb    : %.6f" % (iou_classes[16] * 100.0), "%\t")
            print("fence   : %.6f" % (iou_classes[17] * 100.0), "%\t")
            print("wall    : %.6f" % (iou_classes[18] * 100.0), "%\t")
            print("building: %.6f" % (iou_classes[19] * 100.0), "%\t")
            print("person  : %.6f" % (iou_classes[20] * 100.0), "%\t")
            print("rider   : %.6f" % (iou_classes[21] * 100.0), "%\t")
            print("sky     : %.6f" % (iou_classes[22] * 100.0), "%\t")
            print("vege    : %.6f" % (iou_classes[23] * 100.0), "%\t")
            print("terrain : %.6f" % (iou_classes[24] * 100.0), "%\t")
            print("markings: %.6f" % (iou_classes[25] * 100.0), "%\t")
            print("crosszeb: %.6f" % (iou_classes[26] * 100.0), "%\t")

            iouStr = getColorEntry(iouVal) + '{:0.2f}'.format(
                iouVal * 100) + '\033[0m'
            print("EPOCH IoU on VAL set: ", iouStr, "%")

            print("pole    : %.6f" % (acc_classes[0] * 100.0), "%\t")
            print("slight  : %.6f" % (acc_classes[1] * 100.0), "%\t")
            print("bboard  : %.6f" % (acc_classes[2] * 100.0), "%\t")
            print("tlight  : %.6f" % (acc_classes[3] * 100.0), "%\t")
            print("car     : %.6f" % (acc_classes[4] * 100.0), "%\t")
            print("truck   : %.6f" % (acc_classes[5] * 100.0), "%\t")
            print("bicycle : %.6f" % (acc_classes[6] * 100.0), "%\t")
            print("motor   : %.6f" % (acc_classes[7] * 100.0), "%\t")
            print("bus     : %.6f" % (acc_classes[8] * 100.0), "%\t")
            print("tsignf  : %.6f" % (acc_classes[9] * 100.0), "%\t")
            print("tsignb  : %.6f" % (acc_classes[10] * 100.0), "%\t")
            print("road    : %.6f" % (acc_classes[11] * 100.0), "%\t")
            print("sidewalk: %.6f" % (acc_classes[12] * 100.0), "%\t")
            print("curbcut : %.6f" % (acc_classes[13] * 100.0), "%\t")
            print("crosspln: %.6f" % (acc_classes[14] * 100.0), "%\t")
            print("bikelane: %.6f" % (acc_classes[15] * 100.0), "%\t")
            print("curb    : %.6f" % (acc_classes[16] * 100.0), "%\t")
            print("fence   : %.6f" % (acc_classes[17] * 100.0), "%\t")
            print("wall    : %.6f" % (acc_classes[18] * 100.0), "%\t")
            print("building: %.6f" % (acc_classes[19] * 100.0), "%\t")
            print("person  : %.6f" % (acc_classes[20] * 100.0), "%\t")
            print("rider   : %.6f" % (acc_classes[21] * 100.0), "%\t")
            print("sky     : %.6f" % (acc_classes[22] * 100.0), "%\t")
            print("vege    : %.6f" % (acc_classes[23] * 100.0), "%\t")
            print("terrain : %.6f" % (acc_classes[24] * 100.0), "%\t")
            print("markings: %.6f" % (acc_classes[25] * 100.0), "%\t")
            print("crosszeb: %.6f" % (acc_classes[26] * 100.0), "%\t")

            accStr = getColorEntry(accVal) + '{:0.2f}'.format(
                accVal * 100) + '\033[0m'
            print("EPOCH ACC on VAL set: ", accStr, "%")

        # remember best valIoU and save checkpoint
        if iouVal == 0:
            current_acc = -average_epoch_loss_val  # lower validation loss should count as better
        else:
            current_acc = iouVal
        is_best = current_acc > best_acc
        best_acc = max(current_acc, best_acc)
        if (enc and epoch == args.num_epochs):
            best_acc = 0

        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth'
            filenameBest = savedir + '/model_best_enc.pth'
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth'
            filenameBest = savedir + '/model_best.pth'
        save_checkpoint({
            'state_dict': model.state_dict(),
        }, is_best, filenameCheckpoint, filenameBest)

        #SAVE MODEL AFTER EPOCH
        if (enc):
            filename = f'{savedir}/model_encoder-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_encoder_best_each.pth'
        else:
            filename = f'{savedir}/model-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_best_each.pth'
        if args.epochs_save > 0 and epoch > 0 and epoch % args.epochs_save == 0:
            torch.save(model.state_dict(), filename)
            print(f'save: {filename} (epoch: {epoch})')
        #if (True) #(is_best):
        torch.save(model.state_dict(), filenamebest)
        print(f'save: {filenamebest} (epoch: {epoch})')
        filenameSuperBest = f'{savedir}/model_superbest.pth'
        if (is_best):
            torch.save(model.state_dict(), filenameSuperBest)
            print(f'saving superbest')
        if (not enc):
            with open(savedir + "/best.txt", "w") as myfile:
                myfile.write("Best epoch is %d, with Val-IoU= %.4f" %
                             (epoch, iouVal))
        else:
            with open(savedir + "/best_encoder.txt", "w") as myfile:
                myfile.write("Best epoch is %d, with Val-IoU= %.4f" %
                             (epoch, iouVal))

        #SAVE TO FILE A ROW WITH THE EPOCH RESULT (train loss, val loss, train IoU, val IoU)
        #Epoch		Train-loss		Test-loss	Train-IoU	Test-IoU		learningRate
        with open(automated_log_path, "a") as myfile:
            myfile.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f" %
                         (epoch, average_epoch_loss_train,
                          average_epoch_loss_val, iouTrain, iouVal, usedLr))

    return (model)  #return model (convenience for encoder-decoder training)
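The train() functions in these examples all call save_checkpoint() to persist the latest training state and keep a copy of the best one, but the helper itself is not shown. A minimal sketch of what such a helper could look like (behaviour inferred from the call sites, not taken from the original code):

import shutil

import torch


def save_checkpoint(state, is_best, filenameCheckpoint, filenameBest):
    # always write the most recent training state
    torch.save(state, filenameCheckpoint)
    # additionally keep a copy whenever the current epoch is the best so far
    if is_best:
        shutil.copyfile(filenameCheckpoint, filenameBest)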
Exemplo n.º 9
0
def train(savedir,
          model,
          dataloader_train,
          dataloader_eval,
          criterion,
          optimizer,
          args,
          enc=False):
    min_loss = float('inf')

    # use tensorboard
    writer = SummaryWriter(log_dir=savedir)
    if (enc):
        automated_log_path = savedir + "/automated_log_encoder.txt"
        modeltxtpath = savedir + "/model_encoder.txt"
    else:
        automated_log_path = savedir + "/automated_log.txt"
        modeltxtpath = savedir + "/model.txt"

    if (not os.path.exists(automated_log_path)
        ):  #dont add first line if it exists
        with open(automated_log_path, "a") as myfile:
            myfile.write(
                "Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tTest-IoU\t\tlearningRate"
            )

    with open(modeltxtpath, "w") as myfile:
        myfile.write(str(model))

    start_epoch = 1
    if args.resume:
        #Must load weights, optimizer, epoch and best value.
        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth.tar'
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth.tar'

        assert os.path.exists(
            filenameCheckpoint
        ), "Error: resume option was used but checkpoint was not found in folder"
        checkpoint = torch.load(filenameCheckpoint)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        best_acc = checkpoint['best_acc']
        print("=> Loaded checkpoint at epoch {})".format(checkpoint['epoch']))

    #scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5) # set up scheduler     ## scheduler 1
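    # "poly" decay: lr(epoch) = base_lr * (1 - (epoch - 1) / num_epochs) ** 0.9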
    lambda1 = lambda epoch: pow(
        (1 - ((epoch - 1) / args.num_epochs)), 0.9)  ## scheduler 2
    scheduler = lr_scheduler.LambdaLR(optimizer,
                                      lr_lambda=lambda1)  ## scheduler 2

    if args.visualize and args.steps_plot > 0:
        board = Dashboard(args.port)

    for epoch in range(start_epoch, args.num_epochs + 1):
        print("----- TRAINING - EPOCH", epoch, "-----")

        scheduler.step(epoch)

        epoch_loss = []
        time_train = []

        doIouTrain = args.iouTrain
        doIouVal = args.iouVal

        if (doIouTrain):
            iouEvalTrain = iouEval(mean_and_var)

        usedLr = 0
        for param_group in optimizer.param_groups:
            print("LEARNING RATE: ", param_group['lr'])
            usedLr = float(param_group['lr'])

        model.train()
        for step, (images, labels, _) in enumerate(dataloader_train):

            start_time = time.time()
            #print (labels.size())
            #print (np.unique(labels.numpy()))
            #print("labels: ", np.unique(labels[0].numpy()))
            #labels = torch.ones(4, 1, 512, 1024).long()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()
            #print("image: ", images.size())
            #print("labels: ", labels.size())
            inputs = Variable(images)
            targets = Variable(labels)
            outputs = model(inputs, only_encode=enc)

            # print("output: ", outputs.size()) #TODO
            # print("targets", np.unique(targets[:, 0].cpu().data.numpy()))

            optimizer.zero_grad()
            loss = criterion(outputs, targets[:, 0])

            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.item())  # store a python float, not the graph-holding tensor
            time_train.append(time.time() - start_time)

            if (doIouTrain):
                #start_time_iou = time.time()
                iouEvalTrain.addBatch(
                    outputs.max(1)[1].unsqueeze(1).data, targets.data)
                #print ("Time to add confusion matrix: ", time.time() - start_time_iou)

            #print(outputs.size())
            if args.visualize and args.steps_plot > 0 and step % args.steps_plot == 0:
                start_time_plot = time.time()
                image = inputs[0].cpu().data
                #image[0] = image[0] * .229 + .485
                #image[1] = image[1] * .224 + .456
                #image[2] = image[2] * .225 + .406
                #print("output", np.unique(outputs[0].cpu().max(0)[1].data.numpy()))
                board.image(image, f'input (epoch: {epoch}, step: {step})')
                if isinstance(outputs, list):  #merge gpu tensors
                    board.image(
                        color_transform(
                            outputs[0][0].cpu().max(0)[1].data.unsqueeze(0)),
                        f'output (epoch: {epoch}, step: {step})')
                else:
                    board.image(
                        color_transform(
                            outputs[0].cpu().max(0)[1].data.unsqueeze(0)),
                        f'output (epoch: {epoch}, step: {step})')
                board.image(color_transform(targets[0].cpu().data),
                            f'target (epoch: {epoch}, step: {step})')
                print("Time to paint images: ", time.time() - start_time_plot)
            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print(
                    f'loss: {average:0.4} (epoch: {epoch}, step: {step})',
                    "// Avg time/img: %.4f s" %
                    (sum(time_train) / len(time_train) / args.batch_size))

        average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)
        writer.add_scalar('train_loss', average_epoch_loss_train, epoch)

        iouTrain = 0
        if (doIouTrain):
            iouTrain, iou_classes = iouEvalTrain.getIoU()
            iouStr = getColorEntry(iouTrain) + '{:0.2f}'.format(
                iouTrain * 100) + '\033[0m'
            print("EPOCH IoU on TRAIN set: ", iouStr, "%")

        #Validate on 500 val images after each epoch of training
        print("----- VALIDATING - EPOCH", epoch, "-----")
        model.eval()
        epoch_loss_val = []
        time_val = []

        if (doIouVal):
            iouEvalVal = iouEval(mean_and_var)

        for step, (images, labels, _) in enumerate(dataloader_eval):
            start_time = time.time()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()
            optimizer.zero_grad()
            inputs = Variable(images)
            targets = Variable(labels)
            with torch.no_grad():
                outputs = model(inputs, only_encode=enc)

                loss = criterion(outputs, targets[:, 0])
            epoch_loss_val.append(loss.data)
            time_val.append(time.time() - start_time)

            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss_val) / len(epoch_loss_val)
                print(
                    f'VAL loss: {average:0.4} (epoch: {epoch}, step: {step})',
                    "// Avg time/img: %.4f s" %
                    (sum(time_val) / len(time_val) / args.batch_size))

        average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)
        #scheduler.step(average_epoch_loss_val, epoch)  ## scheduler 1   # update lr if needed
        writer.add_scalar('eval_loss', average_epoch_loss_val, epoch)

        iouVal = 0
        if (doIouVal):
            iouVal, iou_classes = iouEvalVal.getIoU()
            iouStr = getColorEntry(iouVal) + '{:0.2f}'.format(
                iouVal * 100) + '\033[0m'
            print("EPOCH IoU on VAL set: ", iouStr, "%")

        is_best = average_epoch_loss_val < min_loss
        min_loss = min(min_loss, average_epoch_loss_val)
        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth.tar'
            filenameBest = savedir + '/model_best_enc.pth.tar'
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth.tar'
            filenameBest = savedir + '/model_best.pth.tar'
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': str(model),
                'state_dict': model.state_dict(),
                'best_acc': min_loss,
                'optimizer': optimizer.state_dict(),
            }, is_best, filenameCheckpoint, filenameBest)

        #SAVE MODEL AFTER EPOCH
        if (enc):
            filename = f'{savedir}/model_encoder-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_encoder_best.pth'
        else:
            filename = f'{savedir}/model-{epoch:03}.pth'
            filenamebest = f'{savedir}/model_best.pth'
        if args.epochs_save > 0 and epoch > 0 and epoch % args.epochs_save == 0:
            torch.save(model.state_dict(), filename)
            print(f'save: {filename} (epoch: {epoch})')
        if (is_best):
            torch.save(model.state_dict(), filenamebest)
            print(f'save: {filenamebest} (epoch: {epoch})')
            if (not enc):
                with open(savedir + "/best.txt", "w") as myfile:
                    myfile.write("Best epoch is %d, with Val-IoU= %.4f" %
                                 (epoch, iouVal))
            else:
                with open(savedir + "/best_encoder.txt", "w") as myfile:
                    myfile.write("Best epoch is %d, with Val-IoU= %.4f" %
                                 (epoch, iouVal))

        #SAVE TO FILE A ROW WITH THE EPOCH RESULT (train loss, val loss, train IoU, val IoU)
        #Epoch		Train-loss		Test-loss	Train-IoU	Test-IoU		learningRate
        with open(automated_log_path, "a") as myfile:
            myfile.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f" %
                         (epoch, average_epoch_loss_train,
                          average_epoch_loss_val, iouTrain, iouVal, usedLr))
    writer.close()
    torch.save(model.state_dict(), f'{savedir}/weight_final.pth')
    return (model)  #return model (convenience for encoder-decoder training)
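getColorEntry() is used throughout these snippets to colour a metric before printing it, but is never defined here. A minimal sketch with assumed thresholds (the surrounding code only requires that it returns an ANSI escape code, which is later reset with '\033[0m'):

def getColorEntry(val):
    # map a score in [0, 1] to an ANSI colour code; the thresholds are an assumption
    if not isinstance(val, float):
        return '\033[0m'
    if val < 0.20:
        return '\033[31m'  # red
    elif val < 0.40:
        return '\033[33m'  # yellow
    elif val < 0.60:
        return '\033[36m'  # cyan
    elif val < 0.80:
        return '\033[32m'  # green
    return '\033[32;1m'  # bold green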
Exemplo n.º 10
0
def train(args, model, classNum, epochNum, encoderOnly=False):

    start_epoch = 1
    best_acc = 0

    # === Dataset Processing === #
    if args.dataset == 'cityscapes':
        co_transform = MyCoTransform(encoderOnly,
                                     dataAugment=True,
                                     height=args.height)
        co_transform_val = MyCoTransform(encoderOnly,
                                         dataAugment=False,
                                         height=args.height)
        dataDir = '/media/commlab/TenTB/swhung/SegNet/Cityscapes/'
        dataset_train = cityscapes(dataDir, co_transform, 'train')
        dataset_val = cityscapes(dataDir, co_transform_val, 'val')
        saveDir = f'../save/{args.saveDir}'

    loader_train = DataLoader(dataset_train,
                              num_workers=args.num_workers,
                              batch_size=args.batchSize,
                              shuffle=True)
    loader_val = DataLoader(dataset_val,
                            num_workers=args.num_workers,
                            batch_size=args.batchSize,
                            shuffle=False)

    # === Optimization Setting === #

    # ** optimizer
    if args.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=1e-4)
    elif args.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=0.9,
                              weight_decay=1e-4)

    # ** learning rate scheduler
    my_lambda = lambda epoch: pow((1 - ((epoch - 1) / epochNum)), 0.9)  # poly
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=my_lambda)

    # ** apply loss function
    classWeight = getClassWeight(args.dataset, classNum)
    if args.cuda:
        classWeight = classWeight.cuda()

    criterion = CrossEntropyLoss2d(weight=classWeight, ignore_index=19)

    # === save information in .txt files === #
    if (encoderOnly):
        automated_log_path = saveDir + "/automated_log_encoder.txt"
        modeltxtpath = saveDir + "/model_txt_encoder.txt"
    else:
        automated_log_path = saveDir + "/automated_log.txt"
        modeltxtpath = saveDir + "/model_txt.txt"

    if (not os.path.exists(automated_log_path)
        ):  # do not add first line if it exists
        with open(automated_log_path, "a") as myfile:
            myfile.write(
                "Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tTest-IoU\t\tlearningRate"
            )

    with open(modeltxtpath, "w") as myfile:
        myfile.write(str(model))

    # === Training === #
    for epoch in range(start_epoch, epochNum + 1):

        print("----- TRAINING - EPOCH", epoch, "-----")

        model.train()

        scheduler.step(epoch - 1)

        epoch_loss = []
        time_train = []

        if (args.doEvalTrain):
            iouEvalTrain = iouEval(classNum)

        usedLr = 0
        for param_group in optimizer.param_groups:
            print("learning rate: ", param_group['lr'])
            usedLr = float(param_group['lr'])

        # ** training iteration
        for iter, (images, labels) in enumerate(loader_train):
            start_time = time.time()

            slice = torch.split(images, 1, 1)
            rgb = torch.cat((slice[0], slice[1], slice[2]), 1)
            d = torch.cat((slice[3], slice[4]), 1)  #depth and luminance

            if args.cuda:
                rgb_inputs = rgb.cuda()
                d_input = d.cuda()
                targets = labels.cuda()

            img_size = list(targets.size())[2:4]

            # run the model
            if args.onlyWholeNet:
                outputs = model(inputs)
            else:
                outputs = model(rgb_inputs, d_input, only_encoder=encoderOnly)

            # run the back-propagation
            loss = criterion(outputs, targets[:, 0])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.item())
            time_train.append(time.time() - start_time)

            if (args.doEvalTrain):
                iouEvalTrain.addBatch(
                    outputs.max(1)[1].unsqueeze(1).data, targets.data)

            # print the training loss information
            if args.iter_loss > 0 and iter % args.iter_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print(
                    f'loss: {average:0.4} (epoch: {epoch}, iter: {iter})',
                    "// Avg time/img: %.4f s" %
                    (sum(time_train) / len(time_train) / args.batchSize))

        average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)

        iouTrain = 0
        if (args.doEvalTrain):
            iouTrain, iou_classes = iouEvalTrain.getIoU()
            iouStr = getColorEntry(iouTrain) + '{:0.2f}'.format(
                iouTrain * 100) + '\033[0m'
            print("EPOCH IoU on TRAIN set: ", iouStr, "%")

        if epoch <= 10 or epoch >= 70:
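            # validation below is run only for epochs 1-10 and from epoch 70 onward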
            with torch.no_grad():

                # Validate on 500 val images after each epoch of training
                print("----- VALIDATING - EPOCH", epoch, "-----")

                model.eval()

                epoch_loss_val = []
                time_val = []

                if (args.doEvalVal):
                    iouEvalVal = iouEval(classNum)

                # ** validation iteration
                for iter, (images, labels) in enumerate(loader_val):

                    start_time = time.time()

                    slice = torch.split(images, 1, 1)
                    rgb = torch.cat((slice[0], slice[1], slice[2]), 1)
                    d = torch.cat((slice[3], slice[4]),
                                  1)  #depth and luminance

                    if args.cuda:
                        rgb_inputs = rgb.cuda()
                        d_input = d.cuda()
                        targets = labels.cuda()

                    img_size = list(targets.size())[2:4]

                    # run the model
                    if args.onlyWholeNet:
                        outputs = model(inputs)
                    else:
                        outputs = model(rgb_inputs,
                                        d_input,
                                        only_encoder=encoderOnly)

                    loss = criterion(outputs, targets[:, 0])

                    epoch_loss_val.append(loss.item())
                    time_val.append(time.time() - start_time)

                    # Add batch to calculate TP, FP and FN for iou estimation
                    if (args.doEvalVal):
                        iouEvalVal.addBatch(
                            outputs.max(1)[1].unsqueeze(1).data, targets.data)

                    # print the validation loss information
                    if args.iter_loss > 0 and iter % args.iter_loss == 0:
                        average = sum(epoch_loss_val) / len(epoch_loss_val)
                        print(
                            f'VAL loss: {average:0.4} (epoch: {epoch}, iter: {iter})',
                            "// Avg time/img: %.4f s" %
                            (sum(time_val) / len(time_val) / args.batchSize))

            average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)

            # print epoch val IoU accuracy
            iouVal = 0
            if (args.doEvalVal):
                iouVal, iou_classes = iouEvalVal.getIoU()
                iouStr = getColorEntry(iouVal) + '{:0.2f}'.format(
                    iouVal * 100) + '\033[0m'
                print("EPOCH IoU on VAL set: ", iouStr, "%")

            # remember best valIoU and save checkpoint
            if iouVal == 0:
                current_acc = -average_epoch_loss_val  # lower validation loss should count as better
            else:
                current_acc = iouVal

            is_best = current_acc > best_acc
            best_acc = max(current_acc, best_acc)

            if encoderOnly:
                filenameCheckpoint = saveDir + '/checkpoint_enc.pth.tar'
                filenameBest = saveDir + '/model_best_encoder.pth.tar'
            else:
                filenameCheckpoint = saveDir + '/checkpoint.pth.tar'
                filenameBest = saveDir + '/model_best.pth.tar'

            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': str(model),
                    'state_dict': model.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                }, is_best, filenameCheckpoint, filenameBest)

            if (encoderOnly):
                filename = f'{saveDir}/model_encoder-{epoch:03}.pth'
                filenamebest = f'{saveDir}/model_best_encoder.pth'
            else:
                filename = f'{saveDir}/model-{epoch:03}.pth'
                filenamebest = f'{saveDir}/model_best.pth'

            # save model after some epochs
            if args.epochs_save > 0 and epoch > 0 and epoch % args.epochs_save == 0:
                torch.save(model.state_dict(), filename)
                print(f'save: {filename} (epoch: {epoch})')

            # save the best model
            if (is_best):
                torch.save(model.state_dict(), filenamebest)
                print(f'save: {filenamebest} (epoch: {epoch})')

                if (not encoderOnly):
                    with open(saveDir + "/best_IoU.txt", "w") as myfile:
                        myfile.write("Best epoch is %d, with Val-IoU= %.4f" %
                                     (epoch, iouVal))
                else:
                    with open(saveDir + "/best_IoU_encoder.txt",
                              "w") as myfile:
                        myfile.write("Best epoch is %d, with Val-IoU= %.4f" %
                                     (epoch, iouVal))

            # save information in .txt files
            # SAVE TO FILE A ROW WITH THE EPOCH RESULT (train loss, val loss, train IoU, val IoU)
            # Epoch		Train-loss		Test-loss	Train-IoU	Test-IoU		learningRate
            with open(automated_log_path, "a") as myfile:
                myfile.write(
                    "\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f" %
                    (epoch, average_epoch_loss_train, average_epoch_loss_val,
                     iouTrain, iouVal, usedLr))

    return model  # return model (convenience for encoder-decoder training)
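The criterion above is built with CrossEntropyLoss2d(weight=classWeight, ignore_index=19), a class that is not defined in the snippet. A common definition (assumed here) simply wraps NLLLoss around a log-softmax over the class dimension, so it accepts per-class weights and an ignore_index exactly as used above:

import torch
import torch.nn.functional as F


class CrossEntropyLoss2d(torch.nn.Module):
    def __init__(self, weight=None, ignore_index=-100):
        super().__init__()
        self.loss = torch.nn.NLLLoss(weight=weight, ignore_index=ignore_index)

    def forward(self, outputs, targets):
        # outputs: (N, C, H, W) raw logits, targets: (N, H, W) class indices
        return self.loss(F.log_softmax(outputs, dim=1), targets)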
Exemplo n.º 11
0
def train(args, get_dataset, model, enc=False):
    best_acc = 0

    num_epochs = 10 if args.debug else args.num_epochs

    n_gpus = torch.cuda.device_count()
    print("\nWorking with {} GPUs".format(n_gpus))

    datasets = args.datasets

    entropy = (args.alpha + args.beta) > 0
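    # the unsupervised entropy/embedding loss is enabled only when the alpha/beta weights sum to a positive value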

    if entropy:
        assert len(
            datasets
        ) > 1, "Entropy Module undefined with single dataset. Exiting ... "

    NUM_LABELS = get_dataset.num_labels

    dataset_train = {
        dname: get_dataset(dname, 'train', args.num_samples)
        for dname in datasets
    }
    dataset_val = {dname: get_dataset(dname, 'val', 100) for dname in datasets}
    # dataset_unlabeled = {dname: get_dataset(dname, co_transform, 'train_extra' , mode='unlabeled') for dname in datasets}
    dataset_unlabeled = {
        dname: get_dataset(dname, 'train', mode='unlabeled')
        for dname in datasets
    }

    if entropy:

        n_unlabeled = np.max(
            [len(dataset_unlabeled[dname]) for dname in datasets])

    print("Working with {} Dataset(s):".format(len(datasets)))
    for key in datasets:
        print(
            "{}: Unlabeled images {}, Training on {} images, Validation on {} images"
            .format(key, len(dataset_unlabeled[key]), len(dataset_train[key]),
                    len(dataset_val[key])))

    for d in datasets:
        if len(set(dataset_train.values())) != 1:
            max_train_size = np.max(
                [len(dataset_train[dname]) for dname in datasets])
            dataset_train[d].image_paths = dataset_train[d].image_paths * int(
                np.ceil(
                    float(max_train_size) / len(dataset_train[d].image_paths)))
            dataset_train[d].label_paths = dataset_train[d].label_paths * int(
                np.ceil(
                    float(max_train_size) / len(dataset_train[d].label_paths)))

        # if entropy:
        # 	dataset_unlabeled[d].image_paths = dataset_unlabeled[d].image_paths*int(np.ceil(float(n_unlabeled)/len(dataset_unlabeled[d].image_paths)))
        # 	dataset_unlabeled[d].label_paths = dataset_unlabeled[d].label_paths*int(np.ceil(float(n_unlabeled)/len(dataset_unlabeled[d].label_paths)))

    loader_train = {
        dname: DataLoader(dataset_train[dname],
                          num_workers=args.num_workers,
                          batch_size=args.batch_size,
                          shuffle=True)
        for dname in datasets
    }
    loader_val = {
        dname: DataLoader(dataset_val[dname],
                          num_workers=args.num_workers,
                          batch_size=2,
                          shuffle=True,
                          drop_last=True)
        for dname in datasets
    }

    # epoch_iters = np.min([len(loader_train[dname]) for dname in datasets])

    if entropy:
        loader_unlabeled = {
            dname: DataLoader(dataset_unlabeled[dname],
                              num_workers=args.num_workers,
                              batch_size=args.batch_size,
                              shuffle=True,
                              drop_last=True)
            for dname in datasets
        }
        # epoch_iters = np.min([ np.min([len(loader[dname]) for dname in datasets]) for loader in [loader_train , loader_unlabeled]])

    savedir = f'../save_drnet/{args.savedir}'

    if (enc):
        automated_log_path = savedir + "/automated_log_encoder.txt"
        modeltxtpath = savedir + "/model_encoder.txt"
    else:
        automated_log_path = savedir + "/automated_log.txt"
        modeltxtpath = savedir + "/model.txt"

    loss_logpath = savedir + "/loss_log.txt"

    if (not os.path.exists(automated_log_path)
        ):  #dont add first line if it exists
        with open(automated_log_path, "a") as myfile:
            if len(datasets) > 1:
                myfile.write(
                    "Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU-1\t\tTrain-IoU-2\t\tVal-IoU-1\t\tVal-IoU-2\t\tlearningRate"
                )
            else:
                myfile.write(
                    "Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tVal-IoU\t\tlearningRate"
                )

    with open(modeltxtpath, "w") as myfile:
        myfile.write(str(model))

    if (not os.path.exists(loss_logpath)):
        with open(loss_logpath, "w") as myfile:
            if len(datasets) > 1:
                myfile.write("Epoch\t\tS1\t\tS2\t\tUS1\t\tUS2\t\tTotal\n")
            else:
                myfile.write("Epoch\t\tS1\t\tS2\t\tTotal\n")

    #TODO: reduce memory in first gpu: https://discuss.pytorch.org/t/multi-gpu-training-memory-usage-in-balance/4163/4        #https://github.com/pytorch/pytorch/issues/1893

    if args.model == 'drnet':
        optimizer = SGD(model.optim_parameters(),
                        args.lr,
                        0.9,
                        weight_decay=1e-4)  ## scheduler DR-Net
    if args.cuda:
        model = torch.nn.DataParallel(model).cuda()

    doIou = {'train': args.iouTrain, 'val': args.iouVal}
    le_file = savedir + '/label_embedding.pt'
    average_epoch_loss = {'train': np.inf, 'val': np.inf}

    label_embedding = {
        key: torch.randn(NUM_LABELS[key], args.em_dim).cuda()
        for key in datasets
    }  ## Random Initialization

    ## If provided, use label embedddings
    if args.pt_em:
        fn = torch.load(args.pt_em)
        label_embedding = {
            key: torch.tensor(fn[key], dtype=torch.float).cuda()
            for key in datasets
        }

    start_epoch = 1
    if args.resume:
        #Must load weights, optimizer, epoch and best value.
        if enc:
            filenameCheckpoint = savedir + '/checkpoint_enc.pth.tar'
        else:
            filenameCheckpoint = savedir + '/checkpoint.pth.tar'

        assert os.path.exists(
            filenameCheckpoint
        ), "Error: resume option was used but checkpoint was not found in folder"
        checkpoint = torch.load(filenameCheckpoint)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        best_acc = checkpoint['best_acc']
        label_embedding = torch.load(le_file) if len(datasets) > 1 else None
        print("=> Loaded checkpoint at epoch {}".format(checkpoint['epoch']))

    scheduler = lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda epoch: pow(
            (1 - ((epoch - 1) / args.num_epochs)), 0.9))  ## scheduler 2
    loss_criterion = {
        key:
        torch.nn.CrossEntropyLoss(ignore_index=NUM_LABELS[key] - 1).cuda()
        for key in datasets
    }

    if len(datasets) > 1:
        similarity_module = EmbeddingLoss(NUM_LABELS, args.em_dim,
                                          label_embedding, loss_criterion)
        similarity_module = torch.nn.DataParallel(similarity_module).cuda()
        torch.save(label_embedding, le_file)

    print()
    print("========== STARTING TRAINING ===========")
    print()

    n_iters = min([len(loader_train[d]) for d in datasets])
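    # one training "epoch" runs for the length of the shortest labeled loader, drawing one batch from every dataset per iteration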

    if entropy:
        unlabeled_iters = {
            d: len(loader_unlabeled[d]) // n_iters
            for d in datasets
        }

    for epoch in range(start_epoch, num_epochs + 1):

        epoch_start_time = time.time()
        usedLr = 0
        iou = {key: (0, 0) for key in datasets}

        ###### TRAIN begins  #################
        for phase in ['train']:

            eval_iou = doIou[phase]
            print("-----", phase, "- EPOCH", epoch, "-----")

            scheduler.step(epoch)
            model.train()

            for param_group in optimizer.param_groups:
                print("LEARNING RATE: ", param_group['lr'])
                usedLr = float(param_group['lr'])

            ## Initialize the iterables

            labeled_iterator = {
                dname: iter(loader_train[dname])
                for dname in datasets
            }

            if entropy:
                unlabeled_iterator = {
                    dname: iter(loader_unlabeled[dname])
                    for dname in datasets
                }

            if args.alpha:
                alpha = 1
            if args.beta:
                beta = 1

            epoch_loss = {d: [] for d in datasets}
            epoch_sup_loss = {d: [] for d in datasets}
            epoch_ent_loss = {d: [] for d in datasets}

            time_taken = []

            if (eval_iou):
                iou_data = {key: iouEval(NUM_LABELS[key]) for key in datasets}

            for itr in range(n_iters):

                optimizer.zero_grad()
                loss_sup = {d: 0 for d in datasets}
                loss_ent = {d: [0] for d in datasets}

                for d in datasets:

                    images_l, targets_l = next(labeled_iterator[d])

                    images_l = images_l.cuda()
                    targets_l = targets_l.cuda()

                    start_time = time.time()

                    dec_outputs = model(images_l,
                                        enc=False,
                                        finetune=args.finetune)

                    loss_s = loss_criterion[d](dec_outputs[d],
                                               targets_l.squeeze(1))
                    loss_s.backward()

                    loss_sup[d] = loss_s.item()

                    if entropy:

                        for _ in range(unlabeled_iters[d]):

                            images_u = next(unlabeled_iterator[d])
                            images_u = images_u.cuda()

                            _, en_outputs = model(images_u)

                            loss_e = torch.mean(
                                similarity_module(
                                    en_outputs, d, args.alpha,
                                    args.beta))  ## unsupervised losses
                            loss_e /= unlabeled_iters[d]
                            loss_e.backward()
                            loss_ent[d].append(loss_e.item())

                    epoch_sup_loss[d].append(loss_sup[d])
                    epoch_ent_loss[d].extend(loss_ent[d])
                    epoch_loss[d].append(
                        loss_sup[d] +
                        np.sum(loss_ent[d]))  ## Already averaged over iters

                time_taken.append(time.time() - start_time)
                optimizer.step()

                if args.steps_loss > 0 and (itr % args.steps_loss == 0
                                            or itr == n_iters - 1):
                    average = {
                        d:
                        np.around(sum(epoch_loss[d]) / len(epoch_loss[d]), 3)
                        for d in datasets
                    }
                    print(
                        f'{phase} loss: {average} (epoch: {epoch}, step: {itr})',
                        "// Avg time/img: %.4f s" %
                        (sum(time_taken) / len(time_taken) / args.batch_size))

                average = {d: np.mean(epoch_loss[d]) for d in datasets}
                average_epoch_loss[phase] = sum(average.values())

                if entropy:
                    average_epoch_sup_loss = {
                        d: np.mean(epoch_sup_loss[d])
                        for d in datasets
                    }
                    average_epoch_ent_loss = {
                        d: np.mean(epoch_ent_loss[d])
                        for d in datasets
                    }

                    ## Write the epoch wise supervised and total unsupervised losses.
                    with open(loss_logpath, "a") as myfile:
                        if len(datasets) > 1 and (itr % args.steps_loss == 0
                                                  or itr == n_iters - 1):
                            myfile.write(
                                "%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\n"
                                % (epoch,
                                   average_epoch_sup_loss.get(datasets[0], 0),
                                   average_epoch_sup_loss.get(datasets[1], 0),
                                   average_epoch_ent_loss.get(datasets[0], 0),
                                   average_epoch_ent_loss.get(datasets[1], 0),
                                   average_epoch_loss[phase]))

            ## Todo: A better way to close the worker threads.
            for d in datasets:
                while True:
                    try:
                        _ = next(labeled_iterator[d])
                    except StopIteration:
                        break

                if entropy:
                    while True:
                        try:
                            _ = next(unlabeled_iterator[d])
                        except StopIteration:
                            break

            iou = {key: (0, 0) for key in datasets}

            if (eval_iou):
                iou = {key: iou_data[key].getIoU() for key in datasets}

                iouStr_label = {
                    key: '{:0.2f}'.format(iou[key][0] * 100)
                    for key in datasets
                }
                for d in datasets:
                    print("EPOCH IoU on {} dataset: {} %".format(
                        d, iouStr_label[d]))

        ########## Train ends ###############################

        ##### Validation ###############
        if (epoch == 1) or (epoch % 5 == 0):  ## validate after every 5 epochs
            for phase in ['val']:

                eval_iou = doIou[phase]
                print("-----", phase, "- EPOCH", epoch, "-----")

                model.eval()

                if (eval_iou):
                    iou_data = {d: iouEval(NUM_LABELS[d]) for d in datasets}

                epoch_val_loss = {d: [] for d in datasets}
                if args.pAcc:
                    pAcc = {d: [] for d in datasets}

                for d in datasets:
                    time_taken = []

                    for itr, (images, targets) in enumerate(loader_val[d]):

                        start_time = time.time()

                        images = images.cuda()
                        targets = targets.cuda()

                        with torch.set_grad_enabled(False):

                            seg_output = model(images, enc=False)
                            loss = loss_criterion[d](seg_output[d],
                                                     targets.squeeze(1))

                            if eval_iou:
                                pred = seg_output[d].argmax(1, True).data
                                iou_data[d].addBatch(pred, targets.data)
                                if args.pAcc:
                                    a = (pred == targets.data)
                                    pAcc[d].append(torch.mean(a.double()))

                            epoch_val_loss[d].append(loss.item())

                        time_taken.append(time.time() - start_time)

                        if args.steps_loss > 0 and (itr % args.steps_loss == 0
                                                    or itr
                                                    == len(loader_val[d]) - 1):
                            average = np.around(np.mean(epoch_val_loss[d]), 3)
                            print(
                                f'{d}: {phase} loss: {average} (epoch: {epoch}, step: {itr})',
                                "// Avg time/img: %.4f s" %
                                (sum(time_taken) / len(time_taken) /
                                 args.batch_size))

                average_epoch_loss[phase] = np.sum(
                    [np.mean(epoch_val_loss[d]) for d in datasets])

                if (eval_iou):
                    iou = {d: iou_data[d].getIoU() for d in datasets}

                    iouStr_label = {
                        d: '{:0.2f}'.format(iou[d][0] * 100)
                        for d in datasets
                    }
                    for d in datasets:
                        print("EPOCH IoU on {} dataset: {} %".format(
                            d, iouStr_label[d]))
                        if args.pAcc:
                            print(f'{d}: pAcc : {np.mean(pAcc[d])*100}%')
        ############# VALIDATION ends #######################

        print("Epoch time {} s".format(time.time() - epoch_start_time))

        # remember best valIoU and save checkpoint
        if sum([iou[key][0] for key in datasets]) == 0:
            current_acc = -average_epoch_loss['val']
        else:
            current_acc = sum([iou[key][0] for key in datasets]) / len(
                datasets)  ## Average of the IoUs to save best model

        is_best = current_acc > best_acc
        best_acc = max(current_acc, best_acc)

        filenameCheckpoint = savedir + '/checkpoint.pth.tar'
        filenameBest = savedir + '/model_best.pth.tar'

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': str(model),
                'state_dict': model.state_dict(),
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            }, is_best, filenameCheckpoint, filenameBest)

        #SAVE MODEL AFTER EPOCH

        filename = f'{savedir}/model-{epoch:03}.pth'
        filenamebest = f'{savedir}/model_best.pth'

        if args.epochs_save > 0 and epoch > 0 and epoch % args.epochs_save == 0:
            torch.save(model.state_dict(), filename)
            print(f'save: {filename} (epoch: {epoch})')

        if (is_best):
            torch.save(model.state_dict(), filenamebest)
            print(f'save: {filenamebest} (epoch: {epoch})')

            with open(savedir + "/best.txt", "w") as myfile:
                myfile.write("Best epoch is %d\n" % (epoch))
                for d in datasets:
                    myfile.write("Val-IoU-%s= %.4f\n" % (d, iou[d][0]))

                myfile.write("\n\n")

                for d in datasets:
                    myfile.write(
                        "Classwise IoU for best epoch in %s is ... \n" % (d))
                    for values in iou[d][1]:
                        myfile.write("%.4f " % (values))
                    myfile.write("\n\n")

        with open(automated_log_path, "a") as myfile:
            iouTrain = 0
            if len(datasets) > 1:
                myfile.write(
                    "\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f"
                    % (epoch, average_epoch_loss['train'],
                       average_epoch_loss['val'], iouTrain, iouTrain,
                       iou[datasets[0]][0], iou[datasets[1]][0], usedLr))
            else:
                myfile.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f" %
                             (epoch, average_epoch_loss['train'],
                              average_epoch_loss['val'], iouTrain,
                              iou[datasets[0]][0], usedLr))

    return (model)
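Every example relies on an iouEval object with addBatch() and getIoU(), whose implementation is not part of these snippets. A simplified sketch of the usual confusion-matrix approach (assuming predictions and targets arrive as CPU tensors or arrays of class indices; the real class may also handle an ignore label and GPU tensors):

import numpy as np


class SimpleIoUEval:
    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.conf = np.zeros((num_classes, num_classes), dtype=np.int64)

    def addBatch(self, pred, target):
        # flatten both maps and accumulate a num_classes x num_classes confusion matrix
        p = np.asarray(pred).reshape(-1)
        t = np.asarray(target).reshape(-1)
        valid = (t >= 0) & (t < self.num_classes)
        idx = t[valid] * self.num_classes + p[valid]
        self.conf += np.bincount(idx, minlength=self.num_classes**2).reshape(
            self.num_classes, self.num_classes)

    def getIoU(self):
        tp = np.diag(self.conf).astype(np.float64)
        fp = self.conf.sum(axis=0) - tp
        fn = self.conf.sum(axis=1) - tp
        iou = tp / np.maximum(tp + fp + fn, 1)
        return iou.mean(), iou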
Exemplo n.º 12
0
def main(args):

    modelpath = args.loadDir + args.loadModel
    weightspath = args.loadDir + args.loadWeights

    print("Loading model: " + modelpath)
    print("Loading weights: " + weightspath)

    model = FSFNet(NUM_CLASSES)

    #model = torch.nn.DataParallel(model)
    if (not args.cpu):
        model = torch.nn.DataParallel(model).cuda()

    def load_my_state_dict(
            model, state_dict
    ):  # custom function to load a model when not all state-dict keys match
        own_state = model.state_dict()
        for name, param in state_dict.items():
            # print(name)
            # print(param)
            if name not in own_state:

                if name.startswith("module."):
                    own_state[name.split("module.")[-1]].copy_(param)
                else:
                    print(name, " not loaded")
                    continue
            else:
                own_state[name].copy_(param)
        return model

    model = load_my_state_dict(
        model,
        torch.load(weightspath, map_location=lambda storage, loc: storage))
    print("Model and weights LOADED successfully")

    model.eval()

    if (not os.path.exists(args.datadir)):
        print("Error: datadir could not be loaded")

    loader = DataLoader(camvid(args.datadir,
                               input_transform_camvid,
                               target_transform_camvid,
                               subset=args.subset),
                        num_workers=args.num_workers,
                        batch_size=args.batch_size,
                        shuffle=False)

    iouEvalVal = iouEval(NUM_CLASSES)

    start = time.time()

    for step, (images, labels, filename, filenameGt) in enumerate(loader):
        if (not args.cpu):
            images = images.cuda()
            labels = labels.cuda()

        inputs = Variable(images, volatile=True)
        outputs = model(inputs)

        iouEvalVal.addBatch(outputs.max(1)[1].unsqueeze(1).data, labels)

        filenameSave = filename[0].split("images/")[1]

        print(step, filenameSave)

    iouVal, iou_classes = iouEvalVal.getIoU()

    iou_classes_str = []
    for i in range(iou_classes.size(0)):
        iouStr = getColorEntry(iou_classes[i]) + '{:0.2f}'.format(
            iou_classes[i] * 100) + '\033[0m'
        iou_classes_str.append(iouStr)

    print("---------------------------------------")
    print("Took ", time.time() - start, "seconds")
    print("=======================================")
    #print("TOTAL IOU: ", iou * 100, "%")
    print("Per-Class IoU:")
    print(iou_classes_str[0], "Sky")
    print(iou_classes_str[1], "Building")
    print(iou_classes_str[2], "Pole")
    print(iou_classes_str[3], "Road")
    print(iou_classes_str[4], "Pavement")
    print(iou_classes_str[5], "Tree")
    print(iou_classes_str[6], "SignSymbol")
    print(iou_classes_str[7], "Fence")
    print(iou_classes_str[8], "Car")
    print(iou_classes_str[9], "Pedestrian")
    print(iou_classes_str[10], "Bicyclist")

    print("=======================================")
    iouStr = getColorEntry(iouVal) + '{:0.2f}'.format(iouVal * 100) + '\033[0m'
    print("MEAN IoU: ", iouStr, "%")
def train(args, rmodel, model, enc=False):
    best_acc = 0
    weight = classWeights(NUM_CLASSES)
    assert os.path.exists(
        args.datadir), "Error: datadir (dataset directory) could not be loaded"

    co_transform = MyCoTransform(augment=True, height=args.height)
    co_transform_val = MyCoTransform(augment=False, height=args.height)
    dataset_train = cityscapes(args.datadir, co_transform, 'train')
    dataset_val = cityscapes(args.datadir, co_transform_val, 'val')

    loader = DataLoader(dataset_train,
                        num_workers=args.num_workers,
                        batch_size=args.batch_size,
                        shuffle=True)
    loader_val = DataLoader(dataset_val,
                            num_workers=args.num_workers,
                            batch_size=args.batch_size,
                            shuffle=False)

    if args.cuda:
        weight = weight.cuda()
    rcriterion = torch.nn.L1Loss()

    savedir = '/home/shyam.nandan/NewExp/F_erfnet_pytorch_ours_w_gt_v2_multiply/save/' + args.savedir  #change path

    if (enc):
        automated_log_path = savedir + "/automated_log_encoder.txt"
        modeltxtpath = savedir + "/model_encoder.txt"
    else:
        automated_log_path = savedir + "/automated_log.txt"
        modeltxtpath = savedir + "/model.txt"

    if (not os.path.exists(automated_log_path)):
        with open(automated_log_path, "a") as myfile:
            myfile.write(
                "Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tTest-IoU\t\tlearningRate"
            )

    with open(modeltxtpath, "w") as myfile:
        myfile.write(str(model))

    optimizer = Adam(model.parameters(),
                     5e-4, (0.9, 0.999),
                     eps=1e-08,
                     weight_decay=2e-4)  ##
    roptimizer = Adam(rmodel.parameters(), 2e-4,
                      (0.9, 0.999))  ## restoration scheduler

    start_epoch = 1
    scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.99)
    rscheduler = lr_scheduler.StepLR(roptimizer, step_size=30,
                                     gamma=0.5)  ## Restoration schedular

    for epoch in range(start_epoch, args.num_epochs + 1):
        print("----- TRAINING - EPOCH", epoch, "-----")

        scheduler.step()  ## scheduler 2
        rscheduler.step()

        epoch_loss = []
        time_train = []

        doIouTrain = args.iouTrain
        doIouVal = args.iouVal

        if (doIouTrain):
            iouEvalTrain = iouEval(NUM_CLASSES)

        usedLr = 0
        rusedLr = 0
        for param_group in optimizer.param_groups:
            print("Segmentation LEARNING RATE: ", param_group['lr'])
            usedLr = float(param_group['lr'])
        for param_group in roptimizer.param_groups:
            print("Restoration LEARNING RATE: ", param_group['lr'])
            rusedLr = float(param_group['lr'])

        model.eval()
        epoch_loss_val = []
        time_val = []

        if (doIouVal):
            iouEvalVal = iouEval(NUM_CLASSES)

        for step, (timages, images, labels, filename) in enumerate(loader_val):
            start_time = time.time()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()
                timages = timages.cuda()

            inputs = Variable(
                timages, volatile=True
            )  # volatile=True (old PyTorch) skips autograd bookkeeping during evaluation
            itargets = Variable(images, volatile=True)
            targets = Variable(labels, volatile=True)

            ss_inputs = rmodel(inputs, flag=0, r_fb1=0, r_fb2=0)

            outs = model(ss_inputs, only_encode=enc)

            tminus_outs = outs.detach()
            tplus_outs = outs.detach()

            for num_feedback in range(3):
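                # feed the change between two consecutive segmentation outputs back into the restoration model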

                optimizer.zero_grad()
                roptimizer.zero_grad()

                ss_inputs = rmodel(inputs,
                                   flag=1,
                                   r_fb1=(tplus_outs - tminus_outs),
                                   r_fb2=ss_inputs.detach())

                loss = rcriterion(ss_inputs, itargets)

                outs = model(ss_inputs.detach(), only_encode=enc)

                tminus_outs = tplus_outs
                tplus_outs = outs.detach()

            outputs = outs
            del outs, tminus_outs, tplus_outs
            gc.collect()
            Gamma = [0, 0, 0]
            Alpha = [1, 1, 1]
            loss = CB_iFl(outputs,
                          targets[:, 0],
                          weight,
                          gamma=Gamma[0],
                          alpha=Alpha[0])
            epoch_loss_val.append(loss.data[0])
            time_val.append(time.time() - start_time)

            if (doIouVal):
                #start_time_iou = time.time()
                iouEvalVal_img = iouEval(NUM_CLASSES)
                iouEvalVal_img.addBatch(
                    outputs.max(1)[1].unsqueeze(1).data, targets.data)

                iouEvalVal.addBatch(
                    outputs.max(1)[1].unsqueeze(1).data, targets.data)

                #print ("Time to add confusion matrix: ", time.time() - start_time_iou)
                label_color = Colorize()(
                    outputs[0].max(0)[1].byte().cpu().data.unsqueeze(0))
                label_save = ToPILImage()(label_color)

                filenameSave = '../save_color_restored_joint_afl_CBFL/' + filename[
                    0].split('/')[-2]

                im_iou, _ = iouEvalVal_img.getIoU()

                if not os.path.exists(filenameSave):
                    os.makedirs(filenameSave)
            #Uncomment to save output
            #label_save.save(filenameSave+ '/' + str(" %6.4f " %im_iou[0].data.numpy()) + '_' + filename[0].split('/')[-1])

            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss_val) / len(epoch_loss_val)
                print('Val loss:  ', average, 'Epoch:  ', epoch, 'Step:  ',
                      step)

        average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)

        iouVal = 0
        if (doIouVal):
            iouVal, iou_classes = iouEvalVal.getIoU()
            iouStr = getColorEntry(iouVal) + '{:0.2f}'.format(
                iouVal * 100) + '\033[0m'
            print(iouVal, iou_classes, iouStr)

    return (model)
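Both this example's train() and the one in the next example obtain per-class loss weights from classWeights(NUM_CLASSES), which is not shown. One common heuristic (assumed here, in the spirit of the ENet class-balancing scheme) derives each weight from the class's pixel frequency:

import numpy as np
import torch


def class_weights_from_histogram(pixel_counts, c=1.02):
    # pixel_counts: per-class pixel counts over the training labels
    counts = np.asarray(pixel_counts, dtype=np.float64)
    freq = counts / counts.sum()
    weights = 1.0 / np.log(c + freq)
    return torch.from_numpy(weights).float()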
Exemplo n.º 14
0
def train(args, rmodel, model, enc=False):

    best_acc = 0
    weight = classWeights(NUM_CLASSES)
    assert os.path.exists(args.datadir), "Error: datadir (dataset directory) could not be loaded"

    co_transform = MyCoTransform(augment=True, height=args.height)
    co_transform_val = MyCoTransform(augment=False, height=args.height)

    dataset_train = cityscapes(args.datadir, co_transform, 'train')
    dataset_val = cityscapes(args.datadir, co_transform_val, 'val')

    loader = DataLoader(dataset_train, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=True)
    loader_val = DataLoader(dataset_val, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=False)

    if args.cuda:
        weight = weight.cuda()
    rcriterion = torch.nn.L1Loss()
    
    savedir = '/home/shyam.nandan/NewExp/final_code/save/' + args.savedir
    automated_log_path = savedir + "/automated_log.txt"
    modeltxtpath = savedir + "/model.txt"    

    if (not os.path.exists(automated_log_path)):    
        with open(automated_log_path, "a") as myfile:
            myfile.write("Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tTest-IoU\t\tlearningRate")

    with open(modeltxtpath, "w") as myfile:
        myfile.write(str(model))

    optimizer = Adam(model.parameters(), 5e-4, (0.9, 0.999),eps=1e-08, weight_decay=2e-4)
    roptimizer = Adam(rmodel.parameters(), 2e-4, (0.9, 0.999))                                       

    start_epoch = 1
    scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.99)
    rscheduler = lr_scheduler.StepLR(roptimizer, step_size=30, gamma=0.5)                        
    
    for epoch in range(start_epoch, args.num_epochs+1):
        print("----- TRAINING - EPOCH", epoch, "-----")

        scheduler.step()    
        rscheduler.step()
        
        epoch_loss = []
        time_train = []
     
        doIouTrain = args.iouTrain   
        doIouVal =  args.iouVal      

        if (doIouTrain):
            iouEvalTrain = iouEval(NUM_CLASSES)

        usedLr = 0
        rusedLr = 0

        for param_group in optimizer.param_groups:
            print("Segmentation LEARNING RATE: ", param_group['lr'])
            usedLr = float(param_group['lr'])
        for param_group in roptimizer.param_groups:
            print("Restoration LEARNING RATE: ", param_group['lr'])
            rusedLr = float(param_group['lr'])

        model.train()
        for step, (timages, images, labels) in enumerate(loader):
            start_time = time.time()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()
                timages = timages.cuda()
           
            inputs = Variable(timages)
            itargets = Variable(images)
            targets = Variable(labels)

            ss_inputs = rmodel(inputs, flag=0, r_fb1=0, r_fb2=0)
            
            outs = model(ss_inputs, only_encode=enc)

            tminus_outs = outs.detach()
            tplus_outs = outs.detach()
            
            outputs = []
            for num_feedback in range(3):
                optimizer.zero_grad()
                roptimizer.zero_grad()

                ss_inputs = rmodel(inputs,
                                   flag=1,
                                   r_fb1=(tplus_outs - tminus_outs),
                                   r_fb2=ss_inputs.detach())

                loss = rcriterion(ss_inputs, itargets)

                loss.backward()
                roptimizer.step()

                optimizer.zero_grad()
                roptimizer.zero_grad()

                outs = model(ss_inputs.detach(), only_encode=enc)

                outputs.append(outs)

                tminus_outs = tplus_outs
                tplus_outs = outs.detach()

            del outs, tminus_outs, tplus_outs
            gc.collect()
            
            loss = 0.0
            Gamma = [0, 0.1, 0.2]
            Alpha = [1, 1, 1]
            
            for i, o in enumerate(outputs):
                loss += CB_iFl(o, targets[:, 0], weight, gamma = Gamma[i], alpha = Alpha[i])
       
            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.data[0])
            time_train.append(time.time() - start_time)

            if (doIouTrain):
                # outputs is the list of per-feedback predictions; evaluate the last one
                iouEvalTrain.addBatch(
                    outputs[-1].max(1)[1].unsqueeze(1).data, targets.data)

            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss) / len(epoch_loss)
                print('loss:  ', average, 'Epoch:  ', epoch, 'Step:  ', step)

        average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)        
        iouTrain = 0
        if (doIouTrain):
            iouTrain, iou_classes = iouEvalTrain.getIoU()
            iouStr = getColorEntry(iouTrain)+'{:0.2f}'.format(iouTrain*100) + '\033[0m'
            print ("EPOCH IoU on TRAIN set: ", iouStr, "%")  

        print("----- VALIDATING - EPOCH", epoch, "-----")
        model.eval()
        epoch_loss_val = []
        time_val = []

        if (doIouVal):
            iouEvalVal = iouEval(NUM_CLASSES)

        for step, (timages, images, labels) in enumerate(loader_val):
            start_time = time.time()
            if args.cuda:
                images = images.cuda()
                labels = labels.cuda()
                timages = timages.cuda()

            inputs = Variable(timages, volatile=True)
            itargets = Variable(images, volatile=True)
            targets = Variable(labels, volatile=True)
            ss_inputs = rmodel(inputs, flag=0, r_fb1=0, r_fb2=0)
            
            outs = model(ss_inputs, only_encode=enc)
            tminus_outs = outs.detach()
            tplus_outs = outs.detach()
                        
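            # Run the same three feedback rounds at validation time; no backward
            # passes are taken, only the final segmentation output is kept.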
            for num_feedback in range(3):
                optimizer.zero_grad()
                roptimizer.zero_grad()

                ss_inputs = rmodel(inputs, flag=1, r_fb1=(tplus_outs - tminus_outs), r_fb2=ss_inputs.detach())
                loss = rcriterion(ss_inputs, itargets)

                outs = model(ss_inputs.detach(), only_encode=enc)

                tminus_outs = tplus_outs
                tplus_outs = outs.detach()
  
            ##################################

            del ss_inputs, tplus_outs, tminus_outs
            outputs = outs
            loss = CB_iFl(outputs, targets[:, 0], weight, gamma = Gamma[0], alpha = Alpha[0])
            epoch_loss_val.append(loss.data[0])
            time_val.append(time.time() - start_time)

            if (doIouVal):
                iouEvalVal.addBatch(outputs.max(1)[1].unsqueeze(1).data, targets.data)

            if args.steps_loss > 0 and step % args.steps_loss == 0:
                average = sum(epoch_loss_val) / len(epoch_loss_val)
                print('Val loss: ', average, 'Epoch: ', epoch, 'Step: ', step)

        average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)

        iouVal = 0
        if (doIouVal):
            iouVal, iou_classes = iouEvalVal.getIoU()
            iouStr = getColorEntry(iouVal)+'{:0.2f}'.format(iouVal*100) + '\033[0m'
            print ("EPOCH IoU on VAL set: ", iouStr, "%") 
           
        # remember best valIoU and save checkpoint
        if iouVal == 0:
            current_acc = -average_epoch_loss_val
        else:
            current_acc = iouVal 

        is_best = current_acc > best_acc
        best_acc = max(current_acc, best_acc)

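        # Write a rolling checkpoint every epoch and snapshot the best segmentation
        # and restoration weights whenever the tracked metric improves.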
        filenameCheckpoint = savedir + '/checkpoint.pth.tar'
        filenameBest = savedir + '/model_best.pth.tar'
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': str(model),
            'state_dict': model.state_dict(),
            'best_acc': best_acc,
            'optimizer' : optimizer.state_dict(),
        }, is_best, filenameCheckpoint, filenameBest)

        #SAVE MODEL AFTER EPOCH
        filename = savedir + '/model-{epoch:03}.pth'.format(epoch=epoch)
        filenamebest = savedir + '/model_best.pth'

        if args.epochs_save > 0 and epoch % args.epochs_save == 0:
            torch.save(model.state_dict(), filename)
            print(filename, epoch)
        if (is_best):
            torch.save(model.state_dict(), filenamebest)
            torch.save(rmodel.state_dict(), savedir + '/rmodel_best.pth')
            print(filenamebest,epoch)
            with open(savedir + "/best.txt", "w") as myfile:
                 myfile.write("Best epoch is %d, with Val-IoU= %.4f" % (epoch, iouVal))            

        #SAVE TO FILE A ROW WITH THE EPOCH RESULT (train loss, val loss, train IoU, val IoU)
        #Epoch		Train-loss		Test-loss	Train-IoU	Test-IoU		learningRate
        with open(automated_log_path, "a") as myfile:
            myfile.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f" % (epoch, average_epoch_loss_train, average_epoch_loss_val, iouTrain, iouVal, usedLr ))
    
    return model