示例#1
0
def train(train_loader, net, criterion, optimizer, epoch, train_args,
          total_num_paramters):
    """Run one training epoch with periodic checkpointing and visualisation.

    Every batch is moved to the GPU, passed through ``net``, scored with
    ``criterion`` and used for a single optimizer step.  The running average
    loss is pushed to the module-level ``writer`` after each batch.  Every
    ``train_args.print_frequency`` batches a progress line is printed; when
    that batch number is also a multiple of ``train_args.model_freq`` the
    weights are saved and the colourised argmax predictions of the current
    batch are logged as an image grid.  Once the loop is done, the current
    learning rate is appended to the LR text log.

    Args:
        train_loader: iterable of ``(inputs, labels)`` batches supporting
            ``len()``.
        net: model to train.
        criterion: callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer; ``param_groups[0]['lr']`` is written to the log.
        epoch: epoch number; the global iteration counter starts at
            ``(epoch - 1) * len(train_loader)``.
        train_args: object exposing ``print_frequency`` and ``model_freq``.
        total_num_paramters: accepted but not used by this function.
    """
    loss_meter = AverageMeter()
    batches_per_epoch = len(train_loader)

    # Global iteration count reached before this epoch starts.
    global_step = (epoch - 1) * batches_per_epoch
    lr_log_index = 0

    epoch_start = time.time()
    net.train()
    for batch_idx, (inputs, labels) in enumerate(train_loader):
        step_no = batch_idx + 1

        assert inputs.size()[2:] == labels.size()[1:]
        batch_size = inputs.size(0)
        inputs = Variable(inputs).cuda()
        labels = Variable(labels).cuda()

        optimizer.zero_grad()

        outputs = net(inputs)
        assert outputs.size()[2:] == labels.size()[1:]
        assert outputs.size()[1] == segmentation_dataloader.num_classes

        # Only the loss evaluation is timed; it drives the examples/s figure.
        loss_started = timeit.default_timer()
        loss = criterion(outputs, labels)
        loss_seconds = timeit.default_timer() - loss_started

        loss.backward()
        optimizer.step()
        # Seconds since the start of the epoch, not of this batch.
        elapsed = time.time() - epoch_start

        loss_meter.update(loss.data[0], batch_size)
        global_step += 1
        writer.add_scalar('train_loss', loss_meter.avg, global_step)

        # Nothing more to do unless this is a reporting step.
        if step_no % train_args.print_frequency != 0:
            continue

        throughput = args.train_batch_size / loss_seconds
        progress_fmt = (
            'epoch: %d | iter: %d / %d | train loss: %.5f | examples/s: %4.2f | time_elapsed: %.5f'
            's')
        print(progress_fmt % (epoch, step_no, batches_per_epoch,
                              loss_meter.avg, throughput, elapsed))

        # Checkpoint + visualisation only happen on reporting steps that are
        # also multiples of model_freq.
        if step_no % train_args.model_freq != 0:
            continue

        checkpoint_path = os.path.join(
            ckpt_path, 'Model', ImageNet, exp_name_ImageNet,
            'model-{}'.format(step_no) + '.pkl')
        torch.save(net.state_dict(), checkpoint_path)
        to_tensor = standard_transforms.ToTensor()

        # Per-pixel argmax over the class axis for every sample in the batch.
        class_maps = np.concatenate(
            [outputs.data.cpu().numpy().argmax(axis=1)])
        tiles = [
            to_tensor(
                segmentation_dataloader.colorize_mask(class_map).convert('RGB'))
            for class_map in class_maps
        ]

        grid = vutils.make_grid(torch.stack(tiles, 0), nrow=1, padding=0)
        writer.add_image('Output_image_{}'.format(epoch), grid)

    # One learning-rate record per call, appended to the experiment's LR log.
    lr_log_path = os.path.join(ckpt_path, 'TensorboardX', ImageNet,
                               exp_name_ImageNet,
                               'LR_v0{}_{}.txt'.format(x, version))
    with open(lr_log_path, 'a') as LRtxt:
        lr_record = "index : {}, epoch : {}, learning rate : {: f}".format(
            lr_log_index, epoch, optimizer.param_groups[0]['lr'])
        LRtxt.write(lr_record + '\n')
def train(train_loader, net, criterion, optimizer, epoch, train_args,
          train_set):
    """Run one training epoch, rescheduling and logging the LR every batch.

    On entry the training script at a hard-coded absolute path is copied into
    the experiment's TensorboardX directory.  Dataset and parameter counts are
    printed, then each batch is moved to the GPU, forwarded, scored with
    ``criterion`` and used for one optimizer step.  The running average loss
    goes to the module-level ``writer`` after every batch, a progress line is
    printed every ``train_args.print_frequency`` batches, and after every
    batch ``poly_lr_scheduler`` is invoked and the resulting learning rate is
    appended to the LR text log.

    Args:
        train_loader: iterable of ``(inputs, labels)`` batches supporting
            ``len()``.
        net: model to train.
        criterion: callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer handed to ``poly_lr_scheduler``.
        epoch: epoch number; the global iteration counter starts at
            ``(epoch - 1) * len(train_loader)``.
        train_args: object exposing ``print_frequency``.
        train_set: dataset whose ``len()`` is reported as the sample count.
    """
    import shutil

    # Keep a copy of the training script next to the experiment outputs.
    script_src = "/home/mk/Semantic_Segmentation/DenseASPP-master/My_train/segmentation_main2.py"
    script_dst = os.path.join(
        ckpt_path, 'TensorboardX', ImageNet, exp_name_ImageNet,
        "segmentation_main2_" + "v_0{}_{}.py".format(x, version))
    shutil.copy(script_src, script_dst)

    net.train()
    elapsed_meter = AverageMeter()
    loss_meter = AverageMeter()
    throughput_meter = AverageMeter()

    sample_count = len(train_set)
    batches_per_epoch = np.ceil(sample_count /
                                args.train_batch_size).astype(np.int32)
    planned_steps = args.num_epochs * batches_per_epoch

    print("total number of samples: {}".format(sample_count))
    print("total number of steps  : {}".format(planned_steps))

    # Global iteration count reached before this epoch starts.
    global_step = (epoch - 1) * len(train_loader)

    # Element count summed over every tensor returned by net.parameters().
    param_total = sum(
        np.array(list(tensor.size())).prod() for tensor in net.parameters())
    print("number of trainable parameters: {}".format(param_total))

    lr_log_path = os.path.join(ckpt_path, 'TensorboardX', ImageNet,
                               exp_name_ImageNet,
                               'LR_v0{}_{}.txt'.format(x, version))
    progress_fmt = (
        'epoch: %d | iter: %d / %d | train loss: %.5f | examples/s: %4.2f | time_elapsed: %.5f'
        's')

    epoch_start = time.time()
    for step_no, (inputs, labels) in enumerate(train_loader, 1):
        assert inputs.size()[2:] == labels.size()[1:]
        batch_size = inputs.size(0)
        inputs = Variable(inputs).cuda()
        labels = Variable(labels).cuda()

        optimizer.zero_grad()
        outputs = net(inputs)
        assert outputs.size()[2:] == labels.size()[1:]
        assert outputs.size()[1] == segmentation_dataloader.num_classes

        # Only the loss evaluation is timed; it drives the examples/s figure.
        loss_started = time.time()
        loss = criterion(outputs, labels)
        loss_seconds = time.time() - loss_started

        loss.backward()
        optimizer.step()
        # The meter receives seconds since the start of the epoch.
        elapsed_meter.update(time.time() - epoch_start)

        # The loss is weighted by the number of samples in this batch.
        loss_meter.update(loss.data[0], batch_size)
        global_step += 1
        writer.add_scalar('train_loss', loss_meter.avg, global_step)

        if step_no % train_args.print_frequency == 0:
            throughput_meter.update(args.train_batch_size / loss_seconds)
            print(progress_fmt % (epoch, step_no, len(train_loader),
                                  loss_meter.avg, throughput_meter.avg,
                                  elapsed_meter.avg))

        # The scheduler is driven by the epoch number, yet invoked per batch.
        poly_lr_scheduler(optimizer=optimizer,
                          init_lr=args.learning_rate,
                          epoch=epoch - 1)

        # One LR record per batch; the record index restarts at 0 each call.
        with open(lr_log_path, 'a') as LRtxt:
            lr_record = "index : {}, epoch : {}, learning rate : {: f}".format(
                step_no - 1, epoch, optimizer.param_groups[0]['lr'])
            LRtxt.write(lr_record + '\n')
def validate(val_loader, net, criterion, optimizer, epoch, train_args, restore,
             visualize):
    """Evaluate ``net`` on ``val_loader`` and snapshot it on a new best mIoU.

    The network is switched to eval mode, every validation batch is forwarded,
    and per-pixel argmax predictions are gathered together with the ground
    truth.  Aggregate metrics come from ``evaluate``; per-class accuracies for
    19 hard-coded class names are appended to a text file.  When ``mean_iu``
    beats ``train_args.best_record['mean_iu']`` the record is updated, the
    weights are saved, and the randomly sampled inputs are (optionally) written
    to disk and logged as an image grid.  Scalars are sent to the module-level
    ``writer`` and the network is put back into train mode before returning.

    Args:
        val_loader: iterable yielding ``(inputs, gts)`` batches.
        net: model to evaluate.
        criterion: callable invoked as ``criterion(outputs, gts)``.
        optimizer: optimizer; only ``param_groups[0]['lr']`` is read.
        epoch: epoch number used for logging and snapshot naming.
        train_args: object exposing ``val_img_sample_rate``,
            ``val_save_to_img_file`` and the mutable ``best_record`` dict.
        restore: callable turning a stored input tensor back into an image
            object that supports ``save`` and ``convert``.
        visualize: callable turning an RGB image into a tensor for the grid.

    Returns:
        The average validation loss (``val_loss.avg``).
    """
    net.eval()
    val_loss = AverageMeter()

    inputs_all, gts_all, predictions_all = [], [], []

    for inputs, gts in val_loader:
        N = inputs.size(0)
        # `volatile=True` is the legacy (pre-0.4) PyTorch way of disabling
        # gradient tracking for inference.
        inputs = Variable(inputs, volatile=True).cuda()
        gts = Variable(gts, volatile=True).cuda()

        outputs = net(inputs)
        # Index of the highest-scoring class along dim 1 for every pixel.
        predictions = outputs.data.max(1)[1].squeeze_(1).cpu().numpy()

        # NOTE(review): the loss is divided by N before being averaged with
        # weight N - presumably `criterion` sums over the batch; confirm.
        val_loss.update(criterion(outputs, gts).data[0] / N, N)

        # Keep each input with probability `val_img_sample_rate`; a `None`
        # placeholder keeps the list aligned with gts/predictions.
        for image in inputs:
            if random.random() > train_args.val_img_sample_rate:
                inputs_all.append(None)
            else:
                inputs_all.append(image.data.cpu())
        gts_all.append(gts.data.cpu().numpy())
        predictions_all.append(predictions)

    gts_all = np.concatenate(gts_all)
    predictions_all = np.concatenate(predictions_all)

    acc, acc_cls, acc_cls_mean, mean_iu, fwavacc = evaluate(
        predictions_all, gts_all, segmentation_dataloader.num_classes)

    num_validate = epoch
    with open(
            os.path.join(ckpt_path, 'TensorboardX', ImageNet,
                         exp_name_ImageNet,
                         'class_accuracy{}_{}.txt'.format(x, version)),
            'a') as acc_cls_txt:
        # The template below names exactly 19 classes, in index order.
        per_class_percent = [acc_cls[class_idx] * 100
                             for class_idx in range(19)]
        acc_cls_txt.write(
            "================================the number of validation : {}================================"
            "\nroad: {}, \nsidewalk: {}, \nbuilding: {}, \nwall: {}, \nfence: {}, \npole: {}, \ntraffic light: {}, \ntraffic sign: {},"
            "\nvegetation: {}, \nterrain: {}, \nsky: {}, \nperson: {}, \nrider: {}, \ncar: {}, \ntruck: {}, \nbus: {}, \ntrain: {}, \nmotorcycle: {},"
            "\nbicycle: {}\n\n".format(num_validate, *per_class_percent))

    if mean_iu > train_args.best_record['mean_iu']:
        train_args.best_record['val_loss'] = val_loss.avg
        train_args.best_record['epoch'] = epoch
        train_args.best_record['acc'] = acc
        # acc_cls_mean : mean of the per-class accuracies
        train_args.best_record['acc_cls_mean'] = acc_cls_mean
        # mean_iu : mean intersection over union
        train_args.best_record['mean_iu'] = mean_iu
        # fwavacc : frequency weighted average accuracy
        train_args.best_record['fwavacc'] = fwavacc

        snapshot_name = 'epoch_%d_loss_%.2f_acc_%.2f_acc-cls_%.2f_mean-iu_%.2f_fwavacc_%.2f_lr_%.10f' % (
            epoch, val_loss.avg, acc, acc_cls_mean, mean_iu, fwavacc,
            optimizer.param_groups[0]['lr'])
        torch.save(
            net.state_dict(),
            os.path.join(ckpt_path, 'Model', ImageNet, exp_name_ImageNet,
                         snapshot_name + '_v0{}'.format(x) + '.pth'))

        # Directory for the sampled validation images (only when requested).
        to_save_dir = None
        if train_args.val_save_to_img_file:
            to_save_dir = os.path.join(
                ckpt_path, 'TensorboardX', ImageNet, exp_name_ImageNet,
                'epoch' + str(epoch) + '_v0{}'.format(x))
            check_mkdir(to_save_dir)

        val_visual = []
        for idx, (stored_input, gt, prediction) in enumerate(
                zip(inputs_all, gts_all, predictions_all)):
            # Inputs that were not sampled are skipped entirely.
            if stored_input is None:
                continue

            input_pil = restore(stored_input)
            gt_pil = segmentation_dataloader.colorize_mask(gt)
            predictions_pil = segmentation_dataloader.colorize_mask(prediction)

            if train_args.val_save_to_img_file:
                input_pil.save(os.path.join(to_save_dir, '%d_input.png' % idx))
                predictions_pil.save(
                    os.path.join(to_save_dir, '%d_prediction.png' % idx))
                gt_pil.save(os.path.join(to_save_dir, '%d_gt.png' % idx))

            # One grid row per sample: input, ground truth, prediction.
            val_visual.extend([
                visualize(input_pil.convert('RGB')),
                visualize(gt_pil.convert('RGB')),
                visualize(predictions_pil.convert('RGB'))
            ])

        # Random sampling may keep no image at all; torch.stack rejects an
        # empty list, so the grid is only built when there is something to show.
        if val_visual:
            val_visual = torch.stack(val_visual, 0)
            # make_grid tiles the stacked 4D tensor into one 3D image tensor.
            val_visual = vutils.make_grid(val_visual, nrow=3, padding=0)
            writer.add_image(snapshot_name, val_visual)

    print(
        '-----------------------------------------------------------------------------------------------------------'
    )
    print(
        '[epoch %d], [val loss %.5f], [acc %.5f], [acc_cls_mean %.5f], [mean_iu %.5f], [fwavacc %.5f]'
        % (epoch, val_loss.avg, acc, acc_cls_mean, mean_iu, fwavacc))

    print(
        'best record: [val loss %.5f], [acc %.5f], [acc_cls_mean %.5f], [mean_iu %.5f], [fwavacc %.5f], [epoch %d]'
        % (train_args.best_record['val_loss'], train_args.best_record['acc'],
           train_args.best_record['acc_cls_mean'],
           train_args.best_record['mean_iu'],
           train_args.best_record['fwavacc'], train_args.best_record['epoch']))
    print(
        '-----------------------------------------------------------------------------------------------------------'
    )

    # Per-epoch scalar summaries.
    writer.add_scalar('val_loss', val_loss.avg, epoch)
    writer.add_scalar('acc', acc, epoch)
    writer.add_scalar('acc_cls_mean', acc_cls_mean, epoch)
    writer.add_scalar('mean_iu', mean_iu, epoch)
    writer.add_scalar('fwavacc', fwavacc, epoch)
    writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch)

    # Restore training mode for the caller.
    net.train()
    return val_loss.avg
def train(train_loader, net, criterion, optimizer, epoch, train_args,
          train_set):
    """Run one training epoch, logging the learning rate after every batch.

    Dataset and parameter counts are printed first.  Each batch is then moved
    to the GPU, forwarded through ``net``, scored with ``criterion`` and used
    for one optimizer step.  The running average loss goes to the module-level
    ``writer`` after every batch, a progress line is printed every
    ``train_args.print_frequency`` batches, and the current learning rate is
    appended to the LR text log after every batch.

    Args:
        train_loader: iterable of ``(inputs, labels)`` batches supporting
            ``len()``.
        net: model to train.
        criterion: callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer; ``param_groups[0]['lr']`` is written to the log.
        epoch: epoch number; the global iteration counter starts at
            ``(epoch - 1) * len(train_loader)``.
        train_args: object exposing ``print_frequency``.
        train_set: dataset whose ``len()`` is reported as the sample count.
    """
    loss_meter = AverageMeter()

    sample_count = len(train_set)
    batches_per_epoch = np.ceil(sample_count /
                                args.train_batch_size).astype(np.int32)
    planned_steps = args.num_epochs * batches_per_epoch

    print("total number of samples: {}".format(sample_count))
    print("total number of steps  : {}".format(planned_steps))

    # Global iteration count reached before this epoch starts.
    global_step = (epoch - 1) * len(train_loader)

    # Element count summed over every tensor returned by net.parameters().
    param_total = sum(
        np.array(list(tensor.size())).prod() for tensor in net.parameters())
    print("number of trainable parameters: {}".format(param_total))

    lr_log_path = os.path.join(ckpt_path, 'TensorboardX', ImageNet,
                               exp_name_ImageNet,
                               'LR_v0{}_{}.txt'.format(x, version))
    progress_fmt = (
        'epoch: %d | iter: %d / %d | train loss: %.5f | examples/s: %4.2f | time_elapsed: %.5f'
        's')

    epoch_start = time.time()
    net.train()
    for step_no, (inputs, labels) in enumerate(train_loader, 1):
        assert inputs.size()[2:] == labels.size()[1:]
        batch_size = inputs.size(0)
        inputs = Variable(inputs).cuda()
        labels = Variable(labels).cuda()

        # zero_grad() resets the gradients of all optimized parameters.
        optimizer.zero_grad()

        outputs = net(inputs)
        assert outputs.size()[2:] == labels.size()[1:]
        assert outputs.size()[1] == segmentation_dataloader.num_classes

        # Only the loss evaluation is timed; it drives the examples/s figure.
        loss_started = timeit.default_timer()
        loss = criterion(outputs, labels)
        loss_seconds = timeit.default_timer() - loss_started

        loss.backward()
        optimizer.step()
        # Seconds since the start of the epoch, not of this batch.
        elapsed = time.time() - epoch_start

        loss_meter.update(loss.data[0], batch_size)
        global_step += 1
        writer.add_scalar('train_loss', loss_meter.avg, global_step)

        if step_no % train_args.print_frequency == 0:
            throughput = args.train_batch_size / loss_seconds
            print(progress_fmt % (epoch, step_no, len(train_loader),
                                  loss_meter.avg, throughput, elapsed))

        # One LR record per batch; the record index restarts at 0 each call.
        with open(lr_log_path, 'a') as LRtxt:
            lr_record = "index : {}, epoch : {}, learning rate : {: f}".format(
                step_no - 1, epoch, optimizer.param_groups[0]['lr'])
            LRtxt.write(lr_record + '\n')