예제 #1
0
def main(args, logger):
    """Train FCNRes101 on the Vaihingen dataset and checkpoint the best model.

    Args:
        args: parsed command-line arguments; expects .gpu, .root, .workers,
            .batch_size, .lr, .weight_decay, .savedir, .model, .max_epochs.
        logger: logging.Logger mirroring console output into a log file.
    """
    cudnn.enabled = True    # Enables benchmark mode in cudnn, to enable the inbuilt
    cudnn.benchmark = True  # cudnn auto-tuner to find the best algorithm to use for
    # our hardware
    # NOTE: with benchmark=True cudnn may select non-deterministic algorithms,
    # so runs are not exactly reproducible even though all RNGs are seeded below.

    seed = random.randint(1, 10000)
    print('======>random seed {}'.format(seed))
    logger.info('======>random seed {}'.format(seed))

    random.seed(seed)  # python random seed
    np.random.seed(seed)  # set numpy random seed
    torch.manual_seed(seed)  # set random seed for cpu
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)  # set random seed for all GPU

    # Setup device
    device = torch.device(
        "cuda:{}".format(args.gpu) if torch.cuda.is_available() else "cpu")

    # setup DatasetLoader
    train_set = vaihingenloader(root=args.root, split='train')
    test_set = vaihingenloader(root=args.root, split='test')

    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              drop_last=True,
                              shuffle=True,
                              **kwargs)
    test_loader = DataLoader(test_set,
                             batch_size=1,
                             drop_last=False,
                             shuffle=False,
                             **kwargs)

    # setup optimization criterion
    criterion = utils.utils.cross_entropy2d

    # setup model
    print('======> building network')
    logger.info('======> building network')

    model = FCNRes101().to(device)
    if torch.cuda.device_count() > 1:
        # Spread batches over every GPU listed in args.gpu (comma-separated ids).
        device_ids = list(map(int, args.gpu.split(',')))
        model = nn.DataParallel(model, device_ids=device_ids)

    print("======> computing network parameters")
    logger.info("======> computing network parameters")

    total_paramters = netParams(model)
    print("the number of parameters: " + str(total_paramters))
    logger.info("the number of parameters: " + str(total_paramters))

    # setup optimizer
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=args.weight_decay)

    # setup savedir: encode model name, batch size and gpu ids in the path
    args.savedir = (args.savedir + '/' + args.model + 'bs' +
                    str(args.batch_size) + 'gpu' + str(args.gpu) + '/')
    if not os.path.exists(args.savedir):
        os.makedirs(args.savedir)

    start_epoch = 0
    best_epoch = 0
    best_overall = 0.
    best_mIoU = 0.
    best_F1 = 0.

    # A plain for-loop replaces the original `while flag` wrapper, which only
    # ever ran the for-loop once and raised NameError on `epoch` when
    # args.max_epochs == start_epoch.
    for epoch in range(start_epoch, args.max_epochs):
        print('======> Epoch {} starting train.'.format(epoch))
        logger.info('======> Epoch {} starting train.'.format(epoch))

        train(args, train_loader, model, criterion, optimizer, epoch, logger)

        print('======> Epoch {} train finish.'.format(epoch))
        logger.info('======> Epoch {} train finish.'.format(epoch))

        # Evaluate after every epoch (the original `epoch % 1 == 0` guard was
        # always true, so this is equivalent).
        print('Now Epoch {}, starting evaluate on Test dataset.'.format(epoch))
        logger.info('Now starting evaluate on Test dataset.')
        print('length of test set:', len(test_loader))
        logger.info('length of test set: {}'.format(len(test_loader)))

        score, class_iou, class_F1 = test(args, test_loader, model, criterion,
                                          epoch, logger)

        for k, v in score.items():
            print('{}: {:.5f}'.format(k, v))
            logger.info('======>{0:^18} {1:^10}'.format(k, v))

        print('Now print class iou')
        for k, v in class_iou.items():
            print('{}: {:.5f}'.format(k, v))
            logger.info('======>{0:^18} {1:^10}'.format(k, v))

        print('Now print class_F1')
        for k, v in class_F1.items():
            print('{}: {:.5f}'.format(k, v))
            logger.info('======>{0:^18} {1:^10}'.format(k, v))

        if score["Mean IoU : \t"] > best_mIoU:
            best_mIoU = score["Mean IoU : \t"]

        if score["Overall Acc : \t"] > best_overall:
            best_overall = score["Overall Acc : \t"]
            # save model in best overall Acc
            model_file_name = args.savedir + '/model.pth'
            torch.save(model.state_dict(), model_file_name)
            best_epoch = epoch

        if score["Mean F1 : \t"] > best_F1:
            best_F1 = score["Mean F1 : \t"]

        print(f"best mean IoU: {best_mIoU}")
        print(f"best overall : {best_overall}")
        print(f"best F1: {best_F1}")
        print(f"best epoch: {best_epoch}")
예제 #2
0
def main(args, logger, summary):
    """Train a skmtnet (wide_resnet50_2 backbone) with a deterministic setup.

    Args:
        args: parsed command-line arguments; expects .workers, .batch_size,
            .auxiliary, .trunk_head, .num_classes, .lr, .weight_decay,
            .savedir, .model, .gpus, .max_epochs.
        logger: logging.Logger mirroring progress into a log file.
        summary: summary helper; create_summary() returns a tensorboard writer.
    """
    # Fixed seed plus deterministic cudnn so runs are exactly reproducible.
    seed = 6000
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
    np.random.seed(seed)  # Numpy module.
    random.seed(seed)  # Python random module.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    train_set = SkmtDataSet(args, split='train')
    val_set = SkmtDataSet(args, split='val')
    kwargs = {'num_workers': args.workers, 'pin_memory': True}

    # Custom sampler controls the composition/order of each training batch.
    sampler = CustomRandomSampler(train_set, batch_size=args.batch_size)
    batch_sampler = BatchSampler(sampler)

    def worker_init_fn(worker_id):
        # Re-seed numpy inside every DataLoader worker so augmentations
        # are reproducible across runs.
        np.random.seed(int(seed))

    train_loader = DataLoader(train_set,
                              batch_sampler=batch_sampler,
                              worker_init_fn=worker_init_fn,
                              **kwargs)

    test_loader = DataLoader(val_set,
                             batch_size=1,
                             drop_last=True,
                             worker_init_fn=worker_init_fn,
                             shuffle=False,
                             **kwargs)

    logger.info('======> building network')
    # set model
    model = build_skmtnet(backbone='wide_resnet50_2',
                          auxiliary_head=args.auxiliary,
                          trunk_head=args.trunk_head,
                          num_classes=args.num_classes,
                          output_stride=32)

    logger.info("======> computing network parameters")
    total_paramters = netParams(model)
    logger.info("the number of parameters: " + str(total_paramters))

    # setup optimizer
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=args.weight_decay)

    # setup savedir: encode model name, batch size and gpu ids in the path
    args.savedir = (args.savedir + '/' + args.model + 'bs' +
                    str(args.batch_size) + 'gpu' + str(args.gpus) + '/')
    if not os.path.exists(args.savedir):
        os.makedirs(args.savedir)

    # setup optimization criterion: cross-entropy on both the auxiliary and
    # trunk heads, each with weight 1 (alternative losses left commented).
    CRITERION = dict(
        auxiliary=dict(
            losses=dict(
                # smoothce=dict(size_average=True),
                # iou=dict(n_classes=11)
                ce=dict(reduction='mean')
                # dice=dict(smooth=1, p=2, reduction='mean')
            ),
            loss_weights=[1]),
        trunk=dict(
            losses=dict(
                # smoothce=dict(size_average=True),
                # iou=dict(n_classes=11)
                # focal=dict(reduction='mean')
                ce=dict(reduction='mean')
                # dice=dict(smooth=1, p=2, reduction='mean')
            ),
            trunk=dict(
                losses=dict(ce=dict(reduction='mean')
                            # dice=dict(smooth=1, p=2, reduction='mean')
                            ),
                loss_weights=[1])))
    criterion = build_criterion(**CRITERION)

    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)  # set random seed for all GPU
        # NOTE(review): CUDA_VISIBLE_DEVICES is set after the torch.cuda.*
        # calls above, so it may have no effect if CUDA was already
        # initialised — confirm intended gpu selection.
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
        model = model.cuda()
        criterion = criterion.cuda()

    start_epoch = 0
    best_mIoU = 0.
    # Fix: best_overall must exist before the per-epoch logging below —
    # previously it was only assigned when mIoU improved, so a first epoch
    # without improvement raised NameError.
    best_overall = 0.

    trainer = Trainer(args=args,
                      dataloader=train_loader,
                      model=model,
                      optimizer=optimizer,
                      criterion=criterion,
                      logger=logger,
                      summary=summary)
    tester = Tester(args=args,
                    dataloader=test_loader,
                    model=model,
                    criterion=criterion,
                    logger=logger,
                    summary=summary)

    writer = summary.create_summary()
    for epoch in range(start_epoch, args.max_epochs):
        trainer.train_one_epoch(epoch, writer, best_mIoU)

        if (epoch % 1 == 0):
            Acc, mAcc, mIoU, FWIoU, tb_overall = tester.test_one_epoch(
                epoch, writer)

            new_pred = mIoU
            if new_pred > best_mIoU:
                best_mIoU = new_pred
                best_overall = tb_overall
                # save the model
                model_file_name = args.savedir + '/best_model.pth'
                state = {
                    "epoch": epoch + 1,
                    "model": model.state_dict(),
                    "optimizer": optimizer.state_dict(),
                    "criterion": criterion.state_dict()
                }
                torch.save(state, model_file_name)
            logger.info("======>best epoch:")
            logger.info(best_overall)

    # Final checkpoint for resuming training later.
    # NOTE(review): if args.max_epochs == 0 the loop never runs and `epoch`
    # is undefined here — confirm max_epochs is always >= 1.
    model_file_name = args.savedir + '/resume_model.pth'
    state = {
        "epoch": epoch + 1,
        "model": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "criterion": criterion.state_dict()
    }
    torch.save(state, model_file_name)
예제 #3
0
def test_model(args):
    """Evaluate a CGNet checkpoint on the CamVid validation set.

    Args:
        args: global arguments; expects .inform_data_file, .data_dir,
            .classes, .dataset_list, .cuda, .gpus, .M, .N, .ignore_label,
            .test_data_list, .batch_size, .num_workers, .resume.
    """
    print("=====> Check if the cached file exists ")
    if not os.path.isfile(args.inform_data_file):
        print("%s is not found" % (args.inform_data_file))
        dataCollect = CamVidTrainInform(
            args.data_dir,
            args.classes,
            train_set_file=args.dataset_list,
            inform_data_file=args.inform_data_file
        )  # collect mean/std and class-weight information
        datas = dataCollect.collectDataAndSave()
        if datas is None:
            print('Error while pickling data. Please check.')
            exit(-1)
    else:
        print("%s exists" % (args.inform_data_file))
        datas = pickle.load(open(args.inform_data_file, "rb"))

    print(args)
    global network_type

    if args.cuda:
        print("=====> Use gpu id: '{}'".format(args.gpus))
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
        if not torch.cuda.is_available():
            raise Exception(
                "No GPU found or Wrong gpu id, please run without --cuda")

    args.seed = random.randint(1, 10000)
    print("Random Seed: ", args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
    cudnn.enabled = True

    # M / N control the depth of the two CGNet stages.
    M = args.M
    N = args.N
    model = CGNet.Context_Guided_Network(classes=args.classes, M=M, N=N)
    network_type = "CGNet"
    print("=====> current architeture:  CGNet_M%sN%s" % (M, N))
    total_paramters = netParams(model)
    print("the number of parameters: " + str(total_paramters))
    print("data['classWeights']: ", datas['classWeights'])
    weight = torch.from_numpy(datas['classWeights'])
    print("=====> Dataset statistics")
    print("mean and std: ", datas['mean'], datas['std'])

    # define optimization criteria
    criteria = CrossEntropyLoss2d(weight, args.ignore_label)
    if args.cuda:
        model = model.cuda()
        criteria = criteria.cuda()

    # load test set
    # Fix: evaluation must see every sample exactly once, so the loader no
    # longer shuffles and no longer drops the last (incomplete) batch —
    # drop_last=True silently excluded tail samples from the reported mIoU.
    testLoader = data.DataLoader(CamVidValDataSet(args.data_dir,
                                                  args.test_data_list,
                                                  f_scale=1,
                                                  mean=datas['mean']),
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 pin_memory=True,
                                 drop_last=False)

    if args.resume:
        if os.path.isfile(args.resume):
            print("=====> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['model'])
        else:
            print("=====> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    print("=====> beginning test")
    print("length of test set:", len(testLoader))
    mIOU_val, per_class_iu = test(args, testLoader, model, criteria)
    print(mIOU_val)
    print(per_class_iu)
예제 #4
0
def train_model(args):
    """Train MobileNetV3 for semantic segmentation on Cityscapes.

    Args:
        args: global arguments; expects .input_size, .inform_data_file,
            .data_dir, .classes, .dataset_list, .cuda, .gpus, .seed,
            .savedir, .dataset, .batch_size, .gpu_nums, .train_type,
            .train_data_list, .val_data_list, .random_scale, .random_mirror,
            .num_workers, .resume, .logFile, .lr, .max_epochs.
    """
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    print("=====> checking if inform_data_file exists")
    if not os.path.isfile(args.inform_data_file):
        print("%s is not found" % (args.inform_data_file))
        dataCollect = CityscapesTrainInform(
            args.data_dir,
            args.classes,
            train_set_file=args.dataset_list,
            inform_data_file=args.inform_data_file
        )  # collect mean/std and class-weight information
        datas = dataCollect.collectDataAndSave()
        if datas is None:
            print("error while pickling data. Please check.")
            exit(-1)
    else:
        print("find file: ", str(args.inform_data_file))
        datas = pickle.load(open(args.inform_data_file, "rb"))

    print(args)
    global network_type

    if args.cuda:
        print("=====> use gpu id: '{}'".format(args.gpus))
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
        if not torch.cuda.is_available():
            raise Exception(
                "No GPU found or Wrong gpu id, please run without --cuda")

    # Fixed seed (instead of a random one) for reproducible experiments.
    args.seed = 9830

    print("====> Random Seed: ", args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    cudnn.enabled = True

    model = MobileNetV3(model_mode="SMALL", num_classes=args.classes)

    network_type = "MobileNetV3"
    print("=====> current architeture:  MobileNetV3")

    print("=====> computing network parameters")
    total_paramters = netParams(model)
    print("the number of parameters: " + str(total_paramters))

    print("data['classWeights']: ", datas['classWeights'])
    print('=====> Dataset statistics')
    print('mean and std: ', datas['mean'], datas['std'])

    # define optimization criteria
    weight = torch.from_numpy(datas['classWeights'])
    criteria = CrossEntropyLoss2d(weight)

    if args.cuda:
        criteria = criteria.cuda()
        if torch.cuda.device_count() > 1:
            print("torch.cuda.device_count()=", torch.cuda.device_count())
            args.gpu_nums = torch.cuda.device_count()
            model = torch.nn.DataParallel(
                model).cuda()  # multi-card data parallel
        else:
            print("single GPU for training")
            model = model.cuda()  # 1-card data parallel

    args.savedir = (args.savedir + args.dataset + '/' + network_type + 'bs' +
                    str(args.batch_size) + 'gpu' + str(args.gpu_nums) + "_" +
                    str(args.train_type) + '/')

    if not os.path.exists(args.savedir):
        os.makedirs(args.savedir)

    trainLoader = data.DataLoader(CityscapesDataSet(args.data_dir,
                                                    args.train_data_list,
                                                    crop_size=input_size,
                                                    scale=args.random_scale,
                                                    mirror=args.random_mirror,
                                                    mean=datas['mean']),
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  pin_memory=True,
                                  drop_last=True)
    valLoader = data.DataLoader(CityscapesValDataSet(args.data_dir,
                                                     args.val_data_list,
                                                     f_scale=1,
                                                     mean=datas['mean']),
                                batch_size=1,
                                shuffle=True,
                                num_workers=args.num_workers,
                                pin_memory=True,
                                drop_last=True)

    start_epoch = 0
    if args.resume:
        if os.path.isfile(args.resume):
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['model'])
            print("=====> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=====> no checkpoint found at '{}'".format(args.resume))

    model.train()
    cudnn.benchmark = True

    # Open the training log, appending if it already exists.  The two
    # formerly-duplicated branches differed only in the open mode and a
    # leading newline on the first write.
    logFileLoc = args.savedir + args.logFile
    appending = os.path.isfile(logFileLoc)
    logger = open(logFileLoc, 'a' if appending else 'w')
    logger.write(("\n" if appending else "") +
                 "Global configuration as follows:")
    for key, value in vars(args).items():
        logger.write("\n{:16} {}".format(key, value))
    logger.write("\nParameters: %s" % (str(total_paramters)))
    logger.write(
        "\n%s\t\t%s\t\t%s\t\t%s\t\t%s\t\t" %
        ('Epoch', 'Loss(Tr)', 'Loss(val)', 'mIOU (tr)', 'mIOU (val)'))
    logger.flush()

    optimizer = torch.optim.Adam(model.parameters(),
                                 args.lr, (0.9, 0.999),
                                 eps=1e-08,
                                 weight_decay=5e-4)

    print('=====> beginning training')
    try:
        for epoch in range(start_epoch, args.max_epochs):
            # training
            lossTr, per_class_iu_tr, mIOU_tr, lr = train(
                args, trainLoader, model, criteria, optimizer, epoch)

            # validation every 50 epochs
            if epoch % 50 == 0:
                mIOU_val, per_class_iu = val(args, valLoader, model, criteria)
                # record train information
                logger.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.7f" %
                             (epoch, lossTr, mIOU_tr, mIOU_val, lr))
                logger.flush()
                print("Epoch : " + str(epoch) + ' Details')
                print(
                    "\nEpoch No.: %d\tTrain Loss = %.4f\t mIOU(tr) = %.4f\t mIOU(val) = %.4f\t lr= %.6f"
                    % (epoch, lossTr, mIOU_tr, mIOU_val, lr))
            else:
                # record train information
                logger.write("\n%d\t\t%.4f\t\t%.4f\t\t%.7f" %
                             (epoch, lossTr, mIOU_tr, lr))
                logger.flush()
                print("Epoch : " + str(epoch) + ' Details')
                print(
                    "\nEpoch No.: %d\tTrain Loss = %.4f\t mIOU(tr) = %.4f\t lr= %.6f"
                    % (epoch, lossTr, mIOU_tr, lr))

            # save the model: every 20th epoch, plus every one of the last 10
            model_file_name = args.savedir + '/model_' + str(epoch +
                                                             1) + '.pth'
            state = {"epoch": epoch + 1, "model": model.state_dict()}
            if epoch > args.max_epochs - 10 or epoch % 20 == 0:
                torch.save(state, model_file_name)
    finally:
        # Close the log even if training raises.
        logger.close()
예제 #5
0
파일: main.py 프로젝트: UESTC-Liuxin/SKMT
def main(args, logger, summary):
    """Train a skmtnet segmentation model on SKMT data and track best scores.

    Args:
        args: parsed command-line options (workers, batch_size, auxiliary,
            num_classes, lr, weight_decay, savedir, model, gpus, max_epochs,
            show_val_interval, ...).
        logger: logger that mirrors progress into a log file.
        summary: summary helper; create_summary() returns a tensorboard writer.
    """
    # Let cuDNN auto-tune its kernels for our hardware.
    cudnn.enabled = True
    cudnn.benchmark = True

    # Draw a fresh seed per run and seed every RNG we depend on.
    seed = random.randint(1, 10000)
    logger.info('======>random seed {}'.format(seed))
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Datasets and loaders.
    dataset_train = SkmtDataSet(args, split='train')
    dataset_val = SkmtDataSet(args, split='val')
    loader_opts = {'num_workers': args.workers, 'pin_memory': True}
    train_loader = DataLoader(dataset_train,
                              batch_size=args.batch_size,
                              drop_last=True,
                              shuffle=False,
                              **loader_opts)
    test_loader = DataLoader(dataset_val,
                             batch_size=1,
                             drop_last=True,
                             shuffle=False,
                             **loader_opts)

    logger.info('======> building network')
    model = build_skmtnet(backbone='resnet50',
                          auxiliary_head=args.auxiliary,
                          trunk_head='deeplab',
                          num_classes=args.num_classes,
                          output_stride=16)

    logger.info("======> computing network parameters")
    total_paramters = netParams(model)
    logger.info("the number of parameters: " + str(total_paramters))

    # Plain SGD with momentum and weight decay.
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=args.weight_decay)

    # Checkpoint directory encodes model name, batch size and gpu ids.
    args.savedir = (args.savedir + '/' + args.model + 'bs' +
                    str(args.batch_size) + 'gpu' + str(args.gpus) + '/')
    if not os.path.exists(args.savedir):
        os.makedirs(args.savedir)

    # Loss, then move everything onto the GPU(s) when available.
    criterion = Loss(args)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)  # seed every visible GPU as well
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
        model = nn.DataParallel(model).cuda()
        criterion = criterion.cuda()

    start_epoch = 0
    best_epoch = 0.
    best_overall = 0.
    best_mIoU = 0.
    best_F1 = 0.

    trainer = Trainer(args=args,
                      dataloader=train_loader,
                      model=model,
                      optimizer=optimizer,
                      criterion=criterion,
                      logger=logger,
                      summary=summary)
    tester = Tester(args=args,
                    dataloader=test_loader,
                    model=model,
                    criterion=criterion,
                    logger=logger,
                    summary=summary)

    writer = summary.create_summary()
    for epoch in range(start_epoch, args.max_epochs):
        trainer.train_one_epoch(epoch, writer)

        # Only validate every show_val_interval epochs.
        if epoch % args.show_val_interval != 0:
            continue

        score, class_iou, class_acc, class_F1 = tester.test_one_epoch(
            epoch, writer)

        logger.info('======>Now print overall info:')
        for name, value in score.items():
            logger.info('======>{0:^18} {1:^10}'.format(name, value))

        logger.info('======>Now print class acc')
        for name, value in class_acc.items():
            print('{}: {:.5f}'.format(name, value))
            logger.info('======>{0:^18} {1:^10}'.format(name, value))

        logger.info('======>Now print class iou')
        for name, value in class_iou.items():
            print('{}: {:.5f}'.format(name, value))
            logger.info('======>{0:^18} {1:^10}'.format(name, value))

        logger.info('======>Now print class_F1')
        for name, value in class_F1.items():
            logger.info('======>{0:^18} {1:^10}'.format(name, value))

        # Update the running bests; a new best overall accuracy also
        # checkpoints the weights.
        miou_key = "Mean IoU(8) : \t"
        acc_key = "Overall Acc : \t"
        f1_key = "Mean F1 : \t"
        if score[miou_key] > best_mIoU:
            best_mIoU = score[miou_key]
        if score[acc_key] > best_overall:
            best_overall = score[acc_key]
            torch.save(model.state_dict(), args.savedir + '/best_model.pth')
            best_epoch = epoch
        if score[f1_key] > best_F1:
            best_F1 = score[f1_key]

        logger.info("======>best mean IoU:{}".format(best_mIoU))
        logger.info("======>best overall : {}".format(best_overall))
        logger.info("======>best F1: {}".format(best_F1))
        logger.info("======>best epoch: {}".format(best_epoch))

        # Checkpoint the current epoch as well.
        model_file_name = args.savedir + '/model.pth'
        state = {"epoch": epoch + 1, "model": model.state_dict()}
        logger.info('======> Now begining to save model.')
        torch.save(state, model_file_name)
        logger.info('======> Save done.')