Exemplo n.º 1
0
def main():
    global use_apex
    global args

    torch.manual_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    log_name = 'log_test.txt' if args.evaluate else 'log_train.txt'
    sys.stderr = sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU, however, GPU is highly recommended")

    print("Initializing image data manager")
    dm = ImageDataManager(use_gpu, **image_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              loss={'xent'},
                              use_gpu=use_gpu,
                              args=vars(args))
    print(model)
    print("Model size: {:.3f} M".format(count_num_param(model)))
    if use_gpu:
        print("using gpu")
        model = model.cuda()
    print("criterion===>")
    criterion = get_criterion(dm.num_train_pids, use_gpu, args)
    print(criterion)
    print("regularizer===>")
    regularizer = get_regularizer(vars(args))
    print(regularizer)
    print("optimizer===>")
    optimizer = init_optimizer(model.parameters(), **optimizer_kwargs(args))
    print(optimizer)
    print("scheduler===>")
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           'max',
                                                           factor=0.1,
                                                           patience=5,
                                                           verbose=True)
    print(scheduler)

    if args.load_weights and check_isfile(args.load_weights):
        # load pretrained weights but ignore layers that don't match in size
        try:
            checkpoint = torch.load(args.load_weights)
        except Exception as e:
            print(e)
            checkpoint = torch.load(args.load_weights,
                                    map_location={'cuda:0': 'cpu'})

        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        pretrain_dict = {
            k: v
            for k, v in pretrain_dict.items()
            if k in model_dict and model_dict[k].size() == v.size()
        }
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    max_r1 = 0

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        state = model.state_dict()
        state.update(checkpoint['state_dict'])
        model.load_state_dict(state)
        optimizer.load_state_dict(checkpoint['optimizer'])
        args.start_epoch = checkpoint['epoch'] + 1
        max_r1 = checkpoint['rank1']
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch,
                                                      checkpoint['rank1']))

    if use_apex:
        print("using apex")
        model, optimizer = amp.initialize(model, optimizer, opt_level="O0")

    if args.evaluate:
        print("Evaluate only")

        for name in args.target_names:
            print("Evaluating {} ...".format(name))
            queryloader = testloader_dict[name]['query'], testloader_dict[
                name]['query_flip']
            galleryloader = testloader_dict[name]['gallery'], testloader_dict[
                name]['gallery_flip']
            distmat = test(model,
                           queryloader,
                           galleryloader,
                           use_gpu,
                           return_distmat=True)

            if args.visualize_ranks:
                visualize_ranked_results(distmat,
                                         dm.return_testdataset_by_name(name),
                                         save_dir=osp.join(
                                             args.save_dir, 'ranked_results',
                                             name),
                                         topk=20)
        return

    start_time = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    train_time = 0
    print("==> Start training")

    if args.fixbase_epoch > 0:
        oldenv = os.environ.get('sa', '')
        os.environ['sa'] = ''
        print(
            "Train {} for {} epochs while keeping other layers frozen".format(
                args.open_layers, args.fixbase_epoch))
        initial_optim_state = optimizer.state_dict()

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            train(epoch,
                  model,
                  criterion,
                  regularizer,
                  optimizer,
                  trainloader,
                  use_gpu,
                  fixbase=True)
            train_time += round(time.time() - start_train_time)

        print("Done. All layers are open to train for {} epochs".format(
            args.max_epoch))
        optimizer.load_state_dict(initial_optim_state)
        os.environ['sa'] = oldenv

    for epoch in range(args.start_epoch, args.max_epoch):
        auto_reset_learning_rate(optimizer, args)

        print(
            f"===========================start epoch {epoch + 1}  {now()}==========================================="
        )
        print(f"lr:{optimizer.param_groups[0]['lr']}")

        loss = train(epoch,
                     model,
                     criterion,
                     regularizer,
                     optimizer,
                     trainloader,
                     use_gpu,
                     fixbase=False)
        train_time += round(time.time() - start_train_time)
        state_dict = model.state_dict()

        rank1 = 0

        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (
                epoch + 1) % args.eval_freq == 0 or (epoch +
                                                     1) == args.max_epoch:
            print("==> Test")

            for name in args.target_names:
                print("Evaluating {} ...".format(name))
                queryloader = testloader_dict[name]['query'], testloader_dict[
                    name]['query_flip']
                galleryloader = testloader_dict[name][
                    'gallery'], testloader_dict[name]['gallery_flip']
                rank1 = test(model, queryloader, galleryloader, use_gpu)
                ranklogger.write(name, epoch + 1, rank1)

            if max_r1 < rank1:
                print('Save!', max_r1, rank1)
                save_checkpoint(
                    {
                        'state_dict': state_dict,
                        'rank1': rank1,
                        'epoch': epoch,
                        'optimizer': optimizer.state_dict(),
                    }, False, osp.join(args.save_dir,
                                       'checkpoint_best.pth.tar'))

                max_r1 = rank1

        save_checkpoint(
            {
                'state_dict': state_dict,
                'rank1': rank1,
                'epoch': epoch,
                'optimizer': optimizer.state_dict(),
            }, False,
            osp.join(args.save_dir,
                     'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

        scheduler.step(rank1)

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
    ranklogger.show_summary()
def main():
    global args

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False
    log_name = 'log_test.txt' if args.evaluate else 'log_train.txt'
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print('==========\nArgs:{}\n=========='.format(args))

    if use_gpu:
        print('Currently using GPU {}'.format(args.gpu_devices))
        cudnn.benchmark = True
    else:
        print('Currently using CPU, however, GPU is highly recommended')

    print('Initializing video data manager')
    dm = VideoDataManager(use_gpu, **video_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()

    print('Initializing model: {}'.format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              loss={'xent', 'htri'},
                              pretrained=not args.no_pretrained,
                              use_gpu=use_gpu)
    print('Model size: {:.3f} M'.format(count_num_param(model)))

    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        args.start_epoch = checkpoint['epoch'] + 1
        best_rank1 = checkpoint['rank1']
        print('Loaded checkpoint from "{}"'.format(args.resume))
        print('- start_epoch: {}\n- rank1: {}'.format(args.start_epoch,
                                                      best_rank1))

    model = nn.DataParallel(model).cuda() if use_gpu else model

    criterion = CrossEntropyLoss(num_classes=dm.num_train_pids,
                                 use_gpu=use_gpu,
                                 label_smooth=args.label_smooth)
    criterion_htri = TripletLoss(margin=args.margin)
    optimizer = init_optimizer(model, **optimizer_kwargs(args))
    scheduler = init_lr_scheduler(optimizer, **lr_scheduler_kwargs(args))

    if args.evaluate:
        print('Evaluate only')

        for name in args.target_names:
            print('Evaluating {} ...'.format(name))
            queryloader = testloader_dict[name]['query']
            galleryloader = testloader_dict[name]['gallery']
            distmat = test(model,
                           queryloader,
                           galleryloader,
                           args.pool_tracklet_features,
                           use_gpu,
                           return_distmat=True)

            if args.visualize_ranks:
                visualize_ranked_results(distmat,
                                         dm.return_testdataset_by_name(name),
                                         save_dir=osp.join(
                                             args.save_dir, 'ranked_results',
                                             name),
                                         topk=20)
        return

    start_time = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    train_time = 0
    print('=> Start training')

    if args.fixbase_epoch > 0:
        print(
            'Train {} for {} epochs while keeping other layers frozen'.format(
                args.open_layers, args.fixbase_epoch))
        initial_optim_state = optimizer.state_dict()

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            train(epoch,
                  model,
                  criterion_xent,
                  criterion_htri,
                  optimizer,
                  trainloader,
                  use_gpu,
                  fixbase=True)
            train_time += round(time.time() - start_train_time)

        print('Done. All layers are open to train for {} epochs'.format(
            args.max_epoch))
        optimizer.load_state_dict(initial_optim_state)

    for epoch in range(args.start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_xent, criterion_htri, optimizer,
              trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (
                epoch + 1) % args.eval_freq == 0 or (epoch +
                                                     1) == args.max_epoch:
            print('=> Test')

            for name in args.target_names:
                print('Evaluating {} ...'.format(name))
                queryloader = testloader_dict[name]['query']
                galleryloader = testloader_dict[name]['gallery']
                rank1 = test(model, queryloader, galleryloader,
                             args.pool_tracklet_features, use_gpu)
                ranklogger.write(name, epoch + 1, rank1)

            save_checkpoint(
                {
                    'state_dict': model.state_dict(),
                    'rank1': rank1,
                    'epoch': epoch,
                }, False,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        'Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.'.
        format(elapsed, train_time))
    ranklogger.show_summary()
def main():
    global args
    
    torch.manual_seed(args.seed)
    if not args.use_avai_gpus: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False
    log_name = 'log_test.txt' if args.evaluate else 'log_train.txt'
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU, however, GPU is highly recommended")

    print("Initializing image data manager")
    dm = ImageDataManager(use_gpu, **image_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch, num_classes=dm.num_train_pids, loss={'xent', 'htri'})
    print("Model size: {:.3f} M".format(count_num_param(model)))

    criterion_xent = CrossEntropyLoss(num_classes=dm.num_train_pids, use_gpu=use_gpu, label_smooth=args.label_smooth)
    criterion_htri = TripletLoss(margin=args.margin)
    
    optimizer = init_optimizer(model.parameters(), **optimizer_kwargs(args))
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=args.stepsize, gamma=args.gamma)

    if args.load_weights and check_isfile(args.load_weights):
        # load pretrained weights but ignore layers that don't match in size
        checkpoint = torch.load(args.load_weights)
        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        pretrain_dict = {k: v for k, v in pretrain_dict.items() if k in model_dict and model_dict[k].size() == v.size()}
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        args.start_epoch = checkpoint['epoch'] + 1
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch, checkpoint['rank1']))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")

        for name in args.target_names:
            print("Evaluating {} ...".format(name))
            queryloader = testloader_dict[name]['query']
            galleryloader = testloader_dict[name]['gallery']
            distmat = test(model, queryloader, galleryloader, use_gpu, return_distmat=True)
        
            if args.visualize_ranks:
                visualize_ranked_results(
                    distmat, dm.return_testdataset_by_name(name),
                    save_dir=osp.join(args.save_dir, 'ranked_results', name),
                    topk=20
                )
        return

    start_time = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    train_time = 0
    print("=> Start training")

    if args.fixbase_epoch > 0:
        print("Train {} for {} epochs while keeping other layers frozen".format(args.open_layers, args.fixbase_epoch))
        initial_optim_state = optimizer.state_dict()

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            train(epoch, model, criterion_xent, criterion_htri, optimizer, trainloader, use_gpu, fixbase=True)
            train_time += round(time.time() - start_train_time)

        print("Done. All layers are open to train for {} epochs".format(args.max_epoch))
        optimizer.load_state_dict(initial_optim_state)

    for epoch in range(args.start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_xent, criterion_htri, optimizer, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)
        
        scheduler.step()
        
        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (epoch + 1) % args.eval_freq == 0 or (epoch + 1) == args.max_epoch:
            print("=> Test")
            
            for name in args.target_names:
                print("Evaluating {} ...".format(name))
                queryloader = testloader_dict[name]['query']
                galleryloader = testloader_dict[name]['gallery']
                rank1 = test(model, queryloader, galleryloader, use_gpu)
                ranklogger.write(name, epoch + 1, rank1)

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            
            save_checkpoint({
                'state_dict': state_dict,
                'rank1': rank1,
                'epoch': epoch,
            }, False, osp.join(args.save_dir, 'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".format(elapsed, train_time))
    ranklogger.show_summary()
Exemplo n.º 4
0
def main():
    global args

    torch.manual_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False
    log_name = 'log_test.txt' if args.evaluate else 'log_train.txt'
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU, however, GPU is highly recommended")

    print("Initializing image data manager")
    dm = ImageDataManager(use_gpu, **image_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()

    # ReID-Stream:
    print("Initializing ReID-Stream: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              reid_dim=args.reid_dim,
                              loss={'xent', 'htri'})
    print("ReID Model size: {:.3f} M".format(count_num_param(model)))

    criterion_xent = CrossEntropyLoss(num_classes=dm.num_train_pids,
                                      use_gpu=use_gpu,
                                      label_smooth=args.label_smooth)
    criterion_htri = TripletLoss(margin=args.margin)

    # 2. Optimizer
    # Main ReID-Stream:
    optimizer = init_optimizer(model.parameters(), **optimizer_kwargs(args))
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")

        for name in args.target_names:
            print("Evaluating {} ...".format(name))
            queryloader = testloader_dict[name]['query']
            galleryloader = testloader_dict[name]['gallery']
            distmat = test(model,
                           queryloader,
                           galleryloader,
                           use_gpu,
                           return_distmat=True)

            if args.visualize_ranks:
                visualize_ranked_results(distmat,
                                         dm.return_testdataset_by_name(name),
                                         save_dir=osp.join(
                                             args.save_dir, 'ranked_results',
                                             name),
                                         topk=20)
        return

    start_time = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    train_time = 0
    print("==> Start training")

    for epoch in range(args.start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_xent, criterion_htri, \
              optimizer, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (
                epoch + 1) % args.eval_freq == 0 or (epoch +
                                                     1) == args.max_epoch:
            print("==> Test")

            for name in args.target_names:
                print("Evaluating {} ...".format(name))
                queryloader = testloader_dict[name]['query']
                galleryloader = testloader_dict[name]['gallery']
                rank1 = test(model, queryloader, galleryloader, use_gpu)
                ranklogger.write(name, epoch + 1, rank1)

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, False,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
    ranklogger.show_summary()
Exemplo n.º 5
0
def main():
    global args

    torch.manual_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False
    log_name = 'log_train_{}.txt'.format(time.strftime("%Y-%m-%d-%H-%M-%S"))
    if args.evaluate:
        log_name.replace('train', 'test')
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print(' '.join(sys.argv))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU, however, GPU is highly recommended")

    print("Initializing image data manager")

    dm = ImageDataManager(use_gpu, **image_dataset_kwargs(args))
    if hasattr(dm, 'lfw_dataset'):
        lfw = dm.lfw_dataset
        print('LFW dataset is used!')
    else:
        lfw = None

    trainloader, trainloader_dict, testloader_dict = dm.return_dataloaders()

    num_train_pids = dm.num_train_pids

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(
        name=args.arch,
        num_classes=num_train_pids,
        loss={'xent', 'htri'},
        pretrained=False if args.load_weights else 'imagenet',
        grayscale=args.grayscale,
        normalize_embeddings=args.normalize_embeddings,
        normalize_fc=args.normalize_fc,
        convbn=args.convbn)
    print("Model size: {:.3f} M".format(count_num_param(model)))

    count_flops(model, args.height, args.width, args.grayscale)

    if args.load_weights and check_isfile(args.load_weights):
        # load pretrained weights but ignore layers that don't match in size
        load_weights(model, args.load_weights)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        args.start_epoch = checkpoint['epoch'] + 1
        print("Loaded checkpoint from '{}'".format(args.resume))

        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch,
                                                      checkpoint['rank1']))

    if use_gpu:
        model = nn.DataParallel(model).cuda()
        model = model.cuda()

    if args.evaluate:
        print("Evaluate only")

        for name in args.target_names:
            if not 'lfw' in name.lower():
                print("Evaluating {} ...".format(name))
                queryloader = testloader_dict[name]['query']
                galleryloader = testloader_dict[name]['gallery']
                distmat = test(args,
                               model,
                               queryloader,
                               galleryloader,
                               use_gpu,
                               return_distmat=True)

                if args.visualize_ranks:
                    visualize_ranked_results(
                        distmat,
                        dm.return_testdataset_by_name(name),
                        save_dir=osp.join(args.save_dir, 'ranked_results',
                                          name),
                        topk=20)
            else:
                model.eval()
                same_acc, diff_acc, all_acc, auc, thresh = evaluate(
                    args,
                    dm.lfw_dataset,
                    model,
                    compute_embeddings_lfw,
                    args.test_batch_size,
                    verbose=False,
                    show_failed=args.show_failed)
                log.info('Validation accuracy: {0:.4f}, {1:.4f}'.format(
                    same_acc, diff_acc))
                log.info('Validation accuracy mean: {0:.4f}'.format(all_acc))
                log.info('Validation AUC: {0:.4f}'.format(auc))
                log.info('Estimated threshold: {0:.4f}'.format(thresh))
        return

    criterions = choose_losses(args, dm, model, use_gpu)

    if not args.evaluate and len(criterions) == 0:
        raise AssertionError('No loss functions were chosen!')
    optimizer = init_optimizer(model.parameters(), **optimizer_kwargs(args))

    if args.load_optim:
        checkpoint = torch.load(args.load_weights)
        optimizer.load_state_dict(checkpoint['optim'])
        print("Loaded optimizer from '{}'".format(args.load_weights))

    for param_group in optimizer.param_groups:
        param_group['lr'] = args.lr
        param_group['weight_decay'] = args.weight_decay

    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)

    start_time = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    train_time = 0
    train_writer = SummaryWriter(osp.join(args.save_dir, 'train_log'))
    test_writer = SummaryWriter(osp.join(args.save_dir, 'test_log'))
    print("=> Start training")

    if args.fixbase_epoch > 0:
        print(
            "Train {} for {} epochs while keeping other layers frozen".format(
                args.open_layers, args.fixbase_epoch))
        initial_optim_state = optimizer.state_dict()

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            train(epoch,
                  model,
                  criterions,
                  optimizer,
                  trainloader,
                  use_gpu,
                  train_writer,
                  fixbase=True,
                  lfw=lfw)
            train_time += round(time.time() - start_train_time)

            for name in args.target_names:
                if not 'lfw' in name.lower():
                    print("Evaluating {} ...".format(name))
                    queryloader = testloader_dict[name]['query']
                    galleryloader = testloader_dict[name]['gallery']
                    testloader = testloader_dict[name]['test']
                    criteria = None
                    rank1 = test(args,
                                 model,
                                 queryloader,
                                 galleryloader,
                                 use_gpu,
                                 testloader=testloader,
                                 criterions=criteria)
                else:
                    model.eval()
                    same_acc, diff_acc, all_acc, auc, thresh = evaluate(
                        args,
                        dm.lfw_dataset,
                        model,
                        compute_embeddings_lfw,
                        args.test_batch_size,
                        verbose=False,
                        show_failed=args.show_failed)
                    print('Validation accuracy: {0:.4f}, {1:.4f}'.format(
                        same_acc, diff_acc))
                    print('Validation accuracy mean: {0:.4f}'.format(all_acc))
                    print('Validation AUC: {0:.4f}'.format(auc))
                    print('Estimated threshold: {0:.4f}'.format(thresh))
                    rank1 = all_acc

        print("Done. All layers are open to train for {} epochs".format(
            args.max_epoch))
        optimizer.load_state_dict(initial_optim_state)

    for epoch in range(args.start_epoch, args.max_epoch):
        for criterion in criterions:
            criterion.train_stats.reset()

        start_train_time = time.time()
        train(epoch,
              model,
              criterions,
              optimizer,
              trainloader,
              use_gpu,
              train_writer,
              lfw=lfw)
        train_time += round(time.time() - start_train_time)

        scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (
                epoch + 1) % args.eval_freq == 0 or (epoch +
                                                     1) == args.max_epoch:
            num_iter = (epoch + 1) * len(trainloader)
            if not args.no_train_quality:
                for name in args.source_names:
                    print(
                        "Measure quality on the {} train set...".format(name))
                    queryloader = trainloader_dict[name]['query']
                    galleryloader = trainloader_dict[name]['gallery']
                    rank1 = test(args, model, queryloader, galleryloader,
                                 use_gpu)
                    train_writer.add_scalar('rank1/{}'.format(name), rank1,
                                            num_iter)

            print("=> Test")

            for name in args.target_names:
                if not 'lfw' in name.lower():
                    print("Evaluating {} ...".format(name))
                    queryloader = testloader_dict[name]['query']
                    galleryloader = testloader_dict[name]['gallery']
                    testloader = testloader_dict[name]['test']
                    criteria = criterions
                    if args.no_loss_on_val:
                        criteria = None
                    rank1 = test(args,
                                 model,
                                 queryloader,
                                 galleryloader,
                                 use_gpu,
                                 testloader=testloader,
                                 criterions=criteria)
                    test_writer.add_scalar('rank1/{}'.format(name), rank1,
                                           num_iter)
                    if not args.no_loss_on_val:
                        for criterion in criterions:
                            test_writer.add_scalar(
                                'loss/{}'.format(criterion.name),
                                criterion.test_stats.avg, num_iter)
                            criterion.test_stats.reset()
                    ranklogger.write(name, epoch + 1, rank1)
                else:
                    model.eval()
                    same_acc, diff_acc, all_acc, auc, thresh = evaluate(
                        args,
                        dm.lfw_dataset,
                        model,
                        compute_embeddings_lfw,
                        args.test_batch_size,
                        verbose=False,
                        show_failed=args.show_failed)
                    print('Validation accuracy: {0:.4f}, {1:.4f}'.format(
                        same_acc, diff_acc))
                    print('Validation accuracy mean: {0:.4f}'.format(all_acc))
                    print('Validation AUC: {0:.4f}'.format(auc))
                    print('Estimated threshold: {0:.4f}'.format(thresh))
                    test_writer.add_scalar('Accuracy/Val_same_accuracy',
                                           same_acc, num_iter)
                    test_writer.add_scalar('Accuracy/Val_diff_accuracy',
                                           diff_acc, num_iter)
                    test_writer.add_scalar('Accuracy/Val_accuracy', all_acc,
                                           num_iter)
                    test_writer.add_scalar('Accuracy/AUC', auc, num_iter)
                    rank1 = all_acc
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_dict = {
                'state_dict': state_dict,
                'epoch': epoch,
                'optim': optimizer.state_dict()
            }

            if len(args.target_names):
                save_dict['rank1'] = rank1

            save_checkpoint(
                save_dict, False,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
    ranklogger.show_summary()
Exemplo n.º 6
0
def main():
    global args

    set_random_seed(args.seed)
    if not args.use_avai_gpus: os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False
    log_name = 'log_test.txt' if args.evaluate else 'log_train.txt'
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
    else:
        print("Currently using CPU, however, GPU is highly recommended")

    print("Initializing MultiScale data manager")
    assert args.train_batch_size % args.train_loss_batch_size == 0, "'{}' is not divisable by {}".format(args.train_loss_batch_size, args.train_loss_batch_size)
    dm = ImageDataManager(use_gpu, scales=[224,160], **image_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()
    # sys.exit(0)

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch, num_classes=dm.num_train_pids, input_size=args.width, loss={'xent'}, use_gpu=use_gpu)
    print("Model size: {:.3f} M".format(count_num_param(model)))
    # print(model)

    criterion = CrossEntropyLoss(num_classes=dm.num_train_pids, use_gpu=use_gpu, label_smooth=args.label_smooth)
    optimizer = init_optimizer(model.parameters(), **optimizer_kwargs(args))
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=args.stepsize, gamma=args.gamma)
    # # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, verbose=True, threshold=1e-04)

    if args.load_weights and check_isfile(args.load_weights): # load pretrained weights but ignore layers that don't match in size
        checkpoint = torch.load(args.load_weights)
        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        pretrain_dict = {k: v for k, v in pretrain_dict.items() if k in model_dict and model_dict[k].size() == v.size()}
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        args.start_epoch = checkpoint['epoch'] + 1
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch, checkpoint['rank1']))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")

        for name in args.target_names:
            print("Evaluating {} ...".format(name))
            queryloader = testloader_dict[name]['query']
            galleryloader = testloader_dict[name]['gallery']
            test_set = dm.return_testdataset_by_name(name)
            rank1, mAP = test(model, test_set, name, queryloader, galleryloader, use_gpu, visualize=args.visualize_ranks)

        return

    start_time = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    maplogger = RankLogger(args.source_names, args.target_names)
    train_time = 0


    # Tensorboard
    writer = SummaryWriter(log_dir=osp.join('runs', args.save_dir))
    print("=> Start training")


    if args.fixbase_epoch > 0:
        print("Train {} for {} epochs while keeping other layers frozen".format(args.open_layers, args.fixbase_epoch))
        initial_optim_state = optimizer.state_dict()

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            loss, prec1 = train(epoch, model, criterion, optimizer, trainloader, writer, use_gpu, fixbase=True)
            writer.add_scalar('train/loss', loss, epoch+1)
            writer.add_scalar('train/prec1', prec1, epoch+1)
            print('Epoch: [{:02d}] [Average Loss:] {:.4f}\t [Average Prec.:] {:.2%}'.format(epoch+1, loss, prec1))
            train_time += round(time.time() - start_train_time)

        print("Done. All layers are open to train for {} epochs".format(args.max_epoch))
        optimizer.load_state_dict(initial_optim_state)

    args.start_epoch += args.fixbase_epoch
    args.max_epoch += args.fixbase_epoch

    for epoch in range(args.start_epoch, args.max_epoch):
        start_train_time = time.time()
        loss, prec1 = train(epoch, model, criterion, optimizer, trainloader, writer, use_gpu)
        writer.add_scalar('train/loss', loss, epoch+1)
        writer.add_scalar('train/prec1', prec1, epoch+1)
        print('Epoch: [{:02d}] [Average Loss:] {:.4f}\t [Average Prec.:] {:.2%}'.format(epoch+1, loss, prec1))
        train_time += round(time.time() - start_train_time)

        scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (epoch + 1) % args.eval_freq == 0 or (epoch + 1) == args.max_epoch:
            print("=> Test")

            for name in args.target_names:
                print("Evaluating {} ...".format(name))
                queryloader = testloader_dict[name]['query']
                galleryloader = testloader_dict[name]['gallery']

                test_set = dm.return_testdataset_by_name(name)

                if epoch+1 == args.max_epoch:
                    rank1, mAP = test(model, test_set, name, queryloader, galleryloader, use_gpu, visualize=True)
                else:
                    rank1, mAP = test(model, test_set, name, queryloader, galleryloader, use_gpu)

                writer.add_scalar(name + '_test/top1', rank1, epoch+1)
                writer.add_scalar(name + '_test/mAP', mAP, epoch+1)

                ranklogger.write(name, epoch + 1, rank1)
                maplogger.write(name, epoch + 1, mAP)

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint({
                'state_dict': state_dict,
                'rank1': rank1,
                'epoch': epoch,
            }, False, osp.join(args.save_dir, 'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))


    # save last checkpoint
    save_checkpoint({
        'state_dict': state_dict,
        'rank1': rank1,
        'epoch': epoch,
    }, False, osp.join(args.save_dir, 'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".format(elapsed, train_time))
    ranklogger.show_summary()
    maplogger.show_summary()
Exemplo n.º 7
0
def main():
    global args, dropout_optimizer

    torch.manual_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    log_name = 'log_test.txt' if args.evaluate else 'log_train.txt'
    sys.stderr = sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU, however, GPU is highly recommended")

    print("Initializing image data manager")
    dm = ImageDataManager(use_gpu, **image_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              loss={'xent'},
                              use_gpu=use_gpu,
                              dropout_optimizer=dropout_optimizer,
                              args=vars(args))
    print(model)
    print("Model size: {:.3f} M".format(count_num_param(model)))

    # criterion = WrappedCrossEntropyLoss(num_classes=dm.num_train_pids, use_gpu=use_gpu, label_smooth=args.label_smooth)
    criterion, fix_criterion, switch_criterion, htri_param_controller = get_criterions(
        dm.num_train_pids, use_gpu, args)
    regularizer, reg_param_controller = get_regularizer(args.regularizer)
    optimizer = init_optimizer(model.parameters(), **optimizer_kwargs(args))
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)

    if args.load_weights and check_isfile(args.load_weights):
        # load pretrained weights but ignore layers that don't match in size
        try:

            checkpoint = torch.load(args.load_weights)
        except Exception as e:
            print(e)
            checkpoint = torch.load(args.load_weights,
                                    map_location={'cuda:0': 'cpu'})

        # dropout_optimizer.set_p(checkpoint.get('dropout_p', 0))
        # print(list(checkpoint.keys()), checkpoint['dropout_p'])

        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        pretrain_dict = {
            k: v
            for k, v in pretrain_dict.items()
            if k in model_dict and model_dict[k].size() == v.size()
        }
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        state = model.state_dict()
        state.update(checkpoint['state_dict'])
        model.load_state_dict(state)
        # args.start_epoch = checkpoint['epoch'] + 1
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch,
                                                      checkpoint['rank1']))

    if use_gpu:
        model = nn.DataParallel(
            model,
            device_ids=list(range(len(args.gpu_devices.split(','))))).cuda()

    if args.evaluate:
        print("Evaluate only")

        for name in args.target_names:
            print("Evaluating {} ...".format(name))
            queryloader = testloader_dict[name]['query'], testloader_dict[
                name]['query_flip']
            galleryloader = testloader_dict[name]['gallery'], testloader_dict[
                name]['gallery_flip']
            distmat = test(model,
                           queryloader,
                           galleryloader,
                           use_gpu,
                           return_distmat=True)

            if args.visualize_ranks:
                visualize_ranked_results(distmat,
                                         dm.return_testdataset_by_name(name),
                                         save_dir=osp.join(
                                             args.save_dir, 'ranked_results',
                                             name),
                                         topk=20)
        return

    start_time = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    train_time = 0
    print("==> Start training")

    if os.environ.get('test_first') is not None:
        for name in args.target_names:
            print("Evaluating {} ...".format(name))
            queryloader = testloader_dict[name]['query'], testloader_dict[
                name]['query_flip']
            galleryloader = testloader_dict[name]['gallery'], testloader_dict[
                name]['gallery_flip']
            rank1 = test(model, queryloader, galleryloader, use_gpu)

    if args.fixbase_epoch > 0:
        oldenv = os.environ.get('sa', '')
        os.environ['sa'] = ''
        print(
            "Train {} for {} epochs while keeping other layers frozen".format(
                args.open_layers, args.fixbase_epoch))
        initial_optim_state = optimizer.state_dict()

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            train(epoch,
                  model,
                  fix_criterion,
                  regularizer,
                  optimizer,
                  trainloader,
                  use_gpu,
                  fixbase=True)
            train_time += round(time.time() - start_train_time)

        print("Done. All layers are open to train for {} epochs".format(
            args.max_epoch))
        optimizer.load_state_dict(initial_optim_state)
        os.environ['sa'] = oldenv

    max_r1 = 0

    for epoch in range(args.start_epoch, args.max_epoch):
        dropout_optimizer.set_epoch(epoch)
        reg_param_controller.set_epoch(epoch)
        htri_param_controller.set_epoch(epoch)
        dropout_optimizer.set_training(True)
        start_train_time = time.time()
        print(epoch, args.switch_loss)
        print(criterion)

        cond = args.switch_loss > 0 and epoch >= args.switch_loss
        cond = cond or (args.switch_loss < 0
                        and args.switch_loss + args.max_epoch < epoch)
        if cond:
            print('Switch!')
            criterion = switch_criterion
        train(epoch,
              model,
              criterion,
              regularizer,
              optimizer,
              trainloader,
              use_gpu,
              fixbase=False,
              switch_loss=cond)
        train_time += round(time.time() - start_train_time)

        if use_gpu:
            state_dict = model.module.state_dict()
        else:
            state_dict = model.state_dict()

        save_checkpoint(
            {
                'state_dict': state_dict,
                'rank1': 0,
                'epoch': epoch,
                'dropout_p': dropout_optimizer.p,
            }, False,
            osp.join(args.save_dir,
                     'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

        scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (
                epoch + 1) % args.eval_freq == 0 or (epoch +
                                                     1) == args.max_epoch:
            print("==> Test")
            dropout_optimizer.set_training(False)  # IMPORTANT!

            for name in args.target_names:
                print("Evaluating {} ...".format(name))
                queryloader = testloader_dict[name]['query'], testloader_dict[
                    name]['query_flip']
                galleryloader = testloader_dict[name][
                    'gallery'], testloader_dict[name]['gallery_flip']
                print('!!!!!!!!FC!!!!!!!!')
                os.environ['NOFC'] = ''
                rank1 = test(model, queryloader, galleryloader, use_gpu)
                ranklogger.write(name, epoch + 1, rank1)

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            if max_r1 < rank1:
                print('Save!', max_r1, rank1)
                save_checkpoint(
                    {
                        'state_dict': state_dict,
                        'rank1': rank1,
                        'epoch': epoch,
                        'dropout_p': dropout_optimizer.p,
                    }, False, osp.join(args.save_dir,
                                       'checkpoint_best.pth.tar'))

                max_r1 = rank1

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
    ranklogger.show_summary()