Exemplo n.º 1
0
def main():
    torch.manual_seed(1)
    os.environ['CUDA_VISIBLE_DEVICES'] = gpu_devices
    use_gpu = torch.cuda.is_available()
    if use_cpu: use_gpu = False

    if use_gpu:
        print("Currently using GPU {}".format(gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(1)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing model: {}".format(arch))
    model = init_model(name=arch, num_classes=576, loss_type=loss_type)
    print("Model size: {:.3f} M".format(count_num_param(model)))
    if use_gpu:
        model = nn.DataParallel(model).cuda()

    optimizer = init_optim(optim, model.parameters(), lr, weight_decay)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=stepsize,
                                         gamma=gamma)

    model.train()
    cnt = 0
    for epoch in range(start_epoch, max_epoch, 2):
        for step in range(2):
            x = torch.randn(1, 3, 200, 200)
            y = torch.randint(low=0, high=576, size=(1, ), dtype=torch.int64)
            if use_gpu:
                x = x.cuda()
                y = y.cuda()
            scheduler.step()
            cnt += 1
            print(cnt, scheduler.get_lr())
            output = model(x)
            # loss = nn.CrossEntropyLoss()(output[0], y)
            loss = torch.tensor(0.0, dtype=torch.float32).cuda()
            # loss = torch.tensor(0.0, dtype=torch.float32)
            loss.requires_grad = True
            optimizer.zero_grad()
            loss.backward()
            print(loss)
            print(loss._grad)
            optimizer.step()
    print('Done.')
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_imgreid_dataset(
        root=args.root, name=args.dataset, split_id=args.split_id,
        cuhk03_labeled=args.cuhk03_labeled, cuhk03_classic_split=args.cuhk03_classic_split,
        use_lmdb=args.use_lmdb,
    )

    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train,
                     use_lmdb=args.use_lmdb, lmdb_path=dataset.train_lmdb_path),
        batch_size=args.train_batch, shuffle=True, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=True,
    )

    queryloader = DataLoader(
        ImageDataset(dataset.query, transform=transform_test,
                     use_lmdb=args.use_lmdb, lmdb_path=dataset.query_lmdb_path),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    galleryloader = DataLoader(
        ImageDataset(dataset.gallery, transform=transform_test,
                     use_lmdb=args.use_lmdb, lmdb_path=dataset.gallery_lmdb_path),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch, num_classes=dataset.num_train_pids, loss={'xent'}, use_gpu=use_gpu)
    print("Model size: {:.3f} M".format(count_num_param(model)))

    criterion = CrossEntropyLabelSmooth(num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    optimizer = init_optim(args.optim, model.parameters(), args.lr, args.weight_decay)
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=args.stepsize, gamma=args.gamma)
    start_epoch = args.start_epoch

    if args.fixbase_epoch > 0:
        if hasattr(model, 'classifier') and isinstance(model.classifier, nn.Module):
            optimizer_tmp = init_optim(args.optim, model.classifier.parameters(), args.fixbase_lr, args.weight_decay)
        else:
            print("Warn: model has no attribute 'classifier' and fixbase_epoch is reset to 0")
            args.fixbase_epoch = 0

    if args.load_weights:
        # load pretrained weights but ignore layers that don't match in size
        print("Loading pretrained weights from '{}'".format(args.load_weights))
        checkpoint = torch.load(args.load_weights)
        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        pretrain_dict = {k: v for k, v in pretrain_dict.items() if k in model_dict and model_dict[k].size() == v.size()}
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)

    if args.resume:
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']
        rank1 = checkpoint['rank1']
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(start_epoch, rank1))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, use_gpu)
        return

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")

    if args.fixbase_epoch > 0:
        print("Train classifier for {} epochs while keeping base network frozen".format(args.fixbase_epoch))

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            train(epoch, model, criterion, optimizer_tmp, trainloader, use_gpu, freeze_bn=True)
            train_time += round(time.time() - start_train_time)

        del optimizer_tmp
        print("Now open all layers for training")

    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion, optimizer, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)
        
        scheduler.step()
        
        if (epoch+1) > args.start_eval and args.eval_step > 0 and (epoch+1) % args.eval_step == 0 or (epoch+1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1
            
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1
            
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            
            save_checkpoint({
                'state_dict': state_dict,
                'rank1': rank1,
                'epoch': epoch,
            }, is_best, osp.join(args.save_dir, 'checkpoint_ep' + str(epoch+1) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".format(elapsed, train_time))
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_imgreid_dataset(
        root=args.root,
        name=args.dataset,
        split_id=args.split_id,
        cuhk03_labeled=args.cuhk03_labeled,
        cuhk03_classic_split=args.cuhk03_classic_split,
        use_lmdb=args.use_lmdb,
    )

    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    trainloader = DataLoader(
        ImageDataset(dataset.train,
                     transform=transform_train,
                     use_lmdb=args.use_lmdb,
                     lmdb_path=dataset.train_lmdb_path),
        sampler=RandomIdentitySampler(dataset.train,
                                      num_instances=args.num_instances),
        batch_size=args.train_batch,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )

    queryloader = DataLoader(
        ImageDataset(dataset.query,
                     transform=transform_test,
                     use_lmdb=args.use_lmdb,
                     lmdb_path=dataset.train_lmdb_path),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    galleryloader = DataLoader(
        ImageDataset(dataset.gallery,
                     transform=transform_test,
                     use_lmdb=args.use_lmdb,
                     lmdb_path=dataset.train_lmdb_path),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids,
                              loss={'xent', 'htri'})
    print("Model size: {:.3f} M".format(count_num_param(model)))

    criterion_xent = CrossEntropyLabelSmooth(
        num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    criterion_htri = TripletLoss(margin=args.margin)

    optimizer = init_optim(args.optim, model.parameters(), args.lr,
                           args.weight_decay)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)

    if args.load_weights:
        # load pretrained weights but ignore layers that don't match in size
        if check_isfile(args.load_weights):
            checkpoint = torch.load(args.load_weights)
            pretrain_dict = checkpoint['state_dict']
            model_dict = model.state_dict()
            pretrain_dict = {
                k: v
                for k, v in pretrain_dict.items()
                if k in model_dict and model_dict[k].size() == v.size()
            }
            model_dict.update(pretrain_dict)
            model.load_state_dict(model_dict)
            print("Loaded pretrained weights from '{}'".format(
                args.load_weights))

    if args.resume:
        if check_isfile(args.resume):
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            args.start_epoch = checkpoint['epoch']
            rank1 = checkpoint['rank1']
            print("Loaded checkpoint from '{}'".format(args.resume))
            print("- start_epoch: {}\n- rank1: {}".format(
                args.start_epoch, rank1))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        distmat = test(model,
                       queryloader,
                       galleryloader,
                       use_gpu,
                       return_distmat=True)
        if args.vis_ranked_res:
            visualize_ranked_results(
                distmat,
                dataset,
                save_dir=osp.join(args.save_dir, 'ranked_results'),
                topk=20,
            )
        return

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")

    for epoch in range(args.start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_xent, criterion_htri, optimizer,
              trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_step > 0 and (
                epoch + 1) % args.eval_step == 0 or (epoch +
                                                     1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1

            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
        best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
Exemplo n.º 4
0
# Miscs
parser.add_argument('--print-freq', type=int, default=10, help="print frequency")
parser.add_argument('--seed', type=int, default=1, help="manual seed")
parser.add_argument('--resume', type=str, default='', metavar='PATH')
parser.add_argument('--evaluate', action='store_true', help="evaluation only")
parser.add_argument('--eval-step', type=int, default=-1,
                    help="run evaluation for every N epochs (set to -1 to test after training)")
parser.add_argument('--start-eval', type=int, default=0, help="start to evaluate after specific epoch")
parser.add_argument('--save-dir', type=str, default='log')
parser.add_argument('--use-cpu', action='store_true', help="use cpu")
parser.add_argument('--gpu-devices', default='0', type=str, help='gpu device ids for CUDA_VISIBLE_DEVICES')

args = parser.parse_args()

model = models.init_model(name=args.arch, num_classes=751, loss={'xent', 'htri'})
optimizer = init_optim(args.optim, model.parameters(), args.lr, args.weight_decay)

'''warm up与其他学习率方法的结合(基于LambdaLR)'''
config = {}
config["max_num_epochs"] = 60
warm_up_epochs = 10
lr_milestones = [20,40]

# MultiStepLR without warm up
multistep_lr = lambda epoch: 0.1**len([m for m in lr_milestones if m <= epoch])

# warm_up_with_multistep_lr
warm_up_with_multistep_lr = lambda epoch: (epoch+1) / warm_up_epochs if epoch < warm_up_epochs \
                            else 0.1**len([m for m in lr_milestones if m <= epoch])

# warm_up_with_step_lr
Exemplo n.º 5
0
def train_each_teacher(num_epoch, train_data, train_label, test_data,
                       test_label, save_path):

    torch.manual_seed(config.seed)
    print('len of train_data in network', len(train_data))
    os.environ['CUDA_VISIBLE_DEVICES'] = config.gpu_devices
    print('it is training now')
    use_gpu = torch.cuda.is_available()
    if config.use_cpu: use_gpu = False
    print('whether evaluate', config.evaluate)

    if use_gpu:
        print("Currently using GPU {}".format(config.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(config.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    transform_train = T.Compose([
        T.Random2DTranslation(config.height, config.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    transform_test = T.Compose([
        T.Resize((config.height, config.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False
    trainloader = DataLoader(
        ImageDataset(train_data, label=train_label, transform=transform_train),
        batch_size=config.train_batch,
        shuffle=True,
        num_workers=config.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    testloader = DataLoader(
        ImageDataset(test_data, label=test_label, transform=transform_test),
        batch_size=config.test_batch,
        shuffle=False,
        num_workers=config.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    print("Initializing model: {}".format('resnet50m'))
    model = models.init_model(name=config.arch,
                              num_classes=config.nb_labels,
                              loss={'xent'},
                              use_gpu=use_gpu)
    if use_gpu:
        model = nn.DataParallel(model).cuda()
    criterion = nn.MultiLabelSoftMarginLoss()

    optimizer = init_optim(config.optim, model.parameters(), config.lr,
                           config.weight_decay)

    if config.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=config.stepsize,
                                        gamma=config.gamma)

    print("==> Start training")

    start_time = time.time()
    for epoch in range(num_epoch):
        train(epoch, model, criterion, optimizer, trainloader, use_gpu)
        if config.stepsize > 0: scheduler.step()
        rank1 = test(model, testloader, use_gpu)

    rank1 = test(model, testloader, use_gpu)

    if use_gpu:
        state_dict = model.module.state_dict()
    else:
        state_dict = model.state_dict()
    print('save model', save_path)
    torch.save(state_dict, save_path)

    #print("==>  Hamming Score {:.3%}".format(rank1))

    elapsed = round(time.time() - start_time)

    print("Finished. Training time (h:m:s): {}.".format(elapsed))
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_imgreid_dataset(
        root=args.root,
        name=args.dataset,
        split_id=args.split_id,
        cuhk03_labeled=args.cuhk03_labeled,
        cuhk03_classic_split=args.cuhk03_classic_split,
        use_lmdb=args.use_lmdb,
    )

    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        # T.Resize((args.height, args.width)),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    trainset = ImageDataset(dataset.train,
                            transform=transform_train,
                            use_lmdb=args.use_lmdb,
                            lmdb_path=dataset.train_lmdb_path)
    trainloader = DataLoader(
        trainset,
        batch_size=args.train_batch,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )

    queryset = ImageDataset(dataset.query,
                            transform=transform_test,
                            use_lmdb=args.use_lmdb,
                            lmdb_path=dataset.query_lmdb_path)
    queryloader = DataLoader(
        queryset,
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    galleryset = ImageDataset(dataset.gallery,
                              transform=transform_test,
                              use_lmdb=args.use_lmdb,
                              lmdb_path=dataset.gallery_lmdb_path)
    galleryloader = DataLoader(
        galleryset,
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids,
                              loss={'xent'},
                              use_gpu=use_gpu)
    print("Model size: {:.3f} M".format(count_num_param(model)))
    # summary(model, (3, 160, 64))

    criterion = CrossEntropyLabelSmooth(num_classes=dataset.num_train_pids,
                                        use_gpu=use_gpu)
    optimizer = init_optim(args.optim, model.parameters(), args.lr,
                           args.weight_decay)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)
    start_epoch = args.start_epoch

    if args.fixbase_epoch > 0:
        if hasattr(model, 'classifier') and isinstance(model.classifier,
                                                       nn.Module):
            optimizer_tmp = init_optim(args.optim,
                                       model.classifier.parameters(),
                                       args.fixbase_lr, args.weight_decay)
        else:
            print(
                "Warn: model has no attribute 'classifier' and fixbase_epoch is reset to 0"
            )
            args.fixbase_epoch = 0

    if args.load_weights:
        # load pretrained weights but ignore layers that don't match in size
        print("Loading pretrained weights from '{}'".format(args.load_weights))
        if torch.cuda.is_available():
            checkpoint = torch.load(args.load_weights)
        else:
            checkpoint = torch.load(args.load_weights, map_location='cpu')
        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        pretrain_dict = {
            k: v
            for k, v in pretrain_dict.items()
            if k in model_dict and model_dict[k].size() == v.size()
        }
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)

    if args.resume:
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']
        rank1 = checkpoint['rank1']
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(start_epoch, rank1))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, use_gpu)
        return

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")

    if args.fixbase_epoch > 0:
        print(
            "Train classifier for {} epochs while keeping base network frozen".
            format(args.fixbase_epoch))

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            train(epoch,
                  model,
                  criterion,
                  optimizer_tmp,
                  trainloader,
                  use_gpu,
                  freeze_bn=True)
            train_time += round(time.time() - start_train_time)

        del optimizer_tmp
        print("Now open all layers for training")

    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion, optimizer, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_step > 0 and (
                epoch + 1) % args.eval_step == 0 or (epoch +
                                                     1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1

            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))
        '''
        if use_gpu:
            state_dict = model.module.state_dict()
        else:
            state_dict = model.state_dict()
            
        save_checkpoint({
            'state_dict': state_dict,
            'epoch': epoch,
        }, True, osp.join(args.save_dir, 'checkpoint_ep' + str(epoch+1) + '.pth.tar'))
        '''

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
        best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
Exemplo n.º 7
0
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'), mode='a')
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'), mode='a')
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_imgreid_dataset(name=args.dataset,
                                                dataset_dir=args.root,
                                                fore_dir=args.fore_dir)

    transform_train = ST.Compose([
        ST.Scale((args.height, args.width), interpolation=3),
        ST.RandomHorizontalFlip(),
        ST.ToTensor(),
        ST.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ST.RandomErasing(0.5)
    ])

    transform_test = ST.Compose([
        ST.Scale((args.height, args.width), interpolation=3),
        ST.ToTensor(),
        ST.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    trainloader = DataLoader(
        ImageDataset_hardSplit_seg(dataset.train, transform=transform_train),
        sampler=RandomIdentitySampler(dataset.train,
                                      num_instances=args.num_instances),
        batch_size=args.train_batch,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )

    queryloader = DataLoader(
        ImageDataset(dataset.query, transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    galleryloader = DataLoader(
        ImageDataset(dataset.gallery, transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids)
    print(model)

    criterion_xent = CrossEntropyLabelSmooth(use_gpu=use_gpu)
    criterion_htri = TripletLoss()
    criterion_mask = MaskLoss()
    criterion_split = HardSplitLoss()
    criterion_cluster = ClusterLoss()

    optimizer = init_optim(args.optim, model.parameters(), args.lr,
                           args.weight_decay)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)

    if args.resume:
        if check_isfile(args.resume):
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            args.start_epoch = checkpoint['epoch']
            rank1 = checkpoint['rank1']
            print("Loaded checkpoint from '{}'".format(args.resume))
            print("- start_epoch: {}\n- rank1: {}".format(
                args.start_epoch, rank1))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, use_gpu)
        return

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")

    for epoch in range(args.start_epoch, args.max_epoch):

        start_train_time = time.time()
        train(epoch, model, criterion_xent, criterion_htri, criterion_mask,
              criterion_split, criterion_cluster, optimizer, trainloader,
              use_gpu)
        train_time += round(time.time() - start_train_time)

        scheduler.step()

        if (epoch + 1) > args.start_eval and (
                epoch + 1) % args.eval_step == 0 or epoch == 0:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1

            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
        best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
    print("==========\nArgs:{}\n==========".format(args))
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_vidreid_dataset(root=args.root, name=args.dataset)

    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    # decompose tracklets into images for image-based training
    new_train = []
    for img_paths, pid, camid in dataset.train:
        for img_path in img_paths:
            new_train.append((img_path, pid, camid))

    trainloader = DataLoader(
        ImageDataset(new_train, transform=transform_train),
        sampler=RandomIdentitySampler(new_train, num_instances=args.num_instances),
        batch_size=args.train_batch, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=True,
    )

    queryloader = DataLoader(
        VideoDataset(dataset.query, seq_len=args.seq_len, sample='evenly', transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    galleryloader = DataLoader(
        VideoDataset(dataset.gallery, seq_len=args.seq_len, sample='evenly', transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch, num_classes=dataset.num_train_pids, loss={'xent', 'htri'})
    print("Model size: {:.3f} M".format(count_num_param(model)))

    criterion_xent = CrossEntropyLabelSmooth(num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    criterion_htri = TripletLoss(margin=args.margin)

    optimizer = init_optim(args.optim, model.parameters(), args.lr, args.weight_decay)
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=args.stepsize, gamma=args.gamma)
    start_epoch = args.start_epoch

    if args.load_weights:
        # load pretrained weights but ignore layers that don't match in size
        print("Loading pretrained weights from '{}'".format(args.load_weights))
        checkpoint = torch.load(args.load_weights)
        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        pretrain_dict = {k: v for k, v in pretrain_dict.items() if k in model_dict and model_dict[k].size() == v.size()}
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)

    if args.resume:
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']
        rank1 = checkpoint['rank1']
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(start_epoch, rank1))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, args.pool, use_gpu)
        return

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")

    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_xent, criterion_htri, optimizer, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)
        
        scheduler.step()
        
        if (epoch+1) > args.start_eval and args.eval_step > 0 and (epoch+1) % args.eval_step == 0 or (epoch+1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, args.pool, use_gpu)
            is_best = rank1 > best_rank1
            
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            
            save_checkpoint({
                'state_dict': state_dict,
                'rank1': rank1,
                'epoch': epoch,
            }, is_best, osp.join(args.save_dir, 'checkpoint_ep' + str(epoch+1) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".format(elapsed, train_time))
Exemplo n.º 9
0
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_vidreid_dataset(root=args.root,
                                                name=args.dataset)

    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    # decompose tracklets into images
    new_train = []
    for img_paths, pid, camid in dataset.train:
        for img_path in img_paths:
            new_train.append((img_path, pid, camid))

    trainloader = DataLoader(
        ImageDataset(new_train, transform=transform_train),
        batch_size=args.train_batch,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )

    queryloader = DataLoader(
        VideoDataset(dataset.query,
                     seq_len=args.seq_len,
                     sample='evenly',
                     transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    galleryloader = DataLoader(
        VideoDataset(dataset.gallery,
                     seq_len=args.seq_len,
                     sample='evenly',
                     transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids,
                              loss={'xent'})
    print("Model size: {:.3f} M".format(count_num_param(model)))

    criterion = CrossEntropyLabelSmooth(num_classes=dataset.num_train_pids,
                                        use_gpu=use_gpu)
    optimizer = init_optim(args.optim, model.parameters(), args.lr,
                           args.weight_decay)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)
    start_epoch = args.start_epoch

    if args.fixbase_epoch > 0:
        optimizer_tmp = init_optim(args.optim, model.classifier.parameters(),
                                   args.fixbase_lr, args.weight_decay)

    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, args.pool, use_gpu)
        return

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")

    if args.fixbase_epoch > 0:
        print(
            "Train classifier for {} epochs while keeping base network frozen".
            format(args.fixbase_epoch))

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            train(epoch,
                  model,
                  criterion,
                  optimizer_tmp,
                  trainloader,
                  use_gpu,
                  freeze_bn=True)
            train_time += round(time.time() - start_train_time)

        del optimizer_tmp
        print("Now open all layers for training")

    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion, optimizer, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_step > 0 and (
                epoch + 1) % args.eval_step == 0 or (epoch +
                                                     1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, args.pool, use_gpu)
            is_best = rank1 > best_rank1

            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
        best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
Exemplo n.º 10
0
    ckpt_path2 = os.path.join(
        args.snapshot_root,
        'six_snapshot_iter_' + str(parameters['snap_num']) + '.pth')
    ckpt_path3 = os.path.join(
        args.snapshot_root,
        'att_snapshot_iter_' + str(parameters['snap_num']) + '.pth')
    model_rgb.load_state_dict(torch.load(ckpt_path1, map_location='cpu'))
    model_six.load_state_dict(torch.load(ckpt_path2, map_location='cpu'))
    model_att.load_state_dict(torch.load(ckpt_path3, map_location='cpu'))
else:
    if args.arch == 'vgg16':
        load_pretrain_vgg16(model_rgb, pretrain=False)
"""""" """"" ~~~train or test~~~ """ """"""

#Trainer: class, defined in trainer.py
optimizer_rgb = init_optim(config.OPTIMIZERS, model_rgb.parameters(),
                           config.LR, config.WEIGHT_DECAY)
optimizer_six = init_optim(config.OPTIMIZERS, model_six.parameters(),
                           config.LR, config.WEIGHT_DECAY)
optimizer_att = init_optim(config.OPTIMIZERS, model_att.parameters(),
                           config.LR, config.WEIGHT_DECAY)
training = Trainer(
    cuda=cuda,
    model_rgb=model_rgb,
    model_six=model_six,
    model_att=model_att,
    optimizer_rgb=optimizer_rgb,
    optimizer_six=optimizer_six,
    optimizer_att=optimizer_att,
    train_sub=train_sub,
    val_sub=val_sub,
    train_loader=train_loader,
Exemplo n.º 11
0
def main():
    if args.dataset == 'ChestXray-NIHCC':
        if args.no_fiding:
            classes = [
                'Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration',
                'Mass', 'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation',
                'Edema', 'Emphysema', 'Fibrosis', 'Pleural_Thickening',
                'Hernia', 'No Fiding'
            ]
        else:
            classes = [
                'Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration',
                'Mass', 'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation',
                'Edema', 'Emphysema', 'Fibrosis', 'Pleural_Thickening',
                'Hernia'
            ]
    elif args.dataset == 'CheXpert-v1.0-small':
        classes = [
            'No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly',
            'Lung Opacity', 'Lung Lesion', 'Edema', 'Consolidation',
            'Pneumonia', 'Atelectasis', 'Pneumothorax', 'Pleural Effusion',
            'Pleural Other', 'Fracture', 'Support Devices'
        ]
    else:
        print('--dataset incorrect')
        return

    torch.manual_seed(args.seed)
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    pin_memory = True if use_gpu else False

    print("Initializing dataset: {}".format(args.dataset))

    data_transforms = {
        'train':
        transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.Resize(556),
            transforms.CenterCrop(512),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        'valid':
        transforms.Compose([
            transforms.Resize(556),
            transforms.CenterCrop(512),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
    }

    datasetTrain = DatasetGenerator(path_base=args.base_dir,
                                    dataset_file='train',
                                    transform=data_transforms['train'],
                                    dataset_=args.dataset,
                                    no_fiding=args.no_fiding)

    datasetVal = DatasetGenerator(path_base=args.base_dir,
                                  dataset_file='valid',
                                  transform=data_transforms['valid'],
                                  dataset_=args.dataset,
                                  no_fiding=args.no_fiding)

    train_loader = DataLoader(dataset=datasetTrain,
                              batch_size=args.train_batch,
                              shuffle=args.train_shuffle,
                              num_workers=args.workers,
                              pin_memory=pin_memory)
    valid_loader = DataLoader(dataset=datasetVal,
                              batch_size=args.valid_batch,
                              shuffle=args.valid_shuffle,
                              num_workers=args.workers,
                              pin_memory=pin_memory)

    with open(args.infos_densenet) as f:
        cfg = edict(json.load(f))

    print('Initializing densenet branch')
    model_dense = Classifier(cfg)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model_dense.parameters()) / 1000000.0))

    with open(args.infos_resnet) as f:
        cfg = edict(json.load(f))

    print('Initializing resnet branch')
    model_res = Classifier(cfg)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model_res.parameters()) / 1000000.0))

    print('Initializing fusion branch')
    model_fusion = Fusion(input_size=7424, output_size=len(classes))
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model_fusion.parameters()) / 1000000.0))

    print("Initializing optimizers")
    optimizer_dense = init_optim(args.optim, model_dense.parameters(),
                                 args.learning_rate, args.weight_decay,
                                 args.momentum)
    optimizer_res = init_optim(args.optim, model_res.parameters(),
                               args.learning_rate, args.weight_decay,
                               args.momentum)
    optimizer_fusion = init_optim(args.optim, model_fusion.parameters(),
                                  args.learning_rate, args.weight_decay,
                                  args.momentum)

    criterion = nn.BCELoss()

    print("Initializing scheduler: {}".format(args.scheduler))
    if args.stepsize > 0:
        scheduler_dense = init_scheduler(args.scheduler, optimizer_dense,
                                         args.stepsize, args.gamma)
        scheduler_res = init_scheduler(args.scheduler, optimizer_res,
                                       args.stepsize, args.gamma)
        scheduler_fusion = init_scheduler(args.scheduler, optimizer_fusion,
                                          args.stepsize, args.gamma)

    start_epoch = args.start_epoch
    best_loss = np.inf

    if args.resume_densenet:
        checkpoint_dense = torch.load(args.resume_densenet)
        model_dense.load_state_dict(checkpoint_dense['state_dict'])
        epoch_dense = checkpoint_dense['epoch']
        print("Resuming densenet from epoch {}".format(epoch_dense + 1))

    if args.resume_resnet:
        checkpoint_res = torch.load(args.resume_resnet)
        model_res.load_state_dict(checkpoint_res['state_dict'])
        epoch_res = checkpoint_res['epoch']
        print("Resuming resnet from epoch {}".format(epoch_res + 1))

    if args.resume_fusion:
        checkpoint_fusion = torch.load(args.resume_fusion)
        model_fusion.load_state_dict(checkpoint_fusion['state_dict'])
        epoch_fusion = checkpoint_fusion['epoch']
        print("Resuming fusion from epoch {}".format(epoch_fusion + 1))

    if use_gpu:
        model_dense = nn.DataParallel(model_dense).cuda()
        model_res = nn.DataParallel(model_res).cuda()
        model_fusion = nn.DataParallel(model_fusion).cuda()

    if args.evaluate:
        print("Evaluate only")
        if args.step == 1:
            valid('step1', model_dense, model_res, model_fusion, valid_loader,
                  criterion, args.print_freq, classes, cfg,
                  data_transforms['valid'])
        elif args.step == 2:
            valid('step2', model_dense, model_res, model_fusion, valid_loader,
                  criterion, args.print_freq, classes, cfg,
                  data_transforms['valid'])
        elif args.step == 3:
            valid('step3', model_dense, model_res, model_fusion, valid_loader,
                  criterion, args.print_freq, classes, cfg,
                  data_transforms['valid'])
        else:
            print('args.step not found')
        return

    if args.step == 1:
        #################################### DENSENET BRANCH INIT ##########################################
        start_time = time.time()
        train_time = 0
        best_epoch = 0
        print("==> Start training of densenet branch")

        for p in model_dense.parameters():
            p.requires_grad = True

        for p in model_res.parameters():
            p.requires_grad = False

        for p in model_fusion.parameters():
            p.requires_grad = True

        for epoch in range(start_epoch, args.max_epoch):
            start_train_time = time.time()
            train('step1', model_dense, model_res, model_fusion, train_loader,
                  optimizer_dense, optimizer_res, optimizer_fusion, criterion,
                  args.print_freq, epoch, args.max_epoch, cfg,
                  data_transforms['train'])
            train_time += round(time.time() - start_train_time)
            if args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or (
                    epoch + 1) == args.max_epoch:
                print("==> Validation")
                loss_val = valid('step1', model_dense, model_res, model_fusion,
                                 valid_loader, criterion, args.print_freq,
                                 classes, cfg, data_transforms['valid'])

                if args.stepsize > 0:
                    if args.scheduler == 'ReduceLROnPlateau':
                        scheduler_dense.step(loss_val)
                        scheduler_fusion.step(loss_val)
                    else:
                        scheduler_dense.step()
                        scheduler_fusion.step()

                is_best = loss_val < best_loss
                if is_best:
                    best_loss = loss_val
                    best_epoch = epoch + 1

                if use_gpu:
                    state_dict_dense = model_dense.module.state_dict()
                    state_dict_fusion = model_fusion.module.state_dict()
                else:
                    state_dict_dense = model_dense.state_dict()
                    state_dict_fusion = model_fusion.state_dict()

                save_checkpoint(
                    {
                        'state_dict': state_dict_dense,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'dense')
                save_checkpoint(
                    {
                        'state_dict': state_dict_fusion,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'fusion')

        print("==> Best Validation Loss {:.4%}, achieved at epoch {}".format(
            best_loss, best_epoch))

        elapsed = round(time.time() - start_time)
        elapsed = str(datetime.timedelta(seconds=elapsed))
        train_time = str(datetime.timedelta(seconds=train_time))
        print(
            "Dense branch finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}."
            .format(elapsed, train_time))
        #################################### DENSENET BRANCH END ##########################################

    elif args.step == 2:
        #################################### RESNET BRANCH INIT ##########################################
        start_time = time.time()
        train_time = 0
        best_epoch = 0
        print("==> Start training of local branch")

        for p in model_dense.parameters():
            p.requires_grad = False

        for p in model_res.parameters():
            p.requires_grad = True

        for p in model_fusion.parameters():
            p.requires_grad = True

        for epoch in range(start_epoch, args.max_epoch):
            start_train_time = time.time()
            train('step2', model_dense, model_res, model_fusion, train_loader,
                  optimizer_dense, optimizer_res, optimizer_fusion, criterion,
                  args.print_freq, epoch, args.max_epoch, cfg,
                  data_transforms['train'])
            train_time += round(time.time() - start_train_time)
            if args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or (
                    epoch + 1) == args.max_epoch:
                print("==> Validation")
                loss_val = valid('step2', model_dense, model_res, model_fusion,
                                 valid_loader, criterion, args.print_freq,
                                 classes, cfg, data_transforms['valid'])

                if args.stepsize > 0:
                    if args.scheduler == 'ReduceLROnPlateau':
                        scheduler_res.step(loss_val)
                        scheduler_fusion.step(loss_val)
                    else:
                        scheduler_res.step()
                        scheduler_fusion.step()

                is_best = loss_val < best_loss
                if is_best:
                    best_loss = loss_val
                    best_epoch = epoch + 1

                if use_gpu:
                    state_dict_res = model_res.module.state_dict()
                    state_dict_fusion = model_fusion.module.state_dict()
                else:
                    state_dict_res = model_res.state_dict()
                    state_dict_fusion = model_fusion.state_dict()

                save_checkpoint(
                    {
                        'state_dict': state_dict_res,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'res')
                save_checkpoint(
                    {
                        'state_dict': state_dict_fusion,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'fusion')

        print("==> Best Validation Loss {:.4%}, achieved at epoch {}".format(
            best_loss, best_epoch))

        elapsed = round(time.time() - start_time)
        elapsed = str(datetime.timedelta(seconds=elapsed))
        train_time = str(datetime.timedelta(seconds=train_time))
        print(
            "Resnet branch finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}."
            .format(elapsed, train_time))
        #################################### RESNET BRANCH END ##########################################

    elif args.step == 3:
        #################################### FUSION BRANCH INIT ##########################################
        start_time = time.time()
        train_time = 0
        best_epoch = 0
        print("==> Start training of fusion branch")

        for p in model_dense.parameters():
            p.requires_grad = True

        for p in model_res.parameters():
            p.requires_grad = True

        for p in model_fusion.parameters():
            p.requires_grad = True

        for epoch in range(start_epoch, args.max_epoch):
            start_train_time = time.time()
            train('step3', model_dense, model_res, model_fusion, train_loader,
                  optimizer_dense, optimizer_res, optimizer_fusion, criterion,
                  args.print_freq, epoch, args.max_epoch, cfg,
                  data_transforms['train'])
            train_time += round(time.time() - start_train_time)
            if args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or (
                    epoch + 1) == args.max_epoch:
                print("==> Validation")
                loss_val = valid('step3', model_dense, model_res, model_fusion,
                                 valid_loader, criterion, args.print_freq,
                                 classes, cfg, data_transforms['valid'])

                if args.stepsize > 0:
                    if args.scheduler == 'ReduceLROnPlateau':
                        scheduler_dense.step(loss_val)
                        scheduler_res.step(loss_val)
                        scheduler_fusion.step(loss_val)
                    else:
                        scheduler_dense.step()
                        scheduler_res.step()
                        scheduler_fusion.step()

                is_best = loss_val < best_loss
                if is_best:
                    best_loss = loss_val
                    best_epoch = epoch + 1

                if use_gpu:
                    state_dict_dense = model_dense.module.state_dict()
                    state_dict_res = model_res.module.state_dict()
                    state_dict_fusion = model_fusion.module.state_dict()
                else:
                    state_dict_dense = model_dense.state_dict()
                    state_dict_res = model_res.state_dict()
                    state_dict_fusion = model_fusion.state_dict()

                save_checkpoint(
                    {
                        'state_dict': state_dict_dense,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'dense')
                save_checkpoint(
                    {
                        'state_dict': state_dict_res,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'res')
                save_checkpoint(
                    {
                        'state_dict': state_dict_fusion,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'fusion')

        print("==> Best Validation Loss {:.4%}, achieved at epoch {}".format(
            best_loss, best_epoch))

        elapsed = round(time.time() - start_time)
        elapsed = str(datetime.timedelta(seconds=elapsed))
        train_time = str(datetime.timedelta(seconds=train_time))
        print(
            "Fusion branch finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}."
            .format(elapsed, train_time))
        #################################### FUSION BRANCH END ##########################################

    else:
        print('args.step not found')
Exemplo n.º 12
0
def main():
    torch.manual_seed(1)
    os.environ['CUDA_VISIBLE_DEVICES'] = config.gpu_devices
    use_gpu = torch.cuda.is_available()

    sys.stdout = Logger(config.save_dir, config.checkpoint_suffix,
                        config.evaluate)
    print("\n==========\nArgs:")
    config.print_parameter()
    print("==========\n")

    if use_gpu:
        print("Currently using GPU {}".format(config.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(1)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(config.dataset))
    dataset = data_manager.init_imgreid_dataset(name=config.dataset,
                                                root=config.data_root)

    transform_train = T.Compose([
        T.Random2DTranslation(config.height, config.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=data_mean, std=data_std),
    ])

    transform_test = T.Compose([
        T.Resize((config.height, config.width)),
        T.ToTensor(),
        T.Normalize(mean=data_mean, std=data_std),
    ])

    pin_memory = True if use_gpu else False

    # train_batch_sampler = BalancedBatchSampler(dataset.train, n_classes=8, n_samples=8)
    # train_batch_sampler = CCLBatchSampler(dataset.train, n_classes=n_classes, n_samples=n_samples)
    # train_batch_sampler = CCLBatchSamplerV2(dataset.train, n_classes=n_classes, pos_samp_cnt=pos_samp_cnt,
    #                                         neg_samp_cnt=neg_samp_cnt, each_cls_max_cnt=each_cls_max_cnt)
    train_batch_sampler = ClassSampler(dataset.train,
                                       sample_cls_cnt=config.sample_cls_cnt,
                                       each_cls_cnt=config.each_cls_cnt)

    # trainloader = DataLoader(
    #     ImageDataset(dataset.train, transform=transform_train),
    #     batch_sampler=train_batch_sampler, batch_size=args.train_batch,
    #     shuffle=True, num_workers=args.workers, pin_memory=pin_memory, drop_last=True
    # )

    trainloader = DataLoader(ImageDatasetWCL(dataset,
                                             data_type='train',
                                             merge_h=256,
                                             merge_w=256,
                                             mean_std=[data_mean, data_std]),
                             batch_sampler=train_batch_sampler,
                             num_workers=config.workers,
                             pin_memory=pin_memory)

    queryloader = DataLoader(
        ImageDatasetWCL(dataset.query,
                        data_type='query',
                        merge_h=256,
                        merge_w=256,
                        mean_std=[data_mean, data_std]),
        batch_size=config.test_batch,
        shuffle=False,
        num_workers=config.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    galleryloader = DataLoader(
        ImageDatasetWCL(dataset.gallery,
                        data_type='gallery',
                        merge_h=256,
                        merge_w=256,
                        mean_std=[data_mean, data_std]),
        batch_size=config.test_batch,
        shuffle=False,
        num_workers=config.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    if config.dataset == 'vehicleid':
        train_query_loader = None
        train_gallery_loader = None
    else:
        train_query_loader = DataLoader(
            ImageDatasetWCL(dataset.train_query,
                            data_type='train_query',
                            merge_h=256,
                            merge_w=256,
                            mean_std=[data_mean, data_std]),
            batch_size=config.test_batch,
            shuffle=False,
            num_workers=config.workers,
            pin_memory=pin_memory,
            drop_last=False,
        )

        train_gallery_loader = DataLoader(
            ImageDatasetWCL(dataset.train_gallery,
                            data_type='train_gallery',
                            merge_h=256,
                            merge_w=256,
                            mean_std=[data_mean, data_std]),
            batch_size=config.test_batch,
            shuffle=False,
            num_workers=config.workers,
            pin_memory=pin_memory,
            drop_last=False,
        )

    print("Initializing model: {}".format(config.arch))
    model = init_model(name=config.arch,
                       num_classes=dataset.num_train_pids,
                       loss_type=config.loss_type)
    print("Model size: {:.3f} M".format(count_num_param(model)))

    if config.loss_type == 'xent':
        criterion = [nn.CrossEntropyLoss(), nn.CrossEntropyLoss()]
    elif config.loss_type == 'xent_triplet':
        criterion = XentTripletLoss(
            margin=config.margin,
            triplet_selector=RandomNegativeTripletSelector(
                margin=config.margin),
            each_cls_cnt=config.each_cls_cnt,
            n_class=config.sample_cls_cnt)
    elif config.loss_type == 'xent_tripletv2':
        criterion = XentTripletLossV2(
            margin=config.margin,
            triplet_selector=RandomNegativeTripletSelectorV2(
                margin=config.margin),
            each_cls_cnt=config.each_cls_cnt,
            n_class=config.sample_cls_cnt)
        # criterion = XentTripletLossV2(margin=0.04, triplet_selector=RandomNegativeTripletSelectorV2(margin=0.04),
        #                               each_cls_cnt=config.each_cls_cnt, n_class=config.sample_cls_cnt)
        # criterion = XentGroupTripletLossV2(margin=0.8, triplet_selector=AllTripletSelector(margin=0.8),
        #                               each_cls_cnt=config.each_cls_cnt, n_class=config.sample_cls_cnt)
    else:
        raise KeyError("Unsupported loss: {}".format(config.loss_type))

    optimizer = init_optim(config.optim, model.parameters(), config.lr,
                           config.weight_decay)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=config.stepsize,
                                         gamma=config.gamma)

    if config.resume is not None:
        if check_isfile(config.resume):
            checkpoint = torch.load(config.resume)
            pretrain_dict = checkpoint['state_dict']
            model_dict = model.state_dict()
            pretrain_dict = {
                k: v
                for k, v in pretrain_dict.items()
                if k in model_dict and model_dict[k].size() == v.size()
            }
            model_dict.update(pretrain_dict)
            model.load_state_dict(model_dict)
            config.start_epoch = checkpoint['epoch']
            rank1 = checkpoint['rank1']
            if 'mAP' in checkpoint:
                mAP = checkpoint['mAP']
            else:
                mAP = 0
            print("Loaded checkpoint from '{}'".format(config.resume))
            print("- start_epoch: {}\n- rank1: {}\n- mAP: {}".format(
                config.start_epoch, rank1, mAP))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if config.evaluate:
        print("Evaluate only")
        test_model(model, queryloader, galleryloader, train_query_loader,
                   train_gallery_loader, use_gpu, config.test_batch,
                   config.loss_type, config.euclidean_distance_loss)
        return

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_map = 0
    best_epoch = 0

    for epoch in range(config.start_epoch, config.max_epoch):
        print("==> Start training")
        start_train_time = time.time()
        scheduler.step()
        print('epoch:', epoch, 'lr:', scheduler.get_lr())
        train(epoch, model, criterion, optimizer, trainloader,
              config.loss_type, config.print_freq)
        train_time += round(time.time() - start_train_time)

        if epoch >= config.start_eval and config.eval_step > 0 and epoch % config.eval_step == 0 \
           or epoch == config.max_epoch:
            print("==> Test")
            rank1, mAP = test_model(model, queryloader, galleryloader,
                                    train_query_loader, train_gallery_loader,
                                    use_gpu, config.test_batch,
                                    config.loss_type,
                                    config.euclidean_distance_loss)
            is_best = rank1 > best_rank1

            if is_best:
                best_rank1 = rank1
                best_map = mAP
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'mAP': mAP,
                    'epoch': epoch + 1,
                },
                is_best,
                use_gpu_suo=False,
                fpath=osp.join(
                    config.save_dir, 'checkpoint_ep' + str(epoch + 1) +
                    config.checkpoint_suffix + '.pth.tar'))

    print("==> Best Rank-1 {:.2%}, mAP {:.2%}, achieved at epoch {}".format(
        best_rank1, best_map, best_epoch))
    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_img_dataset(
        root=args.root,
        name=args.dataset,
        split_id=args.split_id,
        cuhk03_labeled=args.cuhk03_labeled,
        cuhk03_classic_split=args.cuhk03_classic_split,
    )

    transform_train = T.Compose([
        T.Resize((args.height, args.width)),
        T.RandomHorizontalFlip(p=0.5),
        T.Pad(10),
        T.RandomCrop([args.height, args.width]),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        torchvision.transforms.RandomErasing(p=0.5,
                                             scale=(0.02, 0.4),
                                             ratio=(0.3, 3.33),
                                             value=(0.4914, 0.4822, 0.4465))
        # T.RandomErasing(probability=0.5, sh=0.4, mean=(0.4914, 0.4822, 0.4465)),
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train),
        sampler=RandomIdentitySampler2(dataset.train,
                                       batch_size=args.train_batch,
                                       num_instances=args.num_instances),
        batch_size=args.train_batch,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )

    queryloader = DataLoader(
        ImageDataset(dataset.query, transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    galleryloader = DataLoader(
        ImageDataset(dataset.gallery, transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids,
                              loss={'xent', 'htri'})
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))
    #embed()

    criterion_xent = CrossEntropyLabelSmooth(
        num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    criterion_htri = TripletLoss(margin=args.margin)

    optimizer = init_optim(args.optim, model.parameters(), args.lr,
                           args.weight_decay)
    # if args.stepsize > 0:
    #     scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)
    '''------Modify lr_schedule here------'''
    current_schedule = init_lr_schedule(schedule=args.schedule,
                                        warm_up_epoch=args.warm_up_epoch,
                                        half_cos_period=args.half_cos_period,
                                        lr_milestone=args.lr_milestone,
                                        gamma=args.gamma,
                                        stepsize=args.stepsize)

    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                  lr_lambda=current_schedule)
    '''------Please refer to the args.xxx for details of hyperparams------'''
    #embed()
    start_epoch = args.start_epoch

    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, use_gpu)
        return

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")

    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_xent, criterion_htri, optimizer,
              trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        if args.schedule: scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_step > 0 and (
                epoch + 1) % args.eval_step == 0 or (epoch +
                                                     1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
        best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
Exemplo n.º 14
0
                          batch_size=valid_batch,
                          shuffle=valid_shuffle,
                          num_workers=workers,
                          pin_memory=pin_memory)

print("Initializing model: {}".format(arch))
model = models.init_model(name=arch,
                          num_classes=len(classes),
                          is_trained=trained)
print("Model size: {:.5f}M".format(
    sum(p.numel() for p in model.parameters()) / 1000000.0))

# print(model)

print("Initializing optimizer: {}".format(optim))
optimizer = init_optim(optim, model.parameters(), learning_rate, weight_decay,
                       momentum)

if use_gpu:
    model = nn.DataParallel(model).cuda()

model.train()
losses = AverageMeter()

for batch_idx, tuple_i in enumerate(valid_loader):
    data, target = tuple_i

    data = Variable(torch.FloatTensor(data).cuda(), requires_grad=True)
    target = Variable(torch.FloatTensor(target).cuda())

    output = model(data)
    print(output.shape)
Exemplo n.º 15
0
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_img_dataset(
        root=args.root,
        name=args.dataset,
        split_id=args.split_id,
        cuhk03_labeled=args.cuhk03_labeled,
        cuhk03_classic_split=args.cuhk03_classic_split,
    )

    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train),
        batch_size=args.train_batch,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )

    queryloader = DataLoader(
        ImageDataset(dataset.query, transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    galleryloader = DataLoader(
        ImageDataset(dataset.gallery, transform=transform_test),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids,
                              loss={'xent'},
                              use_gpu=use_gpu)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion = CrossEntropyLabelSmooth(num_classes=dataset.num_train_pids,
                                        use_gpu=use_gpu)
    optimizer = init_optim(args.optim, model.parameters(), args.lr,
                           args.weight_decay)
    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=args.stepsize,
                                        gamma=args.gamma)
    start_epoch = args.start_epoch

    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, use_gpu)
        return

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")

    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion, optimizer, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        if args.stepsize > 0: scheduler.step()

        if args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or (
                epoch + 1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
        best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
Exemplo n.º 16
0
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    train_dataset = data_manager.init_dataset(root=args.root,
                                              name='json',
                                              phase='train')
    valid_dataset = data_manager.init_dataset(root=args.root,
                                              name='json',
                                              phase='valid')

    test_dataset = data_manager.init_dataset(root=args.root,
                                             name='json',
                                             phase='test')

    test_mask = test_dataset.mask

    pin_memory = True if use_gpu else False

    trainloader = DataLoader(JsonDataset(train_dataset),
                             num_workers=4,
                             batch_size=args.train_batch,
                             pin_memory=pin_memory,
                             drop_last=True)

    validloader = DataLoader(JsonDataset(valid_dataset),
                             num_workers=4,
                             batch_size=args.test_batch,
                             pin_memory=pin_memory,
                             drop_last=True)

    testloader = DataLoader(JsonDataset(test_dataset),
                            num_workers=4,
                            batch_size=args.test_batch,
                            pin_memory=pin_memory,
                            drop_last=True)

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch, use_gpu=use_gpu)
    print("Model size: {:.3f} M".format(count_num_param(model)))

    model.init_weights()
    criterion_label = LabelLoss()

    optimizer = init_optim(args.optim, model.parameters(), args.lr,
                           args.weight_decay)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)
    start_epoch = args.start_epoch

    if args.load_weights:
        # load pretrained weights but ignore layers that don't match in size
        print("Loading pretrained weights from '{}'".format(args.load_weights))
        checkpoint = torch.load(args.load_weights)
        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        pretrain_dict = {
            k: v
            for k, v in pretrain_dict.items()
            if k in model_dict and model_dict[k].size() == v.size()
        }
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)

    if args.resume:
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}".format(start_epoch))

    # if use_gpu:
    # str_ids = args.gpu_devices.split(',')
    # gpu_ids = []
    # for str_id in str_ids:
    #     id = int(str_id)
    #     if id >= 0:
    #         gpu_ids.append(id)
    # model = nn.DataParallel(model, gpu_ids)
    device = torch.device(
        'cuda' if use_gpu and torch.cuda.is_available() else 'cpu')
    model.to(device)

    if args.evaluate:
        print("Evaluate only")
        start_evaluate_time = time.time()
        test_thetas = evaluate(model, testloader, use_gpu, args.test_batch,
                               test_mask)
        # test_thetas = evaluate(model, validloader, use_gpu, args.test_batch, test_mask)
        evaluate_time = time.time() - start_evaluate_time
        print('Evaluate: {} secs'.format(evaluate_time))
        with open("auto_sample.csv", "r") as csvfiler:
            with open("test_thetas.csv", "w") as csvfilew:
                reader = csv.reader(csvfiler)
                for item in reader:
                    if reader.line_num == 1:
                        writer = csv.writer(csvfilew)
                        writer.writerow(['test_id', 'result'])
                        continue
                    writer = csv.writer(csvfilew)
                    writer.writerow(
                        [item[0],
                         str(test_thetas[reader.line_num - 2])])
        # writer.writerows(map(lambda x: [x], test_thetas))
        return

    start_time = time.time()
    train_time = 0
    best_label_loss = np.inf
    best_epoch = 0
    #
    # print("==> Test")
    # label_loss = test(model, validloader, criterion_label, use_gpu, args.test_batch)
    # print("test label loss RMES() is {}".format(label_loss))
    #
    # print("==> Start training")

    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_label, optimizer, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        scheduler.step()

        # save model every epoch
        if (epoch + 1) % args.save_step == 0:
            print("==> Now save epoch {} \'s model".format(epoch + 1))
            # if use_gpu:
            #     state_dict = model.state_dict() #  module.
            # else:
            state_dict = model.state_dict()

            save_checkpoint({
                'state_dict': state_dict,
                'epoch': epoch
            }, False, osp.join(args.save_dir, 'checkpoint_latest.pth'))

        # test model every eval_step
        if (epoch + 1) > args.start_eval and args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or \
                (epoch + 1) == args.max_epoch:
            print("==> Test")
            label_loss = test(model, validloader, criterion_label, use_gpu,
                              args.test_batch)
            is_best = label_loss < best_label_loss

            if is_best:
                best_label_loss = label_loss
                best_epoch = epoch + 1

            # if use_gpu:
            #     state_dict = model.state_dict()
            # else:
            state_dict = model.state_dict()

            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'label_loss': label_loss,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir, 'checkpoint_ep' + str(epoch + 1) +
                         '.pth'))  # .pth.tar

    print("==> Best Label Loss {:.3}, achieved at epoch {}".format(
        best_label_loss, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
Exemplo n.º 17
0
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))

    # tensorboardX
    # writer = SummaryWriter(log_dir=osp.join(args.save_dir,'summary'))

    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_img_dataset(
        root=args.root, name=args.dataset, split_id=args.split_id,
        cuhk03_labeled=args.cuhk03_labeled, cuhk03_classic_split=args.cuhk03_classic_split,
    )

    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    if args.random_erasing:
        transform_train = T.Compose([
            T.Random2DTranslation(args.height, args.width),
            T.RandomHorizontalFlip(),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            RandomErasing(probability=args.probability, mean=[0.0, 0.0, 0.0]),
        ])
        

    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    if args.loss == 'xent,htri':
        trainloader = DataLoader(
            ImageDataset(dataset.train, transform=transform_train),
            sampler=RandomIdentitySampler(dataset.train, num_instances=args.num_instances),
            batch_size=args.train_batch, num_workers=args.workers,
            pin_memory=pin_memory, drop_last=True,
        )
    elif args.loss == 'xent':
        trainloader = DataLoader(
            ImageDataset(dataset.train, transform=transform_train),
            batch_size=args.train_batch, shuffle=True, num_workers=args.workers,
            pin_memory=pin_memory, drop_last=True,
        )

    queryloader = DataLoader(
        ImageDataset(dataset.query, transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    galleryloader = DataLoader(
        ImageDataset(dataset.gallery, transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch, num_classes=dataset.num_train_pids, loss=args.loss)
    print("Model size: {:.5f}M".format(sum(p.numel() for p in model.parameters())/1000000.0))

    criterion_xent = CrossEntropyLabelSmooth(num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    criterion_htri = TripletLoss(margin=args.margin)
    
    optimizer = init_optim(args.optim, model.parameters(), args.lr, args.weight_decay)
    if args.stepsize > 0:
        if not args.warmup:
            scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)
    start_epoch = args.start_epoch

    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, use_gpu)
        return
    def adjust_lr(optimizer, ep):
        if ep < 20:
            lr = 1e-4 * (ep + 1) / 2
        elif ep < 80:
            #lr = 1e-3 * len(args.gpu_devices)
            lr = 1e-3
        elif ep < 180:
            #lr = 1e-4 * len(args.gpu_devices)
            lr = 1e-4
        elif ep < 300:
            #lr = 1e-5 * len(args.gpu_devices)
            lr = 1e-5
        elif ep < 320:
            #lr = 1e-5 * 0.1 ** ((ep - 320) / 80) * len(args.gpu_devices)
            lr = 1e-5 * 0.1 ** ((ep - 320) / 80)
        elif ep < 400:
            lr = 1e-6
        elif ep < 480:
            #lr = 1e-4 * len(args.gpu_devices)
            lr = 1e-4
        else:
            #lr = 1e-5 * len(args.gpu_devices)
            lr = 1e-5
        for p in optimizer.param_groups:
            p['lr'] = lr
    
    length = len(trainloader)
    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    #best_rerank1 = -np.inf
    #best_rerankepoch = 0
    print("==> Start training")

    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        if args.stepsize > 0:
            if args.warmup:
                adjust_lr(optimizer, epoch + 1)
            else:
                scheduler.step()
        train(epoch, model, criterion_xent, criterion_htri, optimizer, trainloader, use_gpu=use_gpu, summary=None, length=length)
        train_time += round(time.time() - start_train_time)
        
        if (epoch+1) > args.start_eval and args.eval_step > 0 and (epoch+1) % args.eval_step == 0 or (epoch+1) == args.max_epoch:
            print("==> Test")
            rank1 = test(epoch, model, queryloader, galleryloader, use_gpu=True, summary=None)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1
            ####### Best Rerank
            #is_rerankbest = rerank1 > best_rerank1
            #if is_rerankbest:
            #    best_rerank1 = rerank1
            #    best_rerankepoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'rank1': rank1,
                'epoch': epoch,
            }, is_best, osp.join(args.save_dir, 'checkpoint_ep' + str(epoch+1) + '.pth.tar'))

    writer.close()
    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(best_rank1, best_epoch))
    #print("==> Best Rerank-1 {:.1%}, achieved at epoch {}".format(best_rerank1, best_rerankepoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".format(elapsed, train_time))