def main():
    print("==========\nArgs:{}\n==========".format(opt))
    train_data_dir = opt.train_data_dir
    test_data_dir = opt.test_data_dir
    name = opt.name
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu_ids
    use_gpu = torch.cuda.is_available()
    if opt.use_cpu:
        use_gpu = False

    # Redirect stdout to a log file under the save directory.
    if not opt.evaluate:
        sys.stdout = Logger(osp.join(opt.save_dir, opt.train_log))
    else:
        sys.stdout = Logger(osp.join(opt.save_dir, opt.test_log))

    if use_gpu:
        print("Currently using GPU {}".format(opt.gpu_ids))
        cudnn.benchmark = True
    else:
        print("Currently using CPU (GPU is highly recommended)")

    # Load train data
    # ---------------
    print("==========Preparing train dataset========")
    transform_train_list = [
        transforms.Resize((288, 144), interpolation=3),  # 3 = PIL.Image.BICUBIC
        transforms.RandomCrop((256, 128)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]
    if opt.PCB:
        # PCB expects a taller input so each of its six horizontal stripes
        # keeps enough spatial resolution.
        transform_train_list = [
            transforms.Resize((384, 192), interpolation=3),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]
    if opt.erasing_p > 0:
        transform_train_list = transform_train_list + [
            RandomErasing(probability=opt.erasing_p, mean=[0.0, 0.0, 0.0])
        ]
    if opt.color_jitter:
        transform_train_list = [
            transforms.ColorJitter(
                brightness=0.1, contrast=0.1, saturation=0.1, hue=0)
        ] + transform_train_list

    train_data_transforms = transforms.Compose(transform_train_list)

    train_all = ''
    if opt.train_all:
        if opt.use_clean_imgs:
            train_all = '_all_clean'
        else:
            train_all = '_all'
        print("Using all the train images")

    train_image_datasets = {}
    train_image_datasets['train'] = datasets.ImageFolder(
        os.path.join(train_data_dir, 'train' + train_all),
        train_data_transforms)
    train_dataloaders = {
        x: torch.utils.data.DataLoader(train_image_datasets[x],
                                       batch_size=opt.train_batch,
                                       shuffle=True,
                                       num_workers=4)
        for x in ['train']
    }
    dataset_sizes = {x: len(train_image_datasets[x]) for x in ['train']}
    class_names = train_image_datasets['train'].classes
    inputs, classes = next(iter(train_dataloaders['train']))

    ######################################################################
    # Prepare test data
    if not opt.train_only:
        print("========Preparing test dataset========")
        transform_test_list = transforms.Compose([
            transforms.Resize((384, 192), interpolation=3),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        test_image_datasets = {
            x: datasets.ImageFolder(os.path.join(test_data_dir, x),
                                    transform_test_list)
            for x in ['gallery', 'query']
        }
        test_dataloaders = {
            x: torch.utils.data.DataLoader(test_image_datasets[x],
                                           batch_size=opt.test_batch,
                                           shuffle=False,
                                           num_workers=4)
            for x in ['gallery', 'query']
        }

    print("Initializing model...")
    if opt.use_dense:
        model = ft_net_dense(len(class_names))
    else:
        model = ft_net(len(class_names))
    if opt.PCB:
        model = PCB(len(class_names))
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    start_epoch = opt.start_epoch
    if opt.resume:
        print("Loading checkpoint from '{}'".format(opt.resume))
        # map_location keeps CPU-only runs from failing on GPU-saved weights.
        checkpoint = torch.load(opt.resume,
                                map_location=None if use_gpu else 'cpu')
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']
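    # Diagnostic sketch (an addition, not required by the flow): summarize
    # what was prepared above, using only names defined earlier in main().
    print("Train identities: {}, train images: {}, batch tensor: {}".format(
        len(class_names), dataset_sizes['train'], tuple(inputs.size())))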
    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if opt.evaluate:
        print("Evaluate only")
        test(model, test_image_datasets, test_dataloaders, use_gpu)
        return

    criterion = nn.CrossEntropyLoss().cuda() if use_gpu else nn.CrossEntropyLoss()

    # nn.DataParallel wraps the model, so its attributes live under
    # model.module on GPU; fall back to the bare model on CPU.
    net = model.module if use_gpu else model

    if opt.PCB:
        # Train the six part classifiers (and the embedding fc) at 10x the
        # learning rate of the pretrained ResNet-50 backbone.
        ignored_params = list(map(id, net.resnet50.fc.parameters()))
        ignored_params += (
            list(map(id, net.classifier0.parameters())) +
            list(map(id, net.classifier1.parameters())) +
            list(map(id, net.classifier2.parameters())) +
            list(map(id, net.classifier3.parameters())) +
            list(map(id, net.classifier4.parameters())) +
            list(map(id, net.classifier5.parameters()))
        )
        base_params = filter(lambda p: id(p) not in ignored_params,
                             model.parameters())
        optimizer = optim.SGD(
            [
                {'params': base_params, 'lr': 0.01},
                {'params': net.resnet50.fc.parameters(), 'lr': 0.1},
                {'params': net.classifier0.parameters(), 'lr': 0.1},
                {'params': net.classifier1.parameters(), 'lr': 0.1},
                {'params': net.classifier2.parameters(), 'lr': 0.1},
                {'params': net.classifier3.parameters(), 'lr': 0.1},
                {'params': net.classifier4.parameters(), 'lr': 0.1},
                {'params': net.classifier5.parameters(), 'lr': 0.1},
            ],
            weight_decay=5e-4,
            momentum=0.9,
            nesterov=True)
    else:
        ignored_params = list(map(id, net.model.fc.parameters())) + \
            list(map(id, net.classifier.parameters()))
        base_params = filter(lambda p: id(p) not in ignored_params,
                             model.parameters())
        optimizer = optim.SGD(
            [
                {'params': base_params, 'lr': 0.01},
                {'params': net.model.fc.parameters(), 'lr': 0.1},
                {'params': net.classifier.parameters(), 'lr': 0.1},
            ],
            weight_decay=5e-4,
            momentum=0.9,
            nesterov=True)

    # Decay LR by a factor of 0.1 every opt.stepsize epochs.
    if opt.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=opt.stepsize,
                                        gamma=0.1)

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")

    ######################################################################
    # Training the model
    # ------------------
    #
    # The loop below trains the model while:
    #
    # - scheduling the learning rate
    # - saving the best model by Rank-1 on the test set
    #
    # ``scheduler`` is an LR scheduler object from
    # ``torch.optim.lr_scheduler``.
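    # Diagnostic sketch: confirm the two-speed learning rates configured
    # above (pretrained backbone at 0.01, freshly initialized heads at 0.1).
    for i, group in enumerate(optimizer.param_groups):
        print("param group {}: lr={:g}, tensors={}".format(
            i, group['lr'], len(group['params'])))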
    for epoch in range(start_epoch, opt.max_epoch):
        start_train_time = time.time()
        if opt.train_only:
            print("==> Training only")
            train(epoch, model, criterion, optimizer, train_dataloaders,
                  use_gpu)
            train_time += round(time.time() - start_train_time)
            # No test set to rank against in train-only mode; just save a
            # checkpoint every opt.eval_step epochs.
            if epoch % opt.eval_step == 0:
                if use_gpu:
                    state_dict = model.module.state_dict()
                else:
                    state_dict = model.state_dict()
                save_checkpoint(
                    {
                        'state_dict': state_dict,
                        'epoch': epoch + 1,  # next epoch to run on resume
                    }, False,
                    osp.join(opt.save_dir,
                             'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))
            if opt.stepsize > 0:
                scheduler.step()
        else:
            train(epoch, model, criterion, optimizer, train_dataloaders,
                  use_gpu)
            train_time += round(time.time() - start_train_time)
            if opt.stepsize > 0:
                scheduler.step()

            # Evaluate every opt.eval_step epochs once past opt.start_eval,
            # and always on the final epoch.
            if (epoch + 1) == opt.max_epoch or (
                    (epoch + 1) > opt.start_eval and opt.eval_step > 0
                    and (epoch + 1) % opt.eval_step == 0):
                print("==> Test")
                rank1 = test(model, test_image_datasets, test_dataloaders,
                             use_gpu)
                is_best = rank1 > best_rank1
                if is_best:
                    best_rank1 = rank1
                    best_epoch = epoch + 1

                if use_gpu:
                    state_dict = model.module.state_dict()
                else:
                    state_dict = model.state_dict()
                save_checkpoint(
                    {
                        'state_dict': state_dict,
                        'rank1': rank1,
                        'epoch': epoch + 1,  # next epoch to run on resume
                    }, is_best,
                    osp.join(opt.save_dir,
                             'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    if not opt.train_only:
        print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
            best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (h:m:s): {}. "
          "Training time (h:m:s): {}.".format(elapsed, train_time))
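# Sketch of partial weight loading (the helper name is ours): warm-start a
# model from a previously trained checkpoint while skipping its classifier
# heads, so the backbone transfers but the id-specific layers restart from
# scratch. This mirrors the filtered state_dict update pattern used
# elsewhere in this project.
def load_pretrained_skip_classifier(model, weight_path):
    pretrained_dict = torch.load(weight_path, map_location='cpu')
    model_dict = model.state_dict()
    # Keep only backbone weights; drop every key starting with 'classifier'.
    pretrained_dict = {
        k: v for k, v in pretrained_dict.items()
        if not k.startswith('classifier')
    }
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)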
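# Entry point (assumes opt is parsed at module import time, as the globals
# referenced above imply).
if __name__ == '__main__':
    main()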