# Bookkeeping for the best test accuracy seen so far and for resuming.
best_Test_acc_epoch = 0
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

# Learning-rate decay schedule.
learning_rate_decay_start = 20  # 50
learning_rate_decay_every = 1  # 5
learning_rate_decay_rate = 0.8  # 0.9

cut_size = 44  # side length of the crop fed to the network
total_epoch = 60

# Checkpoints/logs live under "<dataset>_<model>/<fold>".
path = os.path.join(opt.dataset + '_' + opt.model, str(opt.fold))

# Data
print('==> Preparing data..')

# Training: random crop + horizontal flip augmentation.
transform_train = transforms.Compose([
    transforms.RandomCrop(cut_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

# Testing: ten-crop evaluation; the Lambda stacks the ten crops into one tensor.
transform_test = transforms.Compose([
    transforms.TenCrop(cut_size),
    transforms.Lambda(
        lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
])

trainset = CK(split='Training', fold=opt.fold, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=opt.bs, shuffle=True, num_workers=1)
# Training bookkeeping and hyper-parameters.
start_epoch = 0  # start from epoch 0 or last checkpoint epoch
learning_rate_decay_start = 80  # 50
learning_rate_decay_every = 5  # 5
learning_rate_decay_rate = 0.9  # 0.9
cut_size = 44  # side length of the crop fed to the network
total_epoch = 250

# Checkpoints/logs live under "<dataset>_<model>".
path = os.path.join(opt.dataset + '_' + opt.model)

# Data
print('==> Preparing data..')  # (duplicate print removed)

# Training: random crop + horizontal flip. Use cut_size rather than a
# hard-coded 44 so the train crop can never drift out of sync with TenCrop.
transform_train = transforms.Compose([
    transforms.RandomCrop(cut_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

# Testing: ten-crop evaluation; the Lambda stacks the ten crops into one tensor.
transform_test = transforms.Compose([
    transforms.TenCrop(cut_size),
    transforms.Lambda(lambda crops: torch.stack(
        [transforms.ToTensor()(crop) for crop in crops])),
])

trainset = FER2013(split='Training', transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=opt.bs, shuffle=True, num_workers=0)
# Declare the best-metric trackers as globals BEFORE assigning to them:
# assigning first and declaring `global` afterwards raises SyntaxError
# ("name ... is assigned to before global declaration").
global best_acc1
global best_epoch
global best_acc5
best_epoch = 0
best_acc5 = 0
start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

# Make sure the checkpoint directory exists.
if not os.path.isdir(args.checkpoint):
    os.makedirs(args.checkpoint)

# Data
print('==> Preparing data..')

# Training: standard CIFAR augmentation plus random erasing.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    # CIFAR-10 per-channel mean/std.
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    # NOTE(review): the probability/sh/r1 kwargs match the Zhong et al.
    # project-local RandomErasing, not torchvision's (p/scale/ratio) — confirm.
    transforms.RandomErasing(
        probability=args.p,
        sh=args.sh,
        r1=args.r1,
    ),
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
def __init__(self, args, use_gpu):
    """Build the train/val/test episodic data loaders.

    One of three transform presets is selected from flags:
    ``args.load`` -> 84x84 RandomCrop pipeline with color jitter,
    ``args.cifar`` -> 32x32 CIFAR-style pipeline,
    otherwise -> RandomResizedCrop(84) train / Resize(92)+CenterCrop(84) test.

    Args:
        args: parsed command-line namespace (dataset name/root, episode
            composition nKnovel/nExemplars/nTestNovel, epoch sizes,
            batch sizes, workers, load/cifar/tiered flags).
        use_gpu: bool; when True the loaders use pinned host memory.
    """
    super(DataManager, self).__init__()
    self.args = args
    self.use_gpu = use_gpu
    print("Initializing dataset {}".format(args.dataset))
    dataset = datasets.init_imgfewshot_dataset(name=args.dataset, root=args.root)
    if args.load:
        # Pre-loaded images: crop-based 84x84 pipeline (ImageNet statistics).
        transform_train = T.Compose([
            T.RandomCrop(84, padding=8),
            T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
            T.RandomHorizontalFlip(),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            T.RandomErasing(0.5)
        ])
        transform_test = T.Compose([
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
    elif args.cifar:
        # CIFAR-style 32x32 pipeline (CIFAR-100 statistics).
        transform_train = T.Compose([
            T.RandomCrop(32, padding=4),
            T.RandomHorizontalFlip(),
            T.ToTensor(),
            T.Normalize(mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276]),
            T.RandomErasing(0.5)
        ])
        transform_test = T.Compose([
            T.ToTensor(),
            T.Normalize(mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276]),
        ])
    else:
        # Images loaded from disk: resized-crop train, resize+center-crop test.
        transform_train = T.Compose([
            T.RandomResizedCrop((84, 84)),
            T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
            T.RandomHorizontalFlip(),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            T.RandomErasing(0.5)
        ])
        transform_test = T.Compose([
            T.Resize((92, 92)),
            T.CenterCrop((84, 84)),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
    # Pinned memory speeds up host->GPU transfer; pointless on CPU.
    pin_memory = True if use_gpu else False
    # Training episodes use the train-specific episode sizes and drop the
    # last incomplete batch.
    self.trainloader = DataLoader(
        dataset_loader.init_loader(
            name='train_loader',
            dataset=dataset.train,
            labels2inds=dataset.train_labels2inds,
            labelIds=dataset.train_labelIds,
            nKnovel=args.nKnovel,
            nExemplars=args.nExemplars,
            nTestNovel=args.train_nTestNovel,
            epoch_size=args.train_epoch_size,
            transform=transform_train,
            load=args.load,
            tiered=args.tiered,
        ),
        batch_size=args.train_batch,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )
    # Validation: deterministic order, no dropped batches.
    self.valloader = DataLoader(
        dataset_loader.init_loader(
            name='test_loader',
            dataset=dataset.val,
            labels2inds=dataset.val_labels2inds,
            labelIds=dataset.val_labelIds,
            nKnovel=args.nKnovel,
            nExemplars=args.nExemplars,
            nTestNovel=args.nTestNovel,
            epoch_size=args.epoch_size,
            transform=transform_test,
            load=args.load,
            tiered=args.tiered,
        ),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )
    # Test split: same configuration as validation but over dataset.test.
    self.testloader = DataLoader(
        dataset_loader.init_loader(
            name='test_loader',
            dataset=dataset.test,
            labels2inds=dataset.test_labels2inds,
            labelIds=dataset.test_labelIds,
            nKnovel=args.nKnovel,
            nExemplars=args.nExemplars,
            nTestNovel=args.nTestNovel,
            epoch_size=args.epoch_size,
            transform=transform_test,
            load=args.load,
            tiered=args.tiered,
        ),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )
def main(args):
    """Train/evaluate a torchvision video-classification model on Kinetics-400.

    Handles distributed setup, optional NVIDIA apex mixed precision, on-disk
    dataset caching, warmup LR scheduling, checkpoint resume/save, and a
    test-only mode.

    Args:
        args: parsed command-line namespace (data_path, model, batch_size,
            clip_len, distributed/apex flags, optimizer and schedule
            hyper-parameters, output_dir, resume, test_only, ...).
    """
    if args.apex:
        if sys.version_info < (3, 0):
            raise RuntimeError("Apex currently only supports Python 3. Aborting.")
        if amp is None:
            raise RuntimeError("Failed to import apex. Please install apex from https://www.github.com/nvidia/apex "
                               "to enable mixed-precision training.")
    if args.output_dir:
        utils.mkdir(args.output_dir)
    utils.init_distributed_mode(args)
    print(args)
    print("torch version: ", torch.__version__)
    print("torchvision version: ", torchvision.__version__)
    device = torch.device(args.device)
    # Fixed input sizes -> let cudnn autotune convolution algorithms.
    torch.backends.cudnn.benchmark = True
    # Data loading code
    print("Loading data")
    traindir = os.path.join(args.data_path, 'train_avi-480p')
    valdir = os.path.join(args.data_path, 'val_avi-480p')
    # Kinetics-400 per-channel mean/std.
    normalize = T.Normalize(mean=[0.43216, 0.394666, 0.37645],
                            std=[0.22803, 0.22145, 0.216989])
    print("Loading training data")
    st = time.time()
    cache_path = _get_cache_path(traindir)
    transform_train = torchvision.transforms.Compose([
        T.ToFloatTensorInZeroOne(),
        T.Resize((128, 171)),
        T.RandomHorizontalFlip(),
        normalize,
        T.RandomCrop((112, 112))
    ])
    if args.cache_dataset and os.path.exists(cache_path):
        # Reuse the cached (dataset, dir) tuple; only the transform is swapped.
        print("Loading dataset_train from {}".format(cache_path))
        dataset, _ = torch.load(cache_path)
        dataset.transform = transform_train
    else:
        if args.distributed:
            print("It is recommended to pre-compute the dataset cache "
                  "on a single-gpu first, as it will be faster")
        dataset = torchvision.datasets.Kinetics400(
            traindir,
            frames_per_clip=args.clip_len,
            step_between_clips=1,
            transform=transform_train
        )
        if args.cache_dataset:
            print("Saving dataset_train to {}".format(cache_path))
            utils.mkdir(os.path.dirname(cache_path))
            utils.save_on_master((dataset, traindir), cache_path)
    # Re-chunk videos into clips at a fixed 15 fps regardless of cache state.
    dataset.video_clips.compute_clips(args.clip_len, 1, frame_rate=15)
    print("Took", time.time() - st)
    print("Loading validation data")
    cache_path = _get_cache_path(valdir)
    transform_test = torchvision.transforms.Compose([
        T.ToFloatTensorInZeroOne(),
        T.Resize((128, 171)),
        normalize,
        T.CenterCrop((112, 112))
    ])
    if args.cache_dataset and os.path.exists(cache_path):
        print("Loading dataset_test from {}".format(cache_path))
        dataset_test, _ = torch.load(cache_path)
        dataset_test.transform = transform_test
    else:
        if args.distributed:
            print("It is recommended to pre-compute the dataset cache "
                  "on a single-gpu first, as it will be faster")
        dataset_test = torchvision.datasets.Kinetics400(
            valdir,
            frames_per_clip=args.clip_len,
            step_between_clips=1,
            transform=transform_test
        )
        if args.cache_dataset:
            print("Saving dataset_test to {}".format(cache_path))
            utils.mkdir(os.path.dirname(cache_path))
            utils.save_on_master((dataset_test, valdir), cache_path)
    dataset_test.video_clips.compute_clips(args.clip_len, 1, frame_rate=15)
    print("Creating data loaders")
    # Random clips per video for training; uniformly spaced clips for eval.
    train_sampler = RandomClipSampler(dataset.video_clips, args.clips_per_video)
    test_sampler = UniformClipSampler(dataset_test.video_clips, args.clips_per_video)
    if args.distributed:
        train_sampler = DistributedSampler(train_sampler)
        test_sampler = DistributedSampler(test_sampler)
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=args.batch_size,
        sampler=train_sampler, num_workers=args.workers,
        pin_memory=True, collate_fn=collate_fn)
    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=args.batch_size,
        sampler=test_sampler, num_workers=args.workers,
        pin_memory=True, collate_fn=collate_fn)
    print("Creating model")
    model = torchvision.models.video.__dict__[args.model](pretrained=args.pretrained)
    model.to(device)
    if args.distributed and args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    criterion = nn.CrossEntropyLoss()
    # Linear LR scaling with the number of distributed workers.
    lr = args.lr * args.world_size
    optimizer = torch.optim.SGD(
        model.parameters(), lr=lr, momentum=args.momentum,
        weight_decay=args.weight_decay)
    if args.apex:
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level=args.apex_opt_level
                                          )
    # convert scheduler to be per iteration, not per epoch, for warmup that lasts
    # between different epochs
    warmup_iters = args.lr_warmup_epochs * len(data_loader)
    lr_milestones = [len(data_loader) * m for m in args.lr_milestones]
    lr_scheduler = WarmupMultiStepLR(
        optimizer, milestones=lr_milestones, gamma=args.lr_gamma,
        warmup_iters=warmup_iters, warmup_factor=1e-5)
    # Keep a handle on the bare module so state_dicts are DDP-prefix-free.
    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module
    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1
    if args.test_only:
        evaluate(model, criterion, data_loader_test, device=device)
        return
    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Re-seed the distributed sampler so shuffling differs per epoch.
            train_sampler.set_epoch(epoch)
        train_one_epoch(model, criterion, optimizer, lr_scheduler, data_loader,
                        device, epoch, args.print_freq, args.apex)
        evaluate(model, criterion, data_loader_test, device=device)
        if args.output_dir:
            # Save a per-epoch snapshot plus a rolling "latest" checkpoint.
            checkpoint = {
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'epoch': epoch,
                'args': args}
            utils.save_on_master(
                checkpoint,
                os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
            utils.save_on_master(
                checkpoint,
                os.path.join(args.output_dir, 'checkpoint.pth'))
    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
parser.add_argument('--r1', default=0.3, type=float, help='aspect of erasing area')
args = parser.parse_args()

# CUDA is used only when available and not explicitly disabled on the CLI.
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Seed the CPU RNG (and the GPU RNG when CUDA is in play) for reproducibility.
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# DataLoader extras: workers + pinned memory only make sense on a CUDA host.
if args.cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

# Training pipeline: MNIST statistics, crop/flip augmentation, then the
# CLI-configured random-erasing step.
transform_train = transforms.Compose([
    transforms.RandomCrop(28, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.1307, ), (0.3081, )),
    transforms.RandomErasing(probability=args.p, sh=args.sh, r1=args.r1),
])

# Test pipeline: normalization only.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307, ), (0.3081, )),
])
# Open the HDF5 dataset read-only: h5py's historic default mode ('a') would
# silently create or allow modification of the file if the path were wrong.
dataset = h5py.File(file_path, 'r')
# set up map for different categories
categories = np.genfromtxt(args.categories, dtype='str')[:, 0]
# set up model and convert into cuda
model = NAME_TO_MODEL[args.name].cuda()
print('==> model loaded')
best_top_5 = 0  # running best top-5 accuracy

# ImageNet per-channel mean/std.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Build the augmentation pipeline in one literal instead of the previous
# list + extend + += patchwork (same transforms, same order). NOTE(review):
# transforms.Scale is the deprecated alias of transforms.Resize kept for the
# old torchvision version this script targets — confirm before upgrading.
transform = [
    transforms.Scale(256),
    transforms.RandomCrop(224),
    transforms.ColorJitter(brightness=0.05, contrast=0.05, saturation=0.35, hue=0.1),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
]
# Attach a timestamped format to the console handler and register it.
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)

############################# Hyper-parameters ################################
alpha = 1.0
beta = 1.0
at_margin = 1

# ImageNet channel statistics and the (height, width) fed to the network.
pixel_mean = [0.485, 0.456, 0.406]
pixel_std = [0.229, 0.224, 0.225]
inp_size = [384, 128]

# transforms
# Training: rectangular rescale, flip, pad+crop jitter, normalization,
# then random erasing filled with the dataset mean.
transforms_list = transforms.Compose([
    transforms.RectScale(*inp_size),
    transforms.RandomHorizontalFlip(),
    transforms.Pad(10),
    transforms.RandomCrop(inp_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=pixel_mean, std=pixel_std),
    transforms.RandomErasing(probability=0.5, mean=pixel_mean),
])

# Testing: deterministic rescale + normalization only.
test_transforms_list = transforms.Compose([
    transforms.RectScale(*inp_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=pixel_mean, std=pixel_std),
])
def main():
    """Train/evaluate an image person-ReID model with ring loss.

    Sets up seeding and logging, builds train/query/gallery loaders,
    constructs the model plus label-smoothed cross-entropy and ring losses,
    trains with a LambdaLR schedule, periodically evaluates Rank-1, and
    checkpoints (keeping the best model).
    """
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    # Mirror stdout into a per-mode log file.
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))
    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")
    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_img_dataset(
        root=args.root,
        name=args.dataset,
        split_id=args.split_id,
        cuhk03_labeled=args.cuhk03_labeled,
        cuhk03_classic_split=args.cuhk03_classic_split,
    )
    # Training: resize, flip, pad+crop jitter, normalize, random erasing
    # (torchvision's RandomErasing with an explicit fill value).
    transform_train = T.Compose([
        T.Resize((args.height, args.width)),
        T.RandomHorizontalFlip(p=0.5),
        T.Pad(10),
        T.RandomCrop([args.height, args.width]),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        torchvision.transforms.RandomErasing(p=0.5, scale=(0.02, 0.4), ratio=(0.3, 3.33), value=(0.4914, 0.4822, 0.4465))
        #T.RandomErasing(probability=0.5, sh=0.4, mean=(0.4914, 0.4822, 0.4465)),
    ])
    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    # Pinned memory speeds host->GPU transfers; irrelevant on CPU.
    pin_memory = True if use_gpu else False
    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train),
        batch_size=args.train_batch, shuffle=True, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=True,
    )
    queryloader = DataLoader(
        ImageDataset(dataset.query, transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )
    galleryloader = DataLoader(
        ImageDataset(dataset.gallery, transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )
    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids,
                              loss={'ring'},
                              use_gpu=use_gpu)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))
    criterion_xent = CrossEntropyLabelSmooth(
        num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    criterion_ring = RingLoss(args.weight_ring)
    if use_gpu:
        criterion_ring = criterion_ring.cuda()
    # RingLoss has a learnable radius, so its parameters join the optimizer.
    params = list(model.parameters()) + list(criterion_ring.parameters())
    optimizer = init_optim(args.optim, params, args.lr, args.weight_decay)
    # if args.stepsize > 0:
    #     scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)
    '''------Modify lr_schedule here------'''
    current_schedule = init_lr_schedule(schedule=args.schedule,
                                        warm_up_epoch=args.warm_up_epoch,
                                        half_cos_period=args.half_cos_period,
                                        lr_milestone=args.lr_milestone,
                                        gamma=args.gamma,
                                        stepsize=args.stepsize)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                  lr_lambda=current_schedule)
    '''------Please refer to the args.xxx for details of hyperparams------'''
    # embed()
    start_epoch = args.start_epoch
    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']
    if use_gpu:
        model = nn.DataParallel(model).cuda()
    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, use_gpu)
        return
    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")
    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, model, criterion_xent, criterion_ring, optimizer,
              trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)
        if args.schedule:
            scheduler.step()
        # Evaluate every eval_step epochs (once past start_eval) and always
        # at the final epoch.
        if (epoch + 1) > args.start_eval and args.eval_step > 0 and (
                epoch + 1) % args.eval_step == 0 or (epoch + 1) == args.max_epoch:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1
            # Unwrap DataParallel so checkpoints are prefix-free.
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, is_best,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))
    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(
        best_rank1, best_epoch))
    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
def main():
    """Train/evaluate a video person-ReID model with optional attribute heads.

    Creates a timestamped output dir, seeds all RNGs, builds the model and
    train/query/gallery video loaders, combines label-smoothed cross-entropy,
    a triplet loss (cosine or euclid), attribute losses and a warmup LR
    schedule, then trains and checkpoints on Rank-1 improvements.
    """
    runId = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    cfg.OUTPUT_DIR = os.path.join(cfg.OUTPUT_DIR, runId)
    if not os.path.exists(cfg.OUTPUT_DIR):
        os.mkdir(cfg.OUTPUT_DIR)
    print(cfg.OUTPUT_DIR)
    # Seed every RNG source for reproducibility.
    torch.manual_seed(cfg.RANDOM_SEED)
    random.seed(cfg.RANDOM_SEED)
    np.random.seed(cfg.RANDOM_SEED)
    os.environ['CUDA_VISIBLE_DEVICES'] = cfg.MODEL.DEVICE_ID
    use_gpu = torch.cuda.is_available() and cfg.MODEL.DEVICE == "cuda"
    # Mirror stdout into a per-mode log file.
    if not cfg.EVALUATE_ONLY:
        sys.stdout = Logger(osp.join(cfg.OUTPUT_DIR, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(cfg.OUTPUT_DIR, 'log_test.txt'))
    print("==========\nConfigs:{}\n==========".format(cfg))
    if use_gpu:
        print("Currently using GPU {}".format(cfg.MODEL.DEVICE_ID))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(cfg.RANDOM_SEED)
    else:
        print("Currently using CPU (GPU is highly recommended)")
    print("Initializing dataset {}".format(cfg.DATASETS.NAME))
    dataset = data_manager.init_dataset(root=cfg.DATASETS.ROOT_DIR,
                                        name=cfg.DATASETS.NAME)
    # Attribute recognition is optional; without it the lens list stays empty.
    if cfg.ATTR_RECOG_ON:
        cfg.DATASETS.ATTR_LENS = dataset.attr_lens
    else:
        cfg.DATASETS.ATTR_LENS = []
    cfg.DATASETS.ATTR_COLUMNS = dataset.columns
    print("Initializing model: {}".format(cfg.MODEL.NAME))
    # NOTE(review): `model` is only assigned for the 'video_baseline' arch —
    # other arch values would hit a NameError below; confirm intended.
    if cfg.MODEL.ARCH == 'video_baseline':
        torch.backends.cudnn.benchmark = False
        model = models.init_model(name=cfg.MODEL.ARCH,
                                  num_classes=dataset.num_train_pids,
                                  pretrain_choice=cfg.MODEL.PRETRAIN_CHOICE,
                                  last_stride=cfg.MODEL.LAST_STRIDE,
                                  neck=cfg.MODEL.NECK,
                                  model_name=cfg.MODEL.NAME,
                                  neck_feat=cfg.TEST.NECK_FEAT,
                                  model_path=cfg.MODEL.PRETRAIN_PATH,
                                  fusion_method=cfg.MODEL.FUSION_METHOD,
                                  attr_lens=cfg.DATASETS.ATTR_LENS,
                                  attr_loss=cfg.DATASETS.ATTRIBUTE_LOSS)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))
    # Training: resize, flip, pad+crop jitter, normalize, random erasing.
    transform_train = T.Compose([
        T.Resize(cfg.INPUT.SIZE_TRAIN),
        T.RandomHorizontalFlip(p=cfg.INPUT.PROB),
        T.Pad(cfg.INPUT.PADDING),
        T.RandomCrop(cfg.INPUT.SIZE_TRAIN),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        T.RandomErasing(probability=cfg.INPUT.RE_PROB, mean=cfg.INPUT.PIXEL_MEAN)
    ])
    transform_test = T.Compose([
        T.Resize(cfg.INPUT.SIZE_TEST),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    # pin_memory = True if use_gpu else False
    # Pinned memory and workers deliberately disabled here (debug settings?).
    pin_memory = False
    cfg.DATALOADER.NUM_WORKERS = 0
    cfg.DATASETS.ATTR_COLUMNS = dataset.columns
    # Training episodes are sampled per identity via RandomIdentitySampler.
    trainloader = DataLoader(VideoDataset(
        dataset.train, seq_len=cfg.DATASETS.SEQ_LEN,
        sample=cfg.DATASETS.TRAIN_SAMPLE_METHOD,
        transform=transform_train,
        attr_loss=cfg.DATASETS.ATTRIBUTE_LOSS,
        attr_lens=cfg.DATASETS.ATTR_LENS,
        dataset_name=cfg.DATASETS.NAME),
        sampler=RandomIdentitySampler(
            dataset.train, num_instances=cfg.DATALOADER.NUM_INSTANCE),
        batch_size=cfg.SOLVER.SEQS_PER_BATCH,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        pin_memory=pin_memory, drop_last=True)
    queryloader = DataLoader(VideoDataset(
        dataset.query, seq_len=cfg.DATASETS.SEQ_LEN,
        sample=cfg.DATASETS.TEST_SAMPLE_METHOD,
        transform=transform_test,
        max_seq_len=cfg.DATASETS.TEST_MAX_SEQ_NUM,
        dataset_name=cfg.DATASETS.NAME,
        attr_lens=cfg.DATASETS.ATTR_LENS,
        attr_loss=cfg.DATASETS.ATTRIBUTE_LOSS),
        batch_size=cfg.TEST.SEQS_PER_BATCH, shuffle=False,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        pin_memory=pin_memory, drop_last=False)
    galleryloader = DataLoader(
        VideoDataset(dataset.gallery, seq_len=cfg.DATASETS.SEQ_LEN,
                     sample=cfg.DATASETS.TEST_SAMPLE_METHOD,
                     transform=transform_test,
                     max_seq_len=cfg.DATASETS.TEST_MAX_SEQ_NUM,
                     dataset_name=cfg.DATASETS.NAME,
                     attr_lens=cfg.DATASETS.ATTR_LENS,
                     attr_loss=cfg.DATASETS.ATTRIBUTE_LOSS),
        batch_size=cfg.TEST.SEQS_PER_BATCH, shuffle=False,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        pin_memory=pin_memory, drop_last=False,
    )
    if cfg.MODEL.SYN_BN:
        model = apex.parallel.convert_syncbn_model(model)
    optimizer = make_optimizer(cfg, model)
    if cfg.SOLVER.FP_16:
        model, optimizer = apex.amp.initialize(model.cuda(), optimizer,
                                               opt_level='O1')
    if use_gpu:
        model = nn.DataParallel(model)
        model.cuda()
    start_time = time.time()
    xent = CrossEntropyLabelSmooth(num_classes=dataset.num_train_pids)
    # Triplet loss flavor follows the configured distance metric.
    if cfg.DISTANCE == "cosine":
        tent = CosineTripletLoss(cfg.SOLVER.MARGIN)
    elif cfg.DISTANCE == "euclid":
        tent = TripletLoss(cfg.SOLVER.MARGIN)
    # NOTE(review): attr_criter is only bound for "mce"/"bce" attribute
    # losses — any other value leaves it undefined; confirm intended.
    if cfg.DATASETS.ATTRIBUTE_LOSS == "mce":
        attr_criter = nn.CrossEntropyLoss()
    elif cfg.DATASETS.ATTRIBUTE_LOSS == "bce":
        attr_criter = nn.BCEWithLogitsLoss()
    tlaw = TripletLossAttrWeightes(dis_type=cfg.DISTANCE)
    scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS,
                                  cfg.SOLVER.GAMMA,
                                  cfg.SOLVER.WARMUP_FACTOR,
                                  cfg.SOLVER.WARMUP_ITERS,
                                  cfg.SOLVER.WARMUP_METHOD)
    ema = None
    no_rise = 0
    # Baseline evaluation before any training.
    metrics = test(model, queryloader, galleryloader,
                   cfg.TEST.TEMPORAL_POOL_METHOD, use_gpu)
    # return
    best_rank1 = 0
    start_epoch = 0
    for epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCHS):
        # if no_rise == 10:
        #     break
        # NOTE(review): scheduler.step() before train() means the LR advances
        # at the top of each epoch — confirm this matches the intended schedule.
        scheduler.step()
        print("noriase:", no_rise)
        print("==> Epoch {}/{}".format(epoch + 1, cfg.SOLVER.MAX_EPOCHS))
        print("current lr:", scheduler.get_lr()[0])
        train(model, trainloader, xent, tent, attr_criter, optimizer,
              use_gpu, tlaw=tlaw)
        if cfg.SOLVER.EVAL_PERIOD > 0 and (
                (epoch + 1) % cfg.SOLVER.EVAL_PERIOD == 0 or
                (epoch + 1) == cfg.SOLVER.MAX_EPOCHS):
            print("==> Test")
            metrics = test(model, queryloader, galleryloader,
                           cfg.TEST.TEMPORAL_POOL_METHOD, use_gpu)
            rank1 = metrics[0]
            # Only checkpoint when Rank-1 improves; otherwise count a miss.
            if rank1 > best_rank1:
                best_rank1 = rank1
                no_rise = 0
            else:
                no_rise += 1
                continue
            # Unwrap DataParallel so checkpoints are prefix-free.
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            torch.save(
                state_dict,
                osp.join(
                    cfg.OUTPUT_DIR,
                    "rank1_" + str(rank1) + '_checkpoint_ep' +
                    str(epoch + 1) + '.pth'))
    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))
# NOTE(review): fragment — the enclosing `def` (and its 'train' branch) sits
# outside this chunk; the indentation below is reconstructed.
elif mode == 'val':
    main_df = pd.read_csv(str(data_path / 'val_df.csv'))
    df = main_df
    df['class_id'] = df['target'].map(class_map)
    df['is_manip'] = 0
    df = df[df['target'].notnull()]
    # NOTE(review): this unconditionally overwrites the 0 written above, so
    # every surviving row ends up with is_manip == 1 — confirm intent.
    df['is_manip'] = 1
    return df
return None


# Random crop for training, deterministic centre crop for validation; both
# normalize with ImageNet channel statistics.
train_transform = Compose([
    albu_trans.RandomCrop(target_size),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
val_transform = Compose([
    albu_trans.CenterCrop(target_size),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


def add_args(parser):
    # Register common CLI flags on the given argparse parser; the local
    # alias keeps the registration lines short.
    arg = parser.add_argument
    arg('--root', default='runs/debug', help='checkpoint root')
    arg('--batch-size', type=int, default=4)