def main():
    """Run the trained model on wild test images.

    For every test batch, renders four attribute edits (hair color, skin
    color, beard, single random attribute) to PNG files, then measures the
    self-reconstruction error (SRE) on the untouched batch and prints the
    average over the loader.
    """
    opts = TestOptions().parse()
    if not os.path.isdir(opts.output_dir):
        os.makedirs(opts.output_dir)
    result_dir = ospj(opts.output_dir, "results_wild_ganimation")
    os.makedirs(result_dir, exist_ok=True)

    model = ModelsFactory.get_by_name(opts.model, opts)
    model.set_eval()
    test_loader = get_dataloader(opts.data_dir, 'wild_images', 'test',
                                 opts.image_size, opts.selected_attrs, 1)
    test_epoch = 0

    # (edit function, filename tag) pairs — previously the identical
    # apply/save sequence was written out four times.
    edits = [
        (change_hair_color_target, 'hair_color'),
        (change_skin_color_target, 'skin_color'),
        (change_beard_target, 'beard'),
        (change_one_attr_target, 'one'),
    ]

    all_sre_val = 0.0
    for test_batch_idx, test_data_batch in enumerate(test_loader):
        # Keep pristine copies; each edit below mutates the batch dict.
        org_img = test_data_batch['real_img'].clone().detach()
        org_attr = test_data_batch['real_cond'].clone().detach()

        for change_fn, tag in edits:
            img, attr, _ = change_fn(org_img.clone().detach(),
                                     org_attr.clone().detach(),
                                     opts.selected_attrs)
            test_data_batch['real_img'] = img
            test_data_batch['desired_cond'] = attr
            test_data_batch['real_cond'] = attr
            save_file = ospj(result_dir, f"test_epoch_{test_epoch}_wild_batch_{test_batch_idx}_test_{tag}.png")
            test_and_save(model, test_data_batch, save_file)

        # Restore the original batch for the self-reconstruction measurement.
        test_data_batch['real_img'] = org_img
        test_data_batch['desired_cond'] = org_attr
        test_data_batch['real_cond'] = org_attr
        sre = self_rec(model, test_data_batch)
        # print(test_batch_idx, sre)
        all_sre_val += sre.item()
    print("sre:", all_sre_val / len(test_loader))
def main():
    """Evaluate a trained model: FID between the two eval splits, plus
    per-attribute prediction accuracy from a pretrained attribute classifier.
    """
    opts = TestOptions().parse()
    if not os.path.isdir(opts.output_dir):
        os.makedirs(opts.output_dir)

    n_attrs = len(opts.selected_attrs)
    eval_part1_loader = get_dataloader(opts.data_dir, 'celebahq_ffhq_fake', 'eval_part1',
                                       opts.image_size, opts.selected_attrs, n_attrs)
    eval_part2_loader = get_dataloader(opts.data_dir, 'celebahq_ffhq_fake', 'eval_part2',
                                       opts.image_size, opts.selected_attrs, n_attrs)

    model = ModelsFactory.get_by_name(opts.model, opts)
    model.set_eval()

    # Inception backbone used to extract features for the FID computation.
    block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[opts.dims]
    fid_model = torch.nn.DataParallel(InceptionV3([block_idx])).cuda().eval()

    # NOTE: here we hard code to resnet18, we construct the resnet with selected attributes
    attr_pred_model = resnet18(pretrained=True, num_attributes=n_attrs)
    attr_model_ckpt = ospj('eval_attr/checkpoints_select_no_extra/model_best.pth.tar')  # for local
    assert os.path.isfile(attr_model_ckpt), f"checkpoint file {attr_model_ckpt} for attribute prediction not found!"
    print(f"=> loading attribute checkpoint '{attr_model_ckpt}'")
    checkpoint = torch.load(attr_model_ckpt, map_location=torch.device("cpu"))
    attr_pred_model.load_state_dict(checkpoint['state_dict'])
    attr_pred_model = torch.nn.DataParallel(attr_pred_model).cuda().eval()
    print(f"=> loaded attribute checkpoint '{attr_model_ckpt}' (epoch {checkpoint['epoch']})")

    fid_score = predict_fid_score(opts, eval_part1_loader, eval_part2_loader, fid_model, model)
    all_attrs_avg, each_attr_avg = predict_attr_score(opts, eval_part2_loader, attr_pred_model, model)

    # Summary metrics first, then the per-attribute breakdown.
    eval_dict = {"FID": fid_score, "Attribute_Average": all_attrs_avg}
    for k, v in eval_dict.items():
        print(f"Eval {k}: {v}")

    all_attr_eval_dict = dict(zip(opts.selected_attrs, each_attr_avg))
    for attr_name, attr_pred in all_attr_eval_dict.items():
        print(f"Eval {attr_name}: {attr_pred}")
def __init__(self):
    """Wire up options, dataloaders, model, and visualizer, then start training."""
    opt = TrainOptions().parse()
    self._opt = opt
    # data_loader_train = CustomDatasetDataLoader(self._opt, is_for_train=True)
    # data_loader_test = CustomDatasetDataLoader(self._opt, is_for_train=False)
    # Train/val loaders share every setting except the split they draw from.
    def make_loader(split):
        return get_dataloader(opt.data_dir, img_size=opt.image_size,
                              selected_attrs=opt.selected_attrs,
                              mode=split, batch_size=opt.batch_size)
    self._dataset_train = make_loader('train')
    self._dataset_test = make_loader('val')
    self._dataset_train_size = len(self._dataset_train)
    self._dataset_test_size = len(self._dataset_test)
    # These are batch counts (len of the dataloader), not sample counts.
    print('#train image batches = %d' % self._dataset_train_size)
    print('#test image batches = %d' % self._dataset_test_size)
    self._model = ModelsFactory.get_by_name(opt.model, opt)
    self._tb_visualizer = TBVisualizer(opt)
    self._train()
def main():
    """Train a DCGAN on the configured dataset, then save generated samples."""
    args = get_args()
    # MNIST is grayscale; every other supported dataset is treated as RGB.
    if args.dataset == 'mnist':
        n_channels = 1
    else:
        n_channels = 3
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    data_loader = dataloader.get_dataloader(
        args.dataset, args.batch_size, args.data_folder
    )
    generator, discriminator = dcgan.get_models(
        args.nz, n_channels, args.feature_map, device)

    loss_function = nn.BCELoss()
    optimizer_generator = optim.Adam(
        generator.parameters(),
        lr=args.lr,
        betas=(args.beta1, args.beta2),
        weight_decay=args.weight_decay
    )
    # BUG FIX: previously betas=(args.beta1, args.beta1) — a copy-paste error
    # that ignored args.beta2 for the discriminator. Both optimizers now use
    # the same (beta1, beta2) Adam moment coefficients.
    optimizer_discriminator = optim.Adam(
        discriminator.parameters(),
        lr=args.lr,
        betas=(args.beta1, args.beta2),
        weight_decay=args.weight_decay
    )

    generator, discriminator = train_model(
        data_loader, discriminator, generator, args.epochs,
        loss_function, optimizer_discriminator, optimizer_generator,
        args.nz, device
    )
    print('Finished training')

    save.save_samples(
        args.data_folder, args.num_samples_to_save, generator, args.nz, device)
    print('Finished saving!')
def train_and_evaluate(opt, logger=None, tb_logger=None):
    """ Train and evaluate a model

    The basic understanding of `train_and_evaluate()` can be broken down
    into two parts. Part 1 focuses on getting the dataloaders, model,
    and trainer to conduct the training/evaluation. Part 2.A and 2.B is about
    training or evaluating, respectively.

    Given the mode, train_and_evaluate can take two actions:

    1) mode == TRAIN ---> action: train_and_validate
    2) mode == VAL   ---> action: evaluate the model on the full validation/test set

    Args:
        opt (Config): A state dictionary holding preset parameters
        logger (Logger): Logging instance
        tb_logger (SummaryWriter): Tensorboard logging instance

    Returns:
        None
    """
    #TODO implement Early Stopping
    #TODO implement test code
    logger = logger if logger else logging.getLogger('train-and-evaluate')
    logger.setLevel(opt.logging_level)

    # Read in dataset
    # check the path for the data loader to make sure it is loading the right data set
    data_loader = {
        mode: get_dataloader(data_dir=opt.data_dir,
                             batch_size=opt.batch_size,
                             mode=mode)
        for mode in [CONST.TRAIN, CONST.VAL]
    }

    # Create model
    model = HABClassifier(arch=opt.arch, pretrained=opt.pretrained,
                          num_classes=opt.class_num)

    # Initialize Trainer for initializing losses, optimizers, loading weights, etc
    # class_count (per-label frequencies from the training set) is presumably
    # used for class-balanced loss weighting — TODO confirm inside Trainer.
    trainer = Trainer(model=model, model_dir=opt.model_dir, mode=opt.mode,
                      resume=opt.resume, lr=opt.lr,
                      class_count=data_loader[CONST.TRAIN].dataset.data[
                          CONST.LBL].value_counts())

    #==== BEGIN OPTION 1: TRAINING ====#
    # Train and validate model if set to TRAINING
    # When training, we do both training and validation within the loop.
    # When set to the validation mode, this will run a full evaluation
    # and produce more summarized evaluation results. This is the default condition
    # if the mode is not training.
    if opt.mode == CONST.TRAIN:
        best_err = trainer.best_err

        # Baseline validation pass before any training epoch runs.
        Logger.section_break('Valid (Epoch {})'.format(trainer.start_epoch))
        err, acc, _, metrics_test = evaluate(trainer.model, trainer,
                                             data_loader[CONST.VAL], 0,
                                             opt.batch_size, logger,
                                             tb_logger, max_iters=None)
        metrics_best = metrics_test

        eps_meter = get_meter(
            meters=['train_loss', 'val_loss', 'train_acc', 'val_acc'])
        for ii, epoch in enumerate(
                range(trainer.start_epoch, trainer.start_epoch + opt.epochs)):
            # Train for one epoch
            Logger.section_break('Train (Epoch {})'.format(epoch))
            train_loss, train_acc = train(trainer.model, trainer,
                                          data_loader[CONST.TRAIN], epoch,
                                          logger, tb_logger, opt.batch_size,
                                          opt.print_freq)
            eps_meter['train_loss'].update(train_loss)
            eps_meter['train_acc'].update(train_acc)

            # Evaluate on validation set
            Logger.section_break('Valid (Epoch {})'.format(epoch))
            err, acc, _, metrics_test = evaluate(trainer.model, trainer,
                                                 data_loader[CONST.VAL], epoch,
                                                 opt.batch_size, logger,
                                                 tb_logger, max_iters=None)
            eps_meter['val_loss'].update(err)
            eps_meter['val_acc'].update(acc)

            # Remember best error and save checkpoint
            is_best = err < best_err
            best_err = min(err, best_err)
            state = trainer.generate_state_dict(epoch=epoch, best_err=best_err)

            # Periodic checkpoint (filename embeds epoch and accuracy) plus a
            # separate "best" checkpoint whenever validation error improves.
            if epoch % opt.save_freq == 0:
                trainer.save_checkpoint(
                    state, is_best=False,
                    filename='checkpoint-{}_{:0.4f}.pth.tar'.format(
                        epoch, acc))
            if is_best:
                metrics_best = metrics_test
                trainer.save_checkpoint(state, is_best=is_best,
                                        filename='model_best.pth.tar')
        # ==== END OPTION 1: TRAINING LOOP ====#

        # Generate evaluation plots
        opt.train_acc = max(eps_meter['train_acc'].data)
        opt.test_acc = max(eps_meter['val_acc'].data)
        #plot loss over eps
        vis_training(eps_meter['train_loss'].data,
                     eps_meter['val_loss'].data, loss=True)
        #plot acc over eps
        vis_training(eps_meter['train_acc'].data,
                     eps_meter['val_acc'].data, loss=False)
        #plot best confusion matrix
        plt.figure()
        metrics_best.compute_cm(plot=True)

    #==== BEGIN OPTION 2: EVALUATION ====#
    # EVALUATE the model if set to evaluation mode
    # Below you'll receive a more comprehensive report of the evaluation in the eval.log
    elif opt.mode == CONST.VAL:
        # NOTE(review): unlike the calls above, epoch/batch_size/max_iters are
        # not passed here — assumes evaluate() provides suitable defaults.
        err, acc, run_time, metrics = evaluate(
            model=trainer.model, trainer=trainer,
            data_loader=data_loader[CONST.VAL], logger=logger,
            tb_logger=tb_logger)
        Logger.section_break('EVAL COMPLETED')
        # Count only trainable parameters for the report.
        model_parameters = filter(lambda p: p.requires_grad,
                                  trainer.model.parameters())
        params = sum([np.prod(p.size()) for p in model_parameters])
        metrics.print_eval(params, run_time, err, acc, metrics.results_dir)
        cm, mca = metrics.compute_cm(plot=True)
# Wrap the model for multi-GPU execution when more than one GPU is configured.
if num_gpu > 1:
    model = torch.nn.DataParallel(model)
    print("make DataParallel")
# NOTE(review): collapsed source is ambiguous here — .cuda() is assumed to run
# for any GPU count (matching the sibling setup block elsewhere in this file);
# confirm against the original layout.
model = model.cuda()
print("Done")
###################################stage Enc setting ##############################################
if (not args.decoder_only):
    # Set up run directory + file logger; logdir is the path relative to save_dir.
    logger, this_savedir = info_setting(test_config['save_dir'],
                                        test_config["Model"],
                                        total_paramters, N_flop)
    logger.flush()
    logdir = this_savedir.split(test_config['save_dir'])[1]
    # Visdom-style logger on port 8097 (optionally routed through NSML).
    my_logger = Logger(8097, './logs/' + logdir, args.use_nsml)

    trainLoader, valLoader, data = get_dataloader(data_config)
    print(data['mean'])
    print(data['std'])

    weight = torch.from_numpy(
        data['classWeights'])  # convert the numpy array to torch
    print(weight)

    # Loss selection: Lovasz hinge or class-weighted 2D cross-entropy,
    # both ignoring the configured ignore index.
    if test_config["loss"] == "Lovasz":
        from etc.lovasz_losses import lovasz_hinge
        criteria = lovasz_hinge(ignore=data_config["ignore_idx"])
    else:
        from etc.Criteria import CrossEntropyLoss2d
        criteria = CrossEntropyLoss2d(
            weight, ignore=data_config["ignore_idx"])  # weight
def train_and_evaluate(opt, logger=None):
    """ Train and evaluate a model

    The basic understanding of `train_and_evaluate()` can be broken down
    into two parts. Part 1 focuses on getting the dataloaders, model,
    and trainer to conduct the training/evaluation. Part 2.A and 2.B is about
    training or evaluating, respectively.

    Given the mode, train_and_evaluate can take two actions:

    1) mode == TRAIN ---> action: train_and_validate
    2) mode == VAL   ---> action: evaluate the model on the full validation/test set

    Args:
        opt (Config): A state dictionary holding preset parameters
        logger (Logger): Logging instance

    Returns:
        None
    """
    #TODO implement Early Stopping
    #TODO implement test code
    logger = logger if logger else logging.getLogger('train-and-evaluate')
    logger.setLevel(opt.logging_level)

    # Read in dataset
    # check the path for the data loader to make sure it is loading the right data set
    data_loader = {mode: get_dataloader(data_dir=opt.data_dir,
                                        batch_size=opt.batch_size,
                                        mode=mode)
                   for mode in [CONST.TRAIN, CONST.VAL]}

    # Create model (binary classifier: num_classes is fixed to 2 here).
    model = MODEL(arch=opt.arch, pretrained=opt.pretrained, num_classes=2)

    # Initialize Trainer for initializing losses, optimizers, loading weights, etc
    trainer = Trainer(model=model, model_dir=opt.model_dir, mode=opt.mode,
                      resume=opt.resume, lr=opt.lr)

    #==== TRAINING ====#
    # Train and validate model if set to TRAINING
    # When training, we do both training and validation within the loop.
    # When set to the validation mode, this will run a full evaluation
    # and produce more summarized evaluation results. This is the default condition
    # if the mode is not training.
    if opt.mode == CONST.TRAIN:
        best_err = trainer.best_err

        # Baseline validation pass before training starts.
        Logger.section_break('Valid (Epoch {})'.format(trainer.start_epoch))
        err, acc, _ = evaluate(trainer.model, trainer, data_loader[CONST.VAL],
                               0, opt.batch_size, logger)
        eps_meter = get_meter(meters=['train_loss', 'val_loss',
                                      'train_acc', 'val_acc'])
        # Fold the pre-training validation error into the running best.
        best_err = min(best_err, err)
        for ii, epoch in enumerate(range(trainer.start_epoch,
                                         trainer.start_epoch + opt.epochs)):
            # Train for one epoch
            Logger.section_break('Train (Epoch {})'.format(epoch))
            train_loss, train_acc = train(trainer.model, trainer,
                                          data_loader[CONST.TRAIN], epoch,
                                          logger, opt.batch_size,
                                          opt.print_freq)
            eps_meter['train_loss'].update(train_loss)
            eps_meter['train_acc'].update(train_acc)

            # Evaluate on validation set
            Logger.section_break('Valid (Epoch {})'.format(epoch))
            err, acc, _ = evaluate(trainer.model, trainer,
                                   data_loader[CONST.VAL], epoch,
                                   opt.batch_size, logger)
            eps_meter['val_loss'].update(err)
            eps_meter['val_acc'].update(acc)

            # Remember best error and save checkpoint (only the best model is
            # persisted in this variant — no periodic checkpoints).
            is_best = err < best_err
            best_err = min(err, best_err)
            state = trainer.generate_state_dict(epoch=epoch,
                                                best_err=best_err)
            if is_best:
                trainer.save_checkpoint(state, is_best=is_best,
                                        filename='model_best.pth.tar')
        # ==== END: TRAINING LOOP ====#

        # Plot curves only if at least one epoch actually ran.
        if len(eps_meter['train_loss'].data) > 0:
            #plot loss over eps
            vis_training(eps_meter['train_loss'].data,
                         eps_meter['val_loss'].data, loss=True)
            #plot acc over eps
            vis_training(eps_meter['train_acc'].data,
                         eps_meter['val_acc'].data, loss=False)
def deploy(opt, logger=None, tb_logger=None):
    """ Deploy a model in production mode, assumes unseen/unlabeled data as input

    Function call is intended to run model on unseen/unlabeled data, hence deployment.

    Args:
        opt (Config): A state dictionary holding preset parameters
        logger (Logger): Logging instance
        tb_logger (SummaryWriter): Tensorboard logging instance (optional)

    Returns:

    """
    # BUG FIX: tb_logger was previously a free name (not a parameter and not
    # defined locally), so the evaluate(...) call below raised NameError.
    # It is now an optional keyword parameter, matching train_and_evaluate().
    logger = logger if logger else logging.getLogger('deploy')
    logger.setLevel(opt.logging_level)
    start_datetime = datetime.today().strftime('%Y-%m-%d_%H:%M:%S')

    # read data
    data_loader = get_dataloader(mode=CONST.DEPLOY, csv_file=opt.deploy_data,
                                 batch_size=opt.batch_size,
                                 input_size=opt.input_size)

    # load model
    model = HABClassifier(arch=opt.arch, pretrained=opt.pretrained,
                          num_classes=opt.class_num)

    # Initialize Trainer for initializing losses, optimizers, loading weights, etc
    trainer = Trainer(model=model, model_dir=opt.model_dir, mode=opt.mode,
                      resume=opt.resume)

    # Run Predictions
    Logger.section_break('Deploy')
    logger.info('Starting deployment...')
    err, acc, run_time, metrics = evaluate(model=trainer.model,
                                           trainer=trainer,
                                           data_loader=data_loader,
                                           logger=logger,
                                           tb_logger=tb_logger)
    # Lab configurations write into a parallel "_static_html" directory.
    dest_dir = opt.deploy_data + '_static_html' if opt.lab_config else opt.deploy_data
    metrics.save_predictions(start_datetime, run_time.avg, opt.model_dir,
                             dest_dir)

    # compute hab accuracy (only when ground truth is available for deploy data)
    hab_acc = metrics.compute_hab_acc() if opt.hab_eval else 'NOT EVALUATED'
    # plot confusion matrix and get mca
    _, mca_acc = metrics.compute_cm(plot=True)
    # plot roc curve
    _, _, auc_score = metrics.compute_roc_auc_score(plot=True)
    # plot precision recall curve
    _, _, average_precision = metrics.compute_precision_recall_ap_score(
        plot=True)

    Logger.section_break('DEPLOY COMPLETED')
    # Count only trainable parameters for the final report.
    model_parameters = filter(lambda p: p.requires_grad,
                              trainer.model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    metrics.print_eval(params, run_time, err, acc, metrics.results_dir,
                       hab_accuracy=hab_acc,
                       mean_class_accuracy=mca_acc,
                       auc_score=auc_score['micro'],
                       average_precision=average_precision['micro'])
def build_dataloader(self):
    """Create the train/test dataloaders and cache them on the instance."""
    loaders = dataloader.get_dataloader(self.batch_size, self.test_batch_size)
    self.train_loader, self.test_loader = loaders
# GPU placement: wrap in DataParallel for multi-GPU, move to CUDA for any GPU.
if num_gpu > 0:
    print("Use gpu : %d" % num_gpu)
    if num_gpu > 1:
        model = torch.nn.DataParallel(model)
        print("make parallel")
    # NOTE(review): collapsed source is ambiguous — .cuda() assumed to apply
    # whenever num_gpu > 0; confirm against the original layout.
    model = model.cuda()
    print("GPU setting Done")

###################################stage Enc setting ##############################################
if (not args.decoder_only):
    # Run directory + file logger; logdir is the path relative to save_dir.
    logger, this_savedir = info_setting(train_config['save_dir'],
                                        train_config["Model"])
    logger.flush()
    logdir = this_savedir.split(train_config['save_dir'])[1]

    # One base loader plus four scale-specific training loaders.
    trainLoader, trainLoader_scale1, trainLoader_scale2, trainLoader_scale3, trainLoader_scale4, valLoader, data \
        = get_dataloader(data_config)

    weight = torch.from_numpy(
        data['classWeights'])  # convert the numpy array to torch
    # Zero out the weight of the last class so it contributes no loss —
    # presumably the ignore/background label; confirm against the dataset.
    weight[-1] = 0
    if num_gpu > 0:
        weight = weight.cuda()
    print(weight)

    criteria = CrossEntropyLoss2d(weight)
    if num_gpu > 0:
        criteria = criteria.cuda()

    params_set = []
    names_set = []
    if args.wd_tfmode:
def eval(cfg):
    """Evaluate AsymNet on DeepFashion2 validation: extract shop/video features,
    compute pairwise matching scores in tiles, and report top-k retrieval accuracy.

    Note: this shadows the builtin `eval`; kept for caller compatibility.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpus
    # Equal loss weights at eval time (training uses asymmetric weights).
    cfg.weights = [1, 1]
    model = AsymNet(cfg)
    device = 'cuda' if torch.cuda.device_count() > 0 else 'cpu'
    print("Loading checkpoint...")
    ckpt = torch.load(cfg.checkpoint, map_location=device)
    model.load_state_dict(ckpt['model'])
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
    model = model.to(device)

    # Define Dataset
    # NOTE(review): annotation and image paths use different roots
    # ('./dataset/deepfashion2/validation' vs './dataset/validation') — verify.
    train_dataset = DeepFashion2Dataset(
        './dataset/deepfashion2/validation/annots.json',
        './dataset/validation/image', get_transform(), False)
    # One shop image + n_frames video frames per batch.
    data_loader = dataloader.get_dataloader(
        train_dataset,
        batch_size=(cfg.n_frames + 1),
        n_frames=cfg.n_frames,
        n_shops=1,
        is_parallel=torch.cuda.device_count() > 1)
    model.eval()

    # Results are cached per checkpoint directory name.
    cfg.save_folder = os.path.join(cfg.save_folder,
                                   cfg.checkpoint.split('/')[-2])
    if not os.path.isfile(os.path.join(cfg.save_folder, 'shops.pth')) \
            or not os.path.isfile(os.path.join(cfg.save_folder, 'videos.pth')):
        os.makedirs(cfg.save_folder, exist_ok=True)
        dict_shops = {}
        dict_videos = {}
        with tqdm(total=len(data_loader), desc='Extracting features',
                  unit='iter', dynamic_ncols=True) as pbar:
            for i, batch in enumerate(data_loader):
                imgs = batch[0]
                imgs = imgs.to(device)
                # pair_key identifies the (shop, video) pair as "id0_id1".
                pair_key = batch[1][0]
                pair_key = str(pair_key[0].detach().numpy()) + '_' + str(
                    pair_key[1].detach().numpy())
                with torch.no_grad():
                    # First image in the batch is the shop image; the rest are
                    # video frames.
                    shop_features = model.IFN(imgs[0, ...].unsqueeze(0))
                    video_features = torch.stack(model.VFN(imgs[1:, ...]))
                dict_shops.update({pair_key: shop_features.detach().cpu()})
                dict_videos.update({pair_key: video_features.detach().cpu()})
                pbar.update()
            pbar.write(
                f"{len(dict_shops.keys())} shops and {len(dict_videos.keys())} videos extracted"
            )
            pbar.close()
        # NOTE(review): saving shops.pth is commented out, so the isfile check
        # above never passes and features are re-extracted every run — confirm
        # whether this is intentional.
        # torch.save(dict_shops, os.path.join(cfg.save_folder, 'shops.pth'))
        torch.save(dict_videos, os.path.join(cfg.save_folder, 'videos.pth'))
    else:
        dict_shops = torch.load(os.path.join(cfg.save_folder, 'shops.pth'))
        dict_videos = torch.load(os.path.join(cfg.save_folder, 'videos.pth'))

    if not os.path.isfile(os.path.join(cfg.save_folder, 'scores.pth')):
        keys = dict_shops.keys()
        # Three score matrices: raw SN score, per-frame mean, per-frame max.
        scores = torch.zeros((len(keys), len(keys)))
        scores_mean = torch.zeros((len(keys), len(keys)))
        scores_max = torch.zeros((len(keys), len(keys)))
        # Score the full pairwise grid in tiles of `step` videos at a time.
        step = cfg.n_shops
        with tqdm(total=len(keys), desc='Extracting matching score',
                  unit='iter', dynamic_ncols=True) as pbar:
            for i, sh in enumerate(keys):
                sh_f = dict_shops[sh].repeat(step, 1)
                for j in range(0, len(keys), step):
                    vi = [
                        list(keys)[jj]
                        for jj in range(j, min([j + step, len(keys)]))
                    ]
                    vi_f = torch.cat([dict_videos[vii] for vii in vi], dim=1)
                    # Zero-pad the last (partial) tile to the full tile width.
                    if vi_f.size(1) != cfg.n_frames * step:
                        vi_f = torch.cat([
                            vi_f,
                            torch.zeros(
                                (1, cfg.n_frames * step - vi_f.size(1), 1024))
                        ], dim=1)
                    with torch.no_grad():
                        tmp, tmp_y = model.SN(sh_f.squeeze().to(device),
                                              vi_f.squeeze().to(device))
                    # torch.diag picks the aligned (shop_i, video_i) entries
                    # of each step x step tile; the slice trims padding.
                    scores_mean[i, j:min([j + step, len(keys)])] = torch.diag(
                        torch.mean(tmp_y.view(
                            cfg.n_shops**2, cfg.n_frames, -1),
                            dim=1).view(step, step)
                    )[:min([j + step, len(keys)]) - j].detach().cpu()
                    scores_max[i, j:min([j + step, len(keys)])] = torch.diag(
                        torch.max(tmp_y.view(
                            cfg.n_shops**2, cfg.n_frames, -1),
                            dim=1)[0].view(step, step)
                    )[:min([j + step, len(keys)]) - j].detach().cpu()
                    scores[i, j:min([j + step, len(keys)])] = torch.diag(
                        tmp.view(
                            step, step))[:min([j + step, len(keys)]) -
                                         j].detach().cpu()
                pbar.update()
        torch.save(
            {
                'scores': scores,
                'scores_avg': scores_mean,
                'scores_max': scores_max
            }, os.path.join(cfg.save_folder, 'scores.pth'))
    else:
        scores = torch.load(os.path.join(cfg.save_folder,
                                         'scores.pth'))['scores']
        scores_mean = torch.load(os.path.join(cfg.save_folder,
                                              'scores.pth'))['scores_avg']
        scores_max = torch.load(os.path.join(cfg.save_folder,
                                             'scores.pth'))['scores_max']

    # Report retrieval accuracy for each of the three score variants.
    top1, top5, top10, top15, top20 = compute_scores(scores_mean)
    print(
        f"Final score [AVG]:\nTop1Acc:{top1}\nTop5Acc:{top5}\nTop10Acc:{top10}\nTop15Acc:{top15}\nTop20Acc:{top20}\n"
    )
    top1, top5, top10, top15, top20 = compute_scores(scores_max)
    print(
        f"Final score [MAX]:\nTop1Acc:{top1}\nTop5Acc:{top5}\nTop10Acc:{top10}\nTop15Acc:{top15}\nTop20Acc:{top20}\n"
    )
    top1, top5, top10, top15, top20 = compute_scores(scores)
    print(
        f"Final score:\nTop1Acc:{top1}\nTop5Acc:{top5}\nTop10Acc:{top10}\nTop15Acc:{top15}\nTop20Acc:{top20}\n"
    )
def main():
    """Train/validate/test a (C)RNN video classifier over sliding windows of frames.

    Per-frame logits are averaged over the window before the loss/prediction.
    """
    if torch.cuda.is_available():
        torch.cuda.set_device(arg.gpu_num)
        torch.cuda.current_device()
    if not os.path.exists('model'):
        os.makedirs('model')
    if not os.path.exists('log'):
        os.makedirs('log')
    model_path = 'model/model_{}'.format(arg.model)+str(arg.lr)+'_'+arg.net+'.pt'

    # Log both to console and to a per-run file.
    logger = logging.getLogger('netlog')
    logger.setLevel(logging.INFO)
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    logger.addHandler(console)
    ch = logging.FileHandler('log/logfile_LSTM'+str(arg.lr)+'_'+arg.net+'.log')
    ch.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    logger.info("================================================")
    logger.info("Learning Rate: {}".format(arg.lr))
    logger.info("Nbr of Epochs: {}".format(arg.epochs))
    logger.info("Batch Size: {}".format(arg.batchSize))
    logger.info("Window Size: {}".format(arg.windowSize))
    logger.info("Hidden Layer Dimension: {}".format(arg.h_dim))
    logger.info("GPU num: {}".format(arg.gpu_num))
    logger.info('Model Type: {}'.format(arg.model))

    #root_dir = 'UCF11_split'
    #train_path = root_dir+'/train'
    #test_path = root_dir+'/test'
    num_of_classes=10
    trainLoader = get_dataloader(fold=[arg.train_fold], batch_size=1, shuffle=True, db_prepped=True)
    testLoader = get_dataloader(fold=[arg.test_fold], batch_size=1, shuffle=True, db_prepped=True)
    trainSize = len(trainLoader)
    testSize = len(testLoader)

    # Model selection: recurrent (CRNN with VGG/AlexNet backbone) vs plain CNN.
    # NOTE(review): indentation reconstructed — the else is assumed to pair
    # with the outer `arg.model` test; confirm against the original layout.
    if arg.model == 'CRNN':
        if arg.net == 'VGG':
            model = VGG(arg.h_dim, num_of_classes)
        elif arg.net =='AlexNet':
            model = AlexNet(arg.h_dim, num_of_classes)
    else:
        model = UrbanCNN(net=arg.net, num_of_classes=num_of_classes)
    if arg.useGPU_f:
        model.cuda()
    optimizer = optim.Adam(model.parameters(),lr=arg.lr)
    criterion = nn.CrossEntropyLoss()
    optimizer.zero_grad()

    # Random initial LSTM hidden state (h, c), shared/carried across batches.
    if arg.useGPU_f:
        hidden = (
            Variable(torch.randn(1,arg.batchSize,arg.h_dim).cuda(),requires_grad=False),
            Variable(torch.randn(1,arg.batchSize,arg.h_dim).cuda(),requires_grad=False))
    else:
        hidden = (
            Variable(torch.randn(1,arg.batchSize,arg.h_dim),requires_grad=True),
            Variable(torch.randn(1,arg.batchSize,arg.h_dim),requires_grad=True))

    # Despite the name, min_acc tracks the BEST validation accuracy seen so far
    # (it is only ever raised below).
    min_acc=0.0

    ##########################
    ##### Start Training #####
    ##########################
    # train_f == False skips training entirely (epochs = 0).
    epochs = arg.epochs if arg.train_f else 0
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        for batchIdx,(windowBatch,labelBatch) in enumerate(trainLoader):
            #loss=0.0
            if arg.useGPU_f:
                y=torch.zeros(arg.batchSize, num_of_classes).cuda()
                windowBatch = Variable(windowBatch.cuda(),requires_grad=True).float()
                labelBatch = Variable(labelBatch.cuda(),requires_grad=False).long()
            else:
                y=torch.zeros(arg.batchSize, num_of_classes)
                windowBatch = Variable(windowBatch,requires_grad=True).float()
                labelBatch = Variable(labelBatch,requires_grad=False).long()
            windowSize = windowBatch.shape[1]
            # Accumulate per-frame logits over the window; hidden state is
            # detached each step to truncate backprop through time.
            for i in range(windowSize):
                imgBatch = windowBatch[:,i,:,:,:]
                if arg.model == 'CRNN':
                    temp,hidden = model(imgBatch,hidden)
                    (h,c) = hidden
                    hidden = (h.detach(), c.detach())
                else:
                    temp = model(imgBatch)
                #loss_ = criterion(temp,labelBatch)
                #loss+=loss_.data
                y += temp
            Y=y/windowSize
            # NOTE(review): re-wrapping Y as a new leaf Variable appears to cut
            # the graph, so this backward likely does not update the model —
            # confirm intended behavior.
            Y = Variable(Y, requires_grad=True)
            loss = criterion(Y,labelBatch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            _,pred = torch.max(Y,1) ### prediction should after averging the array
            train_acc = (pred == labelBatch.data).sum()
            train_acc = 100.0*train_acc.data.cpu().numpy()/arg.batchSize
            #print('train acc', train_acc, 'train loss', loss.data.cpu())
            if batchIdx%10==0:
                logger.info("epochs:{}, train loss:{}, train acc:{}".format(epoch, loss.data.cpu(), train_acc))

        ########################
        ### Start Validation ###
        ########################
        model.eval()
        val_acc=0.0
        for batchIdx,(windowBatch,labelBatch) in enumerate(testLoader):
            if arg.useGPU_f:
                y=torch.zeros(arg.batchSize, num_of_classes).cuda()
                windowBatch = Variable(windowBatch.cuda(),requires_grad=False).float()
                labelBatch = Variable(labelBatch.cuda(),requires_grad=False).long()
            else:
                y=torch.zeros(arg.batchSize, num_of_classes)
                windowBatch = Variable(windowBatch,requires_grad=False).float()
                labelBatch = Variable(labelBatch,requires_grad=False).long()
            windowSize = windowBatch.shape[1]
            for i in range(windowSize):
                imgBatch = windowBatch[:,i,:,:,:]
                if arg.model == 'CRNN':
                    temp,hidden = model(imgBatch,hidden)
                    (h,c) = hidden
                    hidden = (h.detach(), c.detach())
                else:
                    temp = model(imgBatch)
                #loss_ = criterion(temp,labelBatch)
                #loss+=loss_.data
                y += temp
            Y=y/windowSize
            loss = criterion(Y,labelBatch)
            _,pred = torch.max(Y,1)
            # val_acc is overwritten per batch, so the values logged/compared
            # below reflect the most recent batch only.
            val_acc = (pred == labelBatch.data).sum()
            val_acc = 100.0*val_acc.data.cpu().numpy()/arg.batchSize
        logger.info("==> val loss:{}, val acc:{}".format(loss.data.cpu().numpy(),val_acc))
        # Checkpoint whenever validation accuracy improves on the best so far.
        if val_acc>min_acc:
            min_acc=val_acc
            torch.save(model.state_dict(), model_path)

    ##########################
    ##### Start Testing  #####
    ##########################
    model.eval()
    # NOTE(review): bare torch.no_grad() call has no effect outside a `with`
    # block — gradients are still tracked here.
    torch.no_grad()
    test_acc=0.0
    # Reload the best checkpoint saved during validation, if any.
    if os.path.isfile(model_path):
        model.load_state_dict(torch.load(model_path))
    for batchIdx,(windowBatch,labelBatch) in enumerate(testLoader):
        if arg.useGPU_f:
            y=torch.zeros(arg.batchSize, num_of_classes).cuda()
            windowBatch = Variable(windowBatch.cuda(),requires_grad=False).float()
            labelBatch = Variable(labelBatch.cuda(),requires_grad=False).long()
        else:
            y=torch.zeros(arg.batchSize, num_of_classes)
            windowBatch = Variable(windowBatch,requires_grad=False).float()
            labelBatch = Variable(labelBatch,requires_grad=False).long()
        windowSize = windowBatch.shape[1]
        for i in range(windowSize):
            imgBatch = windowBatch[:,i,:,:,:]
            if arg.model == 'CRNN':
                temp, hidden = model(imgBatch, hidden)
                (h, c) = hidden
                hidden = (h.detach(), c.detach())
            else:
                temp = model(imgBatch)
            #loss_ = criterion(temp,labelBatch)
            #loss+=loss_.data
            y += temp
        Y=y/windowSize
        loss = criterion(Y,labelBatch)
        # argmax over y equals argmax over Y (Y is y scaled by a constant).
        _,pred = torch.max(y,1)
        test_acc += (pred == labelBatch.data).sum()
    test_acc = 100.0*test_acc.data.cpu().numpy()/testSize
    logger.info("==> test loss:{}, test acc:{}".format(loss.data.cpu().numpy(),test_acc))
def main():
    """Parse hyperparameters, build the model and dataloaders, optionally
    resume from a checkpoint, then run the train/validate loop."""
    # Hyperparameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default='', type=str, choices=['', ''])
    parser.add_argument('--feature_path', default='', help='path to datasets')
    parser.add_argument('--dropout', default=0.5, type=float, help='prefix of feature')
    parser.add_argument('--num_epochs', default=30, type=int, help='Number of training epochs.')
    parser.add_argument('--batch_size', default=1, type=int, help='Size of a training mini-batch.')
    parser.add_argument('--iter_size', default=1, type=int, help='Size of a training mini-batch.')
    parser.add_argument('--weight_decay', default=0.005, type=float)
    parser.add_argument('--grad_clip', default=1., type=float, help='Gradient clipping threshold.')
    parser.add_argument('--learning_rate', default=.001, type=float, help='Initial learning rate.')
    parser.add_argument('--lr_update', default=15, type=int, help='Number of epochs to update the learning rate.')
    parser.add_argument('--workers', default=10, type=int, help='Number of data loader workers.')
    parser.add_argument('--log_step', default=38, type=int, help='Number of steps to print and record the log.')
    parser.add_argument('--val_step', default=500, type=int, help='Number of steps to run validation.')
    parser.add_argument('--logger_name', default='runs/runX', help='Path to save the model and Tensorboard log.')
    parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')
    parser.add_argument('--storage_place', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')
    opt = parser.parse_args()

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    train_loader, val_loader = get_dataloader(opt)

    # Construct the model
    model = Model(opt)
    # model = torch.nn.DataParallel(model, device_ids=[0,1,2,3,4,5,6,7]).cuda()

    # BUG FIX: best_rsum is initialized BEFORE the resume branch. Previously it
    # was reset to 0 after resuming, silently discarding the checkpoint's best
    # score and letting a worse model overwrite the "best" checkpoint.
    best_rsum = 0
    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the Model
    for epoch in range(opt.num_epochs):
        adjust_learning_rate(opt, model.optimizer, epoch)

        # train for one epoch
        train(opt, train_loader, model, epoch)

        # evaluate on validation set
        rsum = validation(opt, val_loader, model, epoch)

        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        # NOTE(review): opt.modality is not defined by this parser — confirm it
        # is attached to opt elsewhere before save_checkpoint runs.
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            },
            is_best,
            filename='{}_{}_checkpoint.pth.tar'.format(opt.dataset, opt.modality),
            prefix=opt.logger_name + '/')
def train(cfg):
    """Train AsymNet on DeepFashion2: phase-dependent loss backprop, periodic
    TensorBoard logging, and per-epoch (plus rolling 'last') checkpoints."""
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpus
    # Asymmetric loss weights scaled by the number of shops per batch.
    cfg.weights = [1, cfg.n_shops // 2]
    model = AsymNet(cfg)
    device = 'cuda' if torch.cuda.device_count() > 0 else 'cpu'
    optim_sd = None
    ep = 0
    # Optionally warm-start model weights (and epoch counter) from a checkpoint;
    # restoring the optimizer state is currently disabled.
    if cfg.pretrained is not None:
        print("Loading checkpoint...")
        ckpt = torch.load(cfg.pretrained, map_location=device)
        model.load_state_dict(ckpt['model'])
        ep = ckpt['e']
        # optim_sd = ckpt['optimizer']
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
    model = model.to(device)
    save_tensor = True
    # Run name encodes every hyperparameter relevant to this training run.
    name = f'{cfg.dataset}_TOT-E_{cfg.num_epoch}_LR_{cfg.lr}_SHOP_{cfg.n_shops}_FRAMES_{cfg.n_frames}_WEIGHTS_{cfg.weights}_TR_PHASE_{cfg.training_phase}'
    if save_tensor:
        writer = SummaryWriter(log_dir=os.path.join(cfg.log_dir, name))

    # Define Dataset
    if cfg.dataset == 'deepfashion2':
        from data import dataloader
        train_dataset = DeepFashion2Dataset('./dataset/train/annots.json',
                                            './dataset/train/image',
                                            get_transform())
        # Each batch: n_shops groups of (1 shop image + n_frames video frames).
        data_loader = dataloader.get_dataloader(
            train_dataset,
            batch_size=(cfg.n_frames + 1) * cfg.n_shops,
            n_frames=cfg.n_frames,
            n_shops=cfg.n_shops,
            is_parallel=torch.cuda.device_count() > 1)

    optimizer = optim.SGD(params=model.parameters(),
                          lr=cfg.lr,
                          momentum=0.9,
                          weight_decay=0.0005)
    if optim_sd is not None:
        optimizer.load_state_dict(optim_sd)

    losses_match = []
    losses_snn = []
    model.train()
    for epoch in range(ep, cfg.num_epoch):
        with tqdm(total=len(data_loader),
                  desc=f'Epoch {epoch + 1}/{cfg.num_epoch}',
                  unit='iter', dynamic_ncols=True) as pbar:
            for i, batch in enumerate(data_loader):
                imgs = batch[0]
                imgs = imgs.to(device)
                # Second arg toggles the matching-only forward path in phase 1.
                scores, snn_score, l_match, l_snn = model(
                    imgs, cfg.training_phase == 1)
                loss = l_match + l_snn
                losses_match.append(l_match.detach().cpu())
                losses_snn.append(l_snn.detach().cpu())
                # Log the mean of the losses accumulated since the last log
                # step, then reset the accumulators.
                if i % cfg.log_step == 0 and save_tensor:
                    global_step = (epoch * len(data_loader)) + i
                    log_loss_match = torch.stack(losses_match).mean()
                    log_loss_snn = torch.stack(losses_snn).mean()
                    writer.add_scalar('train/Loss_match', log_loss_match,
                                      global_step)
                    writer.add_scalar('train/Loss_snn', log_loss_snn,
                                      global_step)
                    writer.add_scalar('train/Loss_total',
                                      log_loss_match + log_loss_snn,
                                      global_step)
                    pbar.write(
                        'Train Epoch_{} step_{}: loss : {},loss match : {},loss ssn : {}, max_snn : {}, min_snn : {}, max_score : {}, min_score : {}'
                        .format(epoch, i, log_loss_match + log_loss_snn,
                                log_loss_match, log_loss_snn,
                                snn_score.max(), snn_score.min(),
                                scores.max(), scores.min()))
                    losses_match = []
                    losses_snn = []
                # Phase 0 trains on the SNN loss only, phase 1 on the matching
                # loss only, any other phase on their sum.
                if cfg.training_phase == 0:
                    l_snn.backward()
                elif cfg.training_phase == 1:
                    l_match.backward()
                else:
                    loss.backward()
                optimizer.step()
                # NOTE(review): no zero_grad() call is active here (the one
                # below is commented out), so gradients accumulate across
                # iterations — confirm whether this is intentional.
                # model.zero_grad()
                pbar.update()
        os.makedirs(os.path.join(cfg.save_folder, name), exist_ok=True)
        # Periodic numbered checkpoint every save_step epochs...
        if epoch % cfg.save_step == 0:
            save_path = os.path.join(cfg.save_folder, name,
                                     str(epoch) + '_model.pth')
            if torch.cuda.device_count() > 1:
                torch.save(
                    {
                        'e': epoch,
                        'model': model.module.state_dict(),
                        'optimizer': optimizer.state_dict()
                    }, save_path)
            else:
                torch.save(
                    {
                        'e': epoch,
                        'model': model.state_dict(),
                        'optimizer': optimizer.state_dict()
                    }, save_path)
        # ...plus a rolling 'last.pth' written every epoch.
        save_path = os.path.join(cfg.save_folder, name, 'last.pth')
        if torch.cuda.device_count() > 1:
            torch.save(
                {
                    'e': epoch,
                    'model': model.module.state_dict(),
                    'optimizer': optimizer.state_dict()
                }, save_path)
        else:
            torch.save(
                {
                    'e': epoch,
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                }, save_path)
# NOTE(review): this chunk starts mid-way through a parser.add_argument(...)
# call whose opening is outside the visible source.
                    default=False,
                    help='Flag for quick development (STORE_FALSE)(default: True)')
parser.add_argument('--debug',
                    action='store_true',
                    default=False,
                    help='Flag to debug (STORE_FALSE)(default: True)')
arg = parser.parse_args()

since = time.time()
if not arg.debug:
    # Normal mode: build (and optionally save) the fold database from scratch.
    if not os.path.exists(os.path.join('data/folds')):
        os.makedirs(os.path.join('data/folds'))
    print('Preparing dataset for fold {}'.format(arg.fold))
    loader = get_dataloader(fold=[arg.fold],
                            db_prepped=False,
                            batch_size=1,
                            shuffle=True,
                            save=arg.save,
                            quick_dev=arg.quick_dev)
else:
    # Debug mode: load the already-prepared fold and sanity-check one window.
    print('Testing dataloader for fold {}'.format(arg.fold))
    loader = get_dataloader(fold=[arg.fold],
                            db_prepped=True,
                            batch_size=1,
                            shuffle=True,
                            save=arg.save,
                            quick_dev=arg.quick_dev)
    window, label = next(iter(loader))
    # Print per-frame stats of the first window batch for inspection.
    for ii in range(len(window)):
        img = window.numpy()[:, ii, :, :, :]
        lbl = label.numpy()
        print(ii, window.shape, window.min(), window.max(), window.dtype)