def __init__(self, Net, device, global_records, config):
    # Initializations
    self.device = device
    self.global_records = global_records
    self.config = config
    self.logger = logging.getLogger(__name__)

    # Initialize network
    self.net = Net(**self.config['net'])
    # Then load its params if available
    if self.config['net'].get('saved_params_path', None) is not None:
        self.load_net(self.config['net']['saved_params_path'])

    # Initialize optimizer
    self.setup_optimizer()

    # Initialize learning rate scheduler
    self.setup_lr_scheduler()

    # Transfer network to device
    self.net.to(self.device)
    self.logger.info(self.net)
    self.logger.info("Number of parameters: %d" % count_parameters(self.net))

    # Losses for all models (more can be defined in derived models if needed)
    self.mse_loss_fn = nn.MSELoss(reduction='none')
    self.mae_loss_fn = nn.L1Loss(reduction='none')

    # Initialize epoch number
    self.epoch = 0
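# The count_parameters helper used above is assumed to be defined elsewhere in
# the project; a minimal sketch of what it likely does (sum over trainable
# parameters of a torch module) is:
def count_parameters(net):
    """Return the number of trainable parameters in a torch module."""
    return sum(p.numel() for p in net.parameters() if p.requires_grad)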
def main():
    torch.manual_seed(1)
    if args.cuda:
        torch.cuda.manual_seed(1)
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True

    exp_dir = os.path.join("data", args.exp_name)
    make_dir_if_not_exist(exp_dir)

    model = Resnet(device, args.num_classes)
    model = nn.DataParallel(model, device_ids=args.gpu_devices)
    model.to(device)

    model_dict = None
    if args.ckp:
        if os.path.isfile(args.ckp):
            print("=> Loading checkpoint '{}'".format(args.ckp))
            model_dict = torch.load(args.ckp)
            print("=> Loaded checkpoint '{}'".format(args.ckp))

    if (args.mode == 'demo') and (model_dict is None):
        print("Please specify model path")
        return

    if model_dict is not None:
        model.load_state_dict(model_dict['state_dict'])

    cudnn.benchmark = True

    # Collect only the trainable parameters for the optimizer
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            params += [{'params': [value]}]
    print("Number of trainable params - {}".format(count_parameters(model)))

    criterion = torch.nn.NLLLoss(weight=None, ignore_index=255)
    optimizer = optim.Adam(params, lr=args.lr)

    if args.mode == 'demo':
        train_data_loader, test_data_loader = sample_data(args.dset)
        test(test_data_loader, model, criterion, demo=True)
        return

    for epoch in range(1, args.epochs + 1):
        train_data_loader, test_data_loader = sample_data(args.dset)
        test(test_data_loader, model, criterion)
        train(train_data_loader, model, criterion, optimizer, epoch)

        model_to_save = {
            "epoch": epoch + 1,
            'state_dict': model.state_dict(),
        }
        if epoch % args.ckp_freq == 0:
            file_name = os.path.join(exp_dir, "checkpoint_" + str(epoch) + ".pth")
            save_checkpoint(model_to_save, file_name)
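# make_dir_if_not_exist and save_checkpoint are assumed project helpers; minimal
# sketches consistent with how main() calls them (not the actual implementations):
import os
import torch

def make_dir_if_not_exist(path):
    # Create the directory tree if it does not already exist.
    os.makedirs(path, exist_ok=True)

def save_checkpoint(state, file_name):
    # Serialize the checkpoint dict ({'epoch': ..., 'state_dict': ...}) to disk.
    torch.save(state, file_name)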
def start_training():
    logger.info("Setup config, data and model...")
    opt = BaseOptions().parse()
    set_seed(opt.seed)
    if opt.debug:  # keep the model run deterministic
        # 'cudnn.benchmark = True' enables auto-tuning of the fastest algorithm for a
        # specific input/net config. Enable it only when the input size is fixed.
        cudnn.benchmark = False
        cudnn.deterministic = True

    opt.writer = SummaryWriter(opt.tensorboard_log_dir)
    opt.train_log_txt_formatter = "{time_str} [Epoch] {epoch:03d} [Loss] {loss_str}\n"
    opt.eval_log_txt_formatter = "{time_str} [Epoch] {epoch:03d} [Metrics] {eval_metrics_str}\n"

    train_dataset = ExCLDataset(
        dset_name=opt.dset_name,
        data_path=opt.train_path,
        desc_bert_path_or_handler=opt.desc_bert_path,
        sub_bert_path_or_handler=opt.sub_bert_path,
        max_desc_len=opt.max_desc_l,
        max_ctx_len=opt.max_ctx_l,
        vid_feat_path_or_handler=opt.vid_feat_path,
        clip_length=opt.clip_length,
        ctx_mode=opt.ctx_mode,
        h5driver=opt.h5driver,
        data_ratio=opt.data_ratio,
        normalize_vfeat=not opt.no_norm_vfeat,
        normalize_tfeat=not opt.no_norm_tfeat,
    )

    if opt.eval_path is not None:
        eval_dataset = ExCLDataset(
            dset_name=opt.dset_name,
            data_path=opt.eval_path,
            desc_bert_path_or_handler=train_dataset.desc_bert_h5,
            sub_bert_path_or_handler=train_dataset.sub_bert_h5 if "sub" in opt.ctx_mode else None,
            max_desc_len=opt.max_desc_l,
            max_ctx_len=opt.max_ctx_l,
            vid_feat_path_or_handler=train_dataset.vid_feat_h5 if "video" in opt.ctx_mode else None,
            clip_length=opt.clip_length,
            ctx_mode=opt.ctx_mode,
            h5driver=opt.h5driver,
            data_ratio=opt.data_ratio,
            normalize_vfeat=not opt.no_norm_vfeat,
            normalize_tfeat=not opt.no_norm_tfeat,
            video_duration_idx_path=opt.video_duration_idx_path,
            eval_split_name=opt.eval_split_name
        )
    else:
        eval_dataset = None

    model_config = EDict(
        visual_input_size=opt.vid_feat_size,
        sub_input_size=opt.sub_feat_size,  # for both desc and subtitles
        query_input_size=opt.q_feat_size,  # for both desc and subtitles
        hidden_size=opt.hidden_size,
        drop=opt.drop,
        ctx_mode=opt.ctx_mode,  # video, sub or video_sub
        initializer_range=opt.initializer_range
    )
    logger.info("model_config {}".format(model_config))

    model = EXCL(model_config)
    count_parameters(model)

    logger.info("Start Training...")
    train(model, train_dataset, eval_dataset, opt)
    return opt.results_dir, opt.eval_split_name, opt.eval_path, opt.debug
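# set_seed is assumed to be a project utility; a typical sketch that seeds
# Python, NumPy and PyTorch for reproducibility (the actual helper may differ):
import random
import numpy as np
import torch

def set_seed(seed, use_cuda=True):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if use_cuda:
        torch.cuda.manual_seed_all(seed)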
print("Log save path:", log_path) # ----------------------- Dataset ----------------------- train_dataset = SyntheticTrainingDataset(npz_path=train_path, params_from='all') val_dataset = SyntheticTrainingDataset(npz_path=val_path, params_from='all') train_val_monitor_datasets = [train_dataset, val_dataset] print("Training examples found:", len(train_dataset)) print("Validation examples found:", len(val_dataset)) # ----------------------- Models ----------------------- # Regressor regressor = SingleInputRegressor(resnet_in_channels, resnet_layers, ief_iters=ief_iters) num_params = count_parameters(regressor) print("\nRegressor model Loaded. ", num_params, "trainable parameters.") # SMPL model smpl_model = SMPL(config.SMPL_MODEL_DIR, batch_size=batch_size) # Camera and NMR part/silhouette renderer # Assuming camera rotation is identity (since it is dealt with by global_orients in SMPL) mean_cam_t = np.array([0., 0.2, 42.]) mean_cam_t = torch.from_numpy(mean_cam_t).float().to(device) mean_cam_t = mean_cam_t[None, :].expand(batch_size, -1) cam_K = get_intrinsics_matrix(config.REGRESSOR_IMG_WH, config.REGRESSOR_IMG_WH, config.FOCAL_LENGTH) cam_K = torch.from_numpy(cam_K.astype(np.float32)).to(device) cam_K = cam_K[None, :, :].expand(batch_size, -1, -1) cam_R = torch.eye(3).to(device)
def start_training():
    logger.info("Setup config, data and model...")
    opt = BaseOptions().parse()
    set_seed(opt.seed)
    if opt.debug:  # keep the model run deterministic
        # 'cudnn.benchmark = True' enables auto-tuning of the fastest algorithm for a
        # specific input/net config. Enable it only when the input size is fixed.
        cudnn.benchmark = False
        cudnn.deterministic = True

    opt.writer = SummaryWriter(opt.tensorboard_log_dir)
    opt.train_log_txt_formatter = "{time_str} [Epoch] {epoch:03d} [Loss] {loss_str}\n"
    opt.eval_log_txt_formatter = "{time_str} [Epoch] {epoch:03d} [Metrics] {eval_metrics_str}\n"

    train_dataset = StartEndDataset(
        dset_name=opt.dset_name,
        data_path=opt.train_path,
        desc_bert_path_or_handler=opt.desc_bert_path,
        sub_bert_path_or_handler=opt.sub_bert_path,
        max_desc_len=opt.max_desc_l,
        max_ctx_len=opt.max_ctx_l,
        vid_feat_path_or_handler=opt.vid_feat_path,
        clip_length=opt.clip_length,
        ctx_mode=opt.ctx_mode,
        h5driver=opt.h5driver,
        data_ratio=opt.data_ratio,
        normalize_vfeat=not opt.no_norm_vfeat,
        normalize_tfeat=not opt.no_norm_tfeat,
    )

    if opt.eval_path is not None:
        # val dataset, used to get eval loss
        train_eval_dataset = StartEndDataset(
            dset_name=opt.dset_name,
            data_path=opt.eval_path,
            desc_bert_path_or_handler=train_dataset.desc_bert_h5,
            sub_bert_path_or_handler=train_dataset.sub_bert_h5 if "sub" in opt.ctx_mode else None,
            max_desc_len=opt.max_desc_l,
            max_ctx_len=opt.max_ctx_l,
            vid_feat_path_or_handler=train_dataset.vid_feat_h5 if "video" in opt.ctx_mode else None,
            clip_length=opt.clip_length,
            ctx_mode=opt.ctx_mode,
            h5driver=opt.h5driver,
            data_ratio=opt.data_ratio,
            normalize_vfeat=not opt.no_norm_vfeat,
            normalize_tfeat=not opt.no_norm_tfeat
        )
        eval_dataset = StartEndEvalDataset(
            dset_name=opt.dset_name,
            eval_split_name=opt.eval_split_name,  # should only be val set
            data_path=opt.eval_path,
            desc_bert_path_or_handler=train_dataset.desc_bert_h5,
            sub_bert_path_or_handler=train_dataset.sub_bert_h5 if "sub" in opt.ctx_mode else None,
            max_desc_len=opt.max_desc_l,
            max_ctx_len=opt.max_ctx_l,
            video_duration_idx_path=opt.video_duration_idx_path,
            vid_feat_path_or_handler=train_dataset.vid_feat_h5 if "video" in opt.ctx_mode else None,
            clip_length=opt.clip_length,
            ctx_mode=opt.ctx_mode,
            data_mode="query",
            h5driver=opt.h5driver,
            data_ratio=opt.data_ratio,
            normalize_vfeat=not opt.no_norm_vfeat,
            normalize_tfeat=not opt.no_norm_tfeat
        )
    else:
        # avoid a NameError in train(...) below when no eval_path is given
        train_eval_dataset = None
        eval_dataset = None

    model_config = EDict(
        merge_two_stream=not opt.no_merge_two_stream,  # merge video and subtitles
        cross_att=not opt.no_cross_att,  # use cross-attention when encoding video and subtitles
        span_predictor_type=opt.span_predictor_type,
        encoder_type=opt.encoder_type,  # gru, lstm, transformer
        add_pe_rnn=opt.add_pe_rnn,  # add pe for RNNs
        pe_type=opt.pe_type,
        visual_input_size=opt.vid_feat_size,
        sub_input_size=opt.sub_feat_size,  # for both desc and subtitles
        query_input_size=opt.q_feat_size,  # for both desc and subtitles
        hidden_size=opt.hidden_size,
        stack_conv_predictor_conv_kernel_sizes=opt.stack_conv_predictor_conv_kernel_sizes,
        conv_kernel_size=opt.conv_kernel_size,
        conv_stride=opt.conv_stride,
        max_ctx_l=opt.max_ctx_l,
        max_desc_l=opt.max_desc_l,
        input_drop=opt.input_drop,
        cross_att_drop=opt.cross_att_drop,
        drop=opt.drop,
        n_heads=opt.n_heads,  # self-att heads
        initializer_range=opt.initializer_range,  # for linear layer
        ctx_mode=opt.ctx_mode,  # video, sub or video_sub
        margin=opt.margin,  # margin for ranking loss
        ranking_loss_type=opt.ranking_loss_type,  # loss type, 'hinge' or 'lse'
        lw_neg_q=opt.lw_neg_q,  # loss weight for neg. query and pos. context
        lw_neg_ctx=opt.lw_neg_ctx,  # loss weight for pos. query and neg. context
        lw_st_ed=0,  # will be assigned dynamically at training time
        use_hard_negative=False,  # reset at each epoch
        hard_pool_size=opt.hard_pool_size,
        use_self_attention=not opt.no_self_att,  # whether to use self attention
        no_modular=opt.no_modular
    )
    logger.info("model_config {}".format(model_config))

    model = XML(model_config)
    count_parameters(model)

    logger.info("Start Training...")
    train(model, train_dataset, train_eval_dataset, eval_dataset, opt)
    return opt.results_dir, opt.eval_split_name, opt.eval_path, opt.debug
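# EDict is assumed to be easydict.EasyDict (attribute-style access to dict keys),
# which is why the model can read e.g. config.hidden_size; a quick illustration:
from easydict import EasyDict as EDict

cfg = EDict(hidden_size=256, ctx_mode="video_sub")
assert cfg.hidden_size == 256 and cfg["ctx_mode"] == "video_sub"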
def start_training():
    logger.info("Setup config, data and model...")
    opt = BaseOptions().parse()
    set_seed(opt.seed)
    if opt.debug:  # keep the model run deterministic
        # 'cudnn.benchmark = True' enables auto-tuning of the fastest algorithm for a
        # specific input/net config. Enable it only when the input size is fixed.
        cudnn.benchmark = False
        cudnn.deterministic = True

    opt.writer = SummaryWriter(opt.tensorboard_log_dir)
    opt.train_log_txt_formatter = "{time_str} [Epoch] {epoch:03d} [Loss] {loss_str}\n"
    opt.eval_log_txt_formatter = "{time_str} [Epoch] {epoch:03d} [Metrics] {eval_metrics_str}\n"

    train_dataset = RetrievalDataset(
        dset_name=opt.dset_name,
        data_path=opt.train_path,
        desc_bert_path_or_handler=opt.desc_bert_path,
        sub_bert_path_or_handler=opt.sub_bert_path,
        vid_feat_path_or_handler=opt.vid_feat_path,
        max_desc_len=opt.max_desc_l,
        max_ctx_len=opt.max_ctx_l,
        ctx_mode=opt.ctx_mode,
        h5driver=opt.h5driver,
        data_ratio=opt.data_ratio,
        normalize_vfeat=not opt.no_norm_vfeat,
        normalize_tfeat=not opt.no_norm_tfeat,
    )

    if opt.eval_path is not None:
        eval_dataset = RetrievalEvalDataset(
            dset_name=opt.dset_name,
            eval_split_name=opt.eval_split_name,  # should only be val set
            data_path=opt.eval_path,
            desc_bert_path_or_handler=train_dataset.desc_bert_h5,
            sub_bert_path_or_handler=train_dataset.sub_bert_h5 if "sub" in opt.ctx_mode else None,
            max_desc_len=opt.max_desc_l,
            max_ctx_len=opt.max_ctx_l,
            video_duration_idx_path=opt.video_duration_idx_path,
            vid_feat_path_or_handler=train_dataset.vid_feat_h5 if "video" in opt.ctx_mode else None,
            ctx_mode=opt.ctx_mode,
            data_mode="query",
            h5driver=opt.h5driver,
            data_ratio=opt.data_ratio,
            normalize_vfeat=not opt.no_norm_vfeat,
            normalize_tfeat=not opt.no_norm_tfeat,
        )
    else:
        eval_dataset = None

    model_config = EDict(
        ctx_mode=opt.ctx_mode,
        text_input_size=opt.sub_feat_size,
        vid_input_size=opt.vid_feat_size,
        output_size=opt.output_size,
        margin=opt.margin,  # margin for ranking loss
    )
    logger.info("model_config {}".format(model_config))

    model = MEE(model_config)
    count_parameters(model)

    logger.info("Start Training...")
    train(model, train_dataset, eval_dataset, opt)
    return opt.results_dir, opt.eval_split_name, opt.eval_path, opt.debug
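# Hedged usage sketch: the four return values let a wrapper script chain
# training with a follow-up step (nothing beyond the returned values is assumed):
if __name__ == "__main__":
    results_dir, eval_split_name, eval_path, debug = start_training()
    print("Training done. Results saved under:", results_dir)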