def BSN_Train_TEM(opt):
    """Train the TEM network and run validation after every epoch.

    Args:
        opt: Option mapping. Read here: ``tem_training_lr``,
            ``tem_weight_decay``, ``tem_step_size``, ``tem_step_gamma`` and
            ``tem_epoch``. The mapping is also forwarded unchanged to ``TEM``,
            ``VideoDataSet``, ``train_TEM`` and ``test_TEM``.
    """
    writer = SummaryWriter()
    try:
        model = TEM(opt)
        model = torch.nn.DataParallel(model, device_ids=GPU_IDs).cuda()

        optimizer = optim.Adam(model.parameters(),
                               lr=opt["tem_training_lr"],
                               weight_decay=opt["tem_weight_decay"])

        # Both loaders take their batch size from the wrapped model and drop
        # the last incomplete batch.
        train_loader = torch.utils.data.DataLoader(
            VideoDataSet(opt, subset="train"),
            batch_size=model.module.batch_size, shuffle=True,
            num_workers=8, pin_memory=True, drop_last=True)
        test_loader = torch.utils.data.DataLoader(
            VideoDataSet(opt, subset="validation"),
            batch_size=model.module.batch_size, shuffle=False,
            num_workers=8, pin_memory=True, drop_last=True)

        scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer,
            step_size=opt["tem_step_size"],
            gamma=opt["tem_step_gamma"])

        for epoch in range(opt["tem_epoch"]):
            train_TEM(train_loader, model, optimizer, epoch, writer, opt)
            test_TEM(test_loader, model, epoch, writer, opt)
            # Step the schedule only after this epoch's optimizer updates:
            # on PyTorch >= 1.1 stepping first skips the first LR value.
            scheduler.step()
    finally:
        # Close the summary writer even if training raises.
        writer.close()
def BMN_Train(opt):
    """Train BMN on a single CUDA device, validating after every epoch.

    Args:
        opt: Option mapping. Read here: ``training_lr``, ``weight_decay``,
            ``batch_size``, ``step_size``, ``step_gamma`` and
            ``train_epochs``. The mapping is also forwarded unchanged to
            ``BMN``, ``VideoDataSet``, ``train_BMN`` and ``test_BMN``.
    """
    writer = SummaryWriter()
    try:
        model = BMN(opt).cuda()
        optimizer = optim.Adam(model.parameters(),
                               lr=opt["training_lr"],
                               weight_decay=opt["weight_decay"])

        train_loader = torch.utils.data.DataLoader(
            VideoDataSet(opt, subset="train"),
            batch_size=opt["batch_size"], shuffle=True,
            num_workers=8, pin_memory=True)
        test_loader = torch.utils.data.DataLoader(
            VideoDataSet(opt, subset="validation"),
            batch_size=opt["batch_size"], shuffle=False,
            num_workers=8, pin_memory=True)

        scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, step_size=opt["step_size"], gamma=opt["step_gamma"])

        for epoch in range(opt["train_epochs"]):
            train_BMN(train_loader, model, optimizer, epoch, writer, opt)
            test_BMN(test_loader, model, epoch, writer, opt)
            # Step the schedule only after this epoch's optimizer updates:
            # on PyTorch >= 1.1 stepping first skips the first LR value.
            scheduler.step()
    finally:
        # Close the summary writer even if training raises.
        writer.close()
def BMN_Train(opt):
    """Train BMN with DataParallel on GPUs 0 and 1, validating every epoch.

    Args:
        opt: Option mapping. Read here: ``training_lr``, ``weight_decay``,
            ``batch_size``, ``step_size``, ``step_gamma``,
            ``temporal_scale`` and ``train_epochs``. The mapping is also
            forwarded unchanged to ``BMN`` and ``VideoDataSet``.
    """
    model = BMN(opt)
    model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()

    # Only parameters that require gradients are handed to the optimizer.
    optimizer = optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=opt["training_lr"],
        weight_decay=opt["weight_decay"])

    train_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="train"),
        batch_size=opt["batch_size"], shuffle=True,
        num_workers=8, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="validation"),
        batch_size=opt["batch_size"], shuffle=False,
        num_workers=8, pin_memory=True)

    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=opt["step_size"], gamma=opt["step_gamma"])
    bm_mask = get_mask(opt["temporal_scale"])

    for epoch in range(opt["train_epochs"]):
        train_BMN(train_loader, model, optimizer, epoch, bm_mask)
        test_BMN(test_loader, model, epoch, bm_mask)
        # Step the schedule only after this epoch's optimizer updates:
        # on PyTorch >= 1.1 stepping first skips the first LR value.
        scheduler.step()
def train(self, n_epochs):
    """Run training for ``n_epochs`` epochs, evaluating and checkpointing each one.

    A new run id is chosen as one more than the largest numeric suffix among
    the entries of ``cfg.TRAIN.LOG_DIR``. Summaries go to ``run_<id>`` under
    that directory and checkpoints to ``checkpoint_<id>`` under
    ``cfg.MODEL.CHECKPOINT_DIR``. After every epoch ``model_<epoch>.pth`` is
    written, and ``best_<EVAL_SCORE>.pth`` is refreshed whenever the
    evaluation score exceeds all previous ones.

    Args:
        n_epochs: Number of epochs to train.

    Raises:
        AssertionError: If the checkpoint directory for the new run id
            already exists.
    """
    log_root = self.cfg.TRAIN.LOG_DIR
    # Create the log root before listing it; listing a missing directory
    # would otherwise fail on the very first run.
    os.makedirs(log_root, exist_ok=True)

    run_ids = [0]
    for entry in os.listdir(log_root):
        try:
            run_ids.append(int(entry.split('_')[-1]))
        except ValueError:
            # Ignore entries without a numeric suffix (stray files etc.).
            continue
    exp_id = max(run_ids) + 1

    log_dir = os.path.join(log_root, 'run_' + str(exp_id))
    writer = SummaryWriter(log_dir)
    try:
        checkpoint_dir = os.path.join(self.cfg.MODEL.CHECKPOINT_DIR,
                                      'checkpoint_' + str(exp_id))
        assert not os.path.isdir(
            checkpoint_dir
        ), 'Checkpoint directory %s has already been created.' % checkpoint_dir
        os.makedirs(checkpoint_dir)

        train_loader = torch.utils.data.DataLoader(
            VideoDataSet(self.cfg, split=self.cfg.TRAIN.SPLIT),
            batch_size=self.cfg.TRAIN.BATCH_SIZE,
            shuffle=True,
            num_workers=12,
            pin_memory=True,
            collate_fn=self.train_collator)
        eval_loader = torch.utils.data.DataLoader(
            VideoDataSet(self.cfg, split=self.cfg.VAL.SPLIT),
            batch_size=self.cfg.VAL.BATCH_SIZE,
            shuffle=False,
            num_workers=12,
            pin_memory=True,
            drop_last=False,
            collate_fn=self.test_collator)

        bm_mask = get_mask(self.temporal_dim, self.max_duration).cuda()
        scores = []
        for epoch in range(n_epochs):
            # The original code has the LR scheduler step (and the LR print)
            # commented out; it remains disabled here.
            self.train_epoch(train_loader, bm_mask, epoch, writer)
            score = self.evaluate(eval_loader, self.cfg.VAL.SPLIT)

            state = {
                'epoch': epoch + 1,
                'score': score,
                'state_dict': self.model.state_dict()
            }
            # A strictly better score than every earlier epoch refreshes
            # the "best" checkpoint.
            if not scores or score > max(scores):
                torch.save(
                    state,
                    os.path.join(checkpoint_dir,
                                 "best_{}.pth".format(self.cfg.EVAL_SCORE)))
            torch.save(
                state,
                os.path.join(checkpoint_dir,
                             "model_{}.pth".format(epoch + 1)))

            writer.add_scalar(self.cfg.EVAL_SCORE, score, epoch)
            scores.append(score)
    finally:
        # Close the summary writer even if training raises.
        writer.close()
def BSN_inference_TEM(opt):
    """Run the best TEM checkpoint over the "full" subset and write one CSV per video.

    Each CSV under ``./output/TEM_results/`` has the columns ``action``,
    ``start``, ``end``, ``xmin`` and ``xmax``.

    Args:
        opt: Option mapping. ``checkpoint_path`` is read here; the mapping
            is also forwarded unchanged to ``TEM`` and ``VideoDataSet``.
    """
    model = TEM(opt)
    checkpoint = torch.load(opt["checkpoint_path"] + "/tem_best.pth.tar")
    # Drop the first dotted component of every key so the weights load into
    # the bare (unwrapped) model.
    base_dict = {'.'.join(k.split('.')[1:]): v
                 for k, v in list(checkpoint['state_dict'].items())}
    model.load_state_dict(base_dict)
    model = torch.nn.DataParallel(model, device_ids=[0]).cuda()
    model.eval()

    test_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="full"),
        batch_size=model.module.batch_size, shuffle=False,
        num_workers=8, pin_memory=True, drop_last=False)

    columns = ["action", "start", "end", "xmin", "xmax"]
    # Inference only: disable autograd so no graph is built per batch.
    with torch.no_grad():
        for index_list, input_data, anchor_xmin, anchor_xmax in test_loader:
            TEM_output = model(input_data).detach().cpu().numpy()
            batch_action = TEM_output[:, 0, :]
            batch_start = TEM_output[:, 1, :]
            batch_end = TEM_output[:, 2, :]

            index_list = index_list.numpy()
            # Element 0 of every collated anchor entry is used for all videos
            # of the batch.
            anchor_xmin = np.array([x.numpy()[0] for x in anchor_xmin])
            anchor_xmax = np.array([x.numpy()[0] for x in anchor_xmax])

            for batch_idx, full_idx in enumerate(index_list):
                video = test_loader.dataset.video_list[full_idx]
                video_result = np.stack(
                    (batch_action[batch_idx], batch_start[batch_idx],
                     batch_end[batch_idx], anchor_xmin, anchor_xmax),
                    axis=1)
                video_df = pd.DataFrame(video_result, columns=columns)
                video_df.to_csv("./output/TEM_results/" + video + ".csv",
                                index=False)
def BMN_inference(opt):
    """Score every (start, end) pair of each validation video and save it as CSV.

    Loads ``BMN_best.pth.tar`` from ``opt["checkpoint_path"]``, runs the
    model one video at a time and writes
    ``./output/BMN_results/<video_name>.csv``.

    Args:
        opt: Option mapping. ``checkpoint_path`` and ``temporal_scale`` are
            read here; the mapping is also forwarded to ``BMN`` and
            ``VideoDataSet``.
    """
    net = torch.nn.DataParallel(BMN(opt), device_ids=[0, 1]).cuda()
    weights = torch.load(opt["checkpoint_path"] + "/BMN_best.pth.tar")
    net.load_state_dict(weights['state_dict'])
    net.eval()

    loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="validation"),
        batch_size=1,
        shuffle=False,
        num_workers=8,
        pin_memory=True,
        drop_last=False)
    tscale = opt["temporal_scale"]
    columns = [
        "xmin", "xmax", "xmin_score", "xmax_score", "clr_score",
        "reg_socre", "score"
    ]

    with torch.no_grad():
        for sample_idx, features in loader:
            video_name = loader.dataset.video_list[sample_idx[0]]
            confidence_map, start, end = net(features.cuda())

            start_scores = start[0].detach().cpu().numpy()
            end_scores = end[0].detach().cpu().numpy()
            clr_confidence = (confidence_map[0][1]).detach().cpu().numpy()
            reg_confidence = (confidence_map[0][0]).detach().cpu().numpy()

            # Enumerate every combination of start and end boundary with
            # start_index < end_index < tscale.
            rows = []
            for start_index in range(tscale):
                for col in range(start_index, tscale - 1):
                    end_index = col + 1
                    xmin_score = start_scores[start_index]
                    xmax_score = end_scores[end_index]
                    clr_score = clr_confidence[start_index, col]
                    reg_score = reg_confidence[start_index, col]
                    rows.append([
                        start_index / tscale,
                        end_index / tscale,
                        xmin_score,
                        xmax_score,
                        clr_score,
                        reg_score,
                        xmin_score * xmax_score * clr_score * reg_score,
                    ])

            table = pd.DataFrame(np.stack(rows), columns=columns)
            table.to_csv("./output/BMN_results/" + video_name + ".csv",
                         index=False)
def BSN_inference_TEM(opt):
    """Run the best TEM checkpoint over the "full" subset and write one CSV per video.

    CSVs are written to ``./output/<arch><fix_scale>_TEM_results/`` with the
    columns ``action``, ``start``, ``end``, ``xmin`` and ``xmax``.

    Args:
        opt: Option mapping. Read here: ``checkpoint_path``, ``arch`` and
            ``fix_scale``. The mapping is also forwarded unchanged to
            ``TEM`` and ``VideoDataSet``.
    """
    model = TEM(opt)
    checkpoint = torch.load(
        opt["checkpoint_path"] + "/" + opt["arch"] + "_tem_best.pth.tar")
    # Drop the first dotted component of every key so the weights load into
    # the bare (unwrapped) model.
    base_dict = {'.'.join(k.split('.')[1:]): v
                 for k, v in list(checkpoint['state_dict'].items())}
    model.load_state_dict(base_dict)
    model = torch.nn.DataParallel(model, device_ids=GPU_IDs).cuda()
    model.eval()

    test_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="full"),
        batch_size=model.module.batch_size, shuffle=False,
        num_workers=8, pin_memory=True, drop_last=False)

    columns = ["action", "start", "end", "xmin", "xmax"]
    count = 0
    # Inference only: disable autograd so no graph is built per batch.
    with torch.no_grad():
        for index_list, input_data, anchor_xmin, anchor_xmax in test_loader:
            # Videos of differing length: when the anchors from the loader
            # do not match the temporal size of the input, rebuild evenly
            # spaced anchors for that size.
            if opt['fix_scale'] == 'nonrescale':
                if len(anchor_xmin) != input_data.shape[2]:
                    temporal_scale = input_data.shape[2]
                    temporal_gap = 1. / temporal_scale
                    anchor_xmin = [temporal_gap * i
                                   for i in range(temporal_scale)]
                    anchor_xmin = [torch.tensor([x]) for x in anchor_xmin]
                    anchor_xmax = [temporal_gap * i
                                   for i in range(1, temporal_scale + 1)]
                    anchor_xmax = [torch.tensor([x]) for x in anchor_xmax]

            TEM_output = model(input_data).detach().cpu().numpy()
            batch_action = TEM_output[:, 0, :]
            batch_start = TEM_output[:, 1, :]
            batch_end = TEM_output[:, 2, :]

            index_list = index_list.numpy()
            anchor_xmin = np.array([x.numpy()[0] for x in anchor_xmin])
            anchor_xmax = np.array([x.numpy()[0] for x in anchor_xmax])

            for batch_idx, full_idx in enumerate(index_list):
                video = test_loader.dataset.video_list[full_idx]
                video_result = np.stack(
                    (batch_action[batch_idx], batch_start[batch_idx],
                     batch_end[batch_idx], anchor_xmin, anchor_xmax),
                    axis=1)
                video_df = pd.DataFrame(video_result, columns=columns)
                video_df.to_csv(
                    "./output/" + opt["arch"] + opt["fix_scale"] +
                    "_TEM_results/" + video + ".csv",
                    index=False)
                # NOTE(review): the reviewed source had lost its indentation;
                # the progress counter is assumed to advance once per video.
                count += 1
                if count % 100 == 0:
                    print('finish', count)
                    sys.stdout.flush()
def BMN_Train(opt):
    """Train BMN on ``opt['n_gpu']`` GPUs and report the total wall-clock time.

    The best loss returned by ``test_BMN`` is threaded through the epochs,
    and the LR scheduler is stepped at the end of each epoch.

    Args:
        opt: Option mapping. Read here: ``n_gpu``, ``training_lr``,
            ``weight_decay``, ``batch_size``, ``num_workers``,
            ``step_size``, ``step_gamma``, ``temporal_scale`` and
            ``train_epochs``. Also forwarded to ``BMN`` and ``VideoDataSet``.
    """
    started_at = time.time()

    net = BMN(opt)
    gpu_ids = list(range(opt['n_gpu']))
    net = torch.nn.DataParallel(net, device_ids=gpu_ids).cuda()
    print('using {} gpus to train!'.format(opt['n_gpu']))

    # Optimise only the parameters that require gradients.
    trainable = (p for p in net.parameters() if p.requires_grad)
    optimizer = optim.Adam(trainable,
                           lr=opt["training_lr"],
                           weight_decay=opt["weight_decay"])

    train_set = VideoDataSet(opt, subset="train")
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=opt["batch_size"],
        shuffle=True,
        num_workers=opt['num_workers'],
        pin_memory=True)
    val_set = VideoDataSet(opt, subset="validation")
    test_loader = torch.utils.data.DataLoader(
        val_set,
        batch_size=opt["batch_size"],
        shuffle=False,
        num_workers=opt['num_workers'],
        pin_memory=True)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=opt["step_size"],
                                                gamma=opt["step_gamma"])
    bm_mask = get_mask(opt["temporal_scale"])

    best_loss = 1e10
    epoch = 0
    total_epochs = opt["train_epochs"]
    while epoch < total_epochs:
        train_BMN(train_loader, net, optimizer, epoch, bm_mask)
        best_loss = test_BMN(test_loader, net, epoch, bm_mask, best_loss)
        scheduler.step()
        epoch += 1

    elapsed = datetime.timedelta(seconds=time.time() - started_at)
    print("Total time (BMN_Train):", elapsed)
def BSN_Train_TEM(opt):
    """Continue training TEM from ``checkpoint/tem_best.pth.tar``.

    Validation runs after every third epoch; the LR scheduler is stepped
    right after each training epoch.

    Args:
        opt: Option mapping. Read here: ``tem_training_lr``,
            ``tem_weight_decay``, ``tem_step_size``, ``tem_step_gamma`` and
            ``tem_epoch``. Also forwarded to ``TEM``, ``VideoDataSet``,
            ``train_TEM`` and ``test_TEM``.
    """
    writer = SummaryWriter()

    net = torch.nn.DataParallel(TEM(opt), device_ids=[0]).cuda()
    # Warm-start from the previously saved best weights.
    saved = torch.load('checkpoint/tem_best.pth.tar')
    net.load_state_dict(saved['state_dict'])

    optimizer = optim.Adam(net.parameters(),
                           lr=opt["tem_training_lr"],
                           weight_decay=opt["tem_weight_decay"])

    train_set = VideoDataSet(opt, subset="train")
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=net.module.batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True,
        drop_last=True)
    val_set = VideoDataSet(opt, subset="validation")
    test_loader = torch.utils.data.DataLoader(
        val_set,
        batch_size=net.module.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
        drop_last=True)

    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        step_size=opt["tem_step_size"],
        gamma=opt["tem_step_gamma"])

    for epoch in range(opt["tem_epoch"]):
        train_TEM(train_loader, net, optimizer, epoch, writer, opt)
        scheduler.step()
        if (epoch + 1) % 3 != 0:
            continue
        # Only every third epoch is validated.
        test_TEM(test_loader, net, epoch, writer, opt)

    writer.close()
def inference(self, data_loader=None, split=None, batch_size=None):
    """Generate proposals for every video, store them as feather files, then post-process.

    Args:
        data_loader: Loader yielding ``(video_names, env_features,
            agent_features, agent_masks)`` batches. When ``None`` a loader
            over ``VideoDataSet(self.cfg, split=split)`` is built.
        split: Dataset split passed to ``getDatasetDict``, ``PostProcessor``
            and, when needed, ``VideoDataSet``.
        batch_size: Batch size used only when the loader is built here.
    """
    out_dir = 'results/outputs/'
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # Annotations are only loaded for the 'thumos' dataset.
    if self.cfg.DATASET == 'thumos':
        annotations = getDatasetDict(self.cfg.DATA.ANNOTATION_FILE, split)
    else:
        annotations = None
    self.prop_gen = ProposalGenerator(self.temporal_dim, self.max_duration,
                                      annotations)
    self.post_processing = PostProcessor(self.cfg, split)

    if data_loader is None:
        dataset = VideoDataSet(self.cfg, split=split)
        data_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=12,
            pin_memory=True,
            drop_last=False,
            collate_fn=self.test_collator)

    col_name = [
        "xmin", "xmax", "xmin_score", "xmax_score", "clr_score",
        "reg_score", "score"
    ]

    use_env = self.cfg.USE_ENV
    use_agent = self.cfg.USE_AGENT

    self.model.eval()
    with torch.no_grad():
        for batch in tqdm(data_loader):
            video_names, env_features, agent_features, agent_masks = batch

            # Inputs whose stream is disabled in the config are replaced
            # with None before calling the model.
            env_features = env_features.cuda() if use_env else None
            agent_features = agent_features.cuda() if use_agent else None
            agent_masks = agent_masks.cuda() if use_agent else None

            outputs = self.model(env_features, agent_features, agent_masks)
            confidence_map, start_map, end_map = (
                t.cpu().numpy() for t in outputs)

            batch_props = self.prop_gen(start_map, end_map, confidence_map,
                                        video_names)
            for video_name, new_props in zip(video_names, batch_props):
                frame = pd.DataFrame(new_props, columns=col_name)
                frame.to_feather("./results/outputs/" + video_name +
                                 ".feather")

    self.post_processing()
def BSN_inference_TEM(opt):
    """Run TEM inference over the "full" subset and write one CSV per video.

    Step 1 loads the best checkpoint; step 2 runs the model and stores three
    curves (``action``, ``start``, ``end``) together with the anchor bounds
    (``xmin``, ``xmax``) under ``./output/TEM_results/``.

    Args:
        opt: Option mapping. ``checkpoint_path`` is read here; the mapping
            is also forwarded unchanged to ``TEM`` and ``VideoDataSet``.
    """
    # Step 1: load the best model.
    model = TEM(opt)
    checkpoint = torch.load(opt["checkpoint_path"] + "/tem_best.pth.tar")
    # Drop the first dotted component of every key so the weights load into
    # the bare (unwrapped) model.
    base_dict = {'.'.join(k.split('.')[1:]): v
                 for k, v in list(checkpoint['state_dict'].items())}
    model.load_state_dict(base_dict)
    model = torch.nn.DataParallel(model, device_ids=[0]).cuda()
    model.eval()

    # Step 2: subset "full" produces curves for all videos.
    test_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="full"),
        batch_size=model.module.batch_size, shuffle=False,
        num_workers=8, pin_memory=True, drop_last=False)

    columns = ['action', 'start', 'end', 'xmin', 'xmax']
    # Inference only: disable autograd so no graph is built per batch.
    with torch.no_grad():
        for index_list, input_data, anchor_xmin, anchor_xmax in test_loader:
            TEM_output = model(input_data).detach().cpu().numpy()
            batch_action = TEM_output[:, 0, :]
            batch_start = TEM_output[:, 1, :]
            batch_end = TEM_output[:, 2, :]

            index_list = index_list.numpy()
            # Element 0 of every collated anchor entry is used for all videos
            # of the batch.
            anchor_xmin = np.array([x.numpy()[0] for x in anchor_xmin])
            anchor_xmax = np.array([x.numpy()[0] for x in anchor_xmax])

            for batch_idx, full_idx in enumerate(index_list):
                video_name = test_loader.dataset.video_list[full_idx]
                video_result = np.stack(
                    (batch_action[batch_idx], batch_start[batch_idx],
                     batch_end[batch_idx], anchor_xmin, anchor_xmax),
                    axis=1)
                video_df = pd.DataFrame(video_result, columns=columns)
                video_df.to_csv('./output/TEM_results/' + video_name + '.csv',
                                index=False)
def BMN_inference(opt):
    """Generate BMN proposals for each validation video and save them as CSV.

    Candidate start/end boundaries are the first and last positions, local
    peaks of the boundary score, and positions scoring above
    ``pgm_threshold`` times the maximum. Every (start, end) pair with
    start < end becomes a proposal scored as
    ``start_score * end_score * sqrt(clr_score * reg_score)``.

    Args:
        opt: Option mapping. Read here: ``checkpoint_path``,
            ``temporal_scale`` and ``pgm_threshold``. The mapping is also
            forwarded to ``BMN`` and ``VideoDataSet``.
    """
    model = BMN(opt).cuda()
    checkpoint = torch.load(opt["checkpoint_path"] + "/BMN_best.pth.tar")
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    test_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="validation"),
        batch_size=1, shuffle=False,
        num_workers=8, pin_memory=True, drop_last=False)

    tscale = opt["temporal_scale"]
    tgap = 1. / tscale
    peak_thres = opt["pgm_threshold"]
    col_name = [
        "xmin", "xmax", "xmin_score", "xmax_score", "clr_score",
        "reg_socre", "score"
    ]

    def _boundary_bins(scores):
        """Return a 0/1 array marking candidate boundary positions."""
        bins = np.zeros(len(scores))
        bins[[0, -1]] = 1  # first and last positions are always candidates
        threshold = peak_thres * max(scores)
        for t in range(1, tscale - 1):
            is_peak = scores[t] > scores[t + 1] and scores[t] > scores[t - 1]
            if is_peak or scores[t] > threshold:
                bins[t] = 1
        return bins

    with torch.no_grad():
        for idx, input_data in test_loader:
            video_name = test_loader.dataset.video_list[idx[0]]
            input_data = input_data.cuda()
            start_end, confidence_map = model(input_data)

            start_scores = start_end[0][0].detach().cpu().numpy()
            end_scores = start_end[0][1].detach().cpu().numpy()
            # NOTE(review): `confidence_mask` is not defined in this
            # function; it has to exist at module level - confirm.
            clr_confidence = (confidence_map[0][0] *
                              confidence_mask).detach().cpu().numpy()
            reg_confidence = (confidence_map[0][1] *
                              confidence_mask).detach().cpu().numpy()

            # Generate the sets of candidate start and end points.
            start_bins = _boundary_bins(start_scores)
            end_bins = _boundary_bins(end_scores)

            # Keep integer positions instead of recovering them from the
            # float coordinates: int((x - tgap / 2) / tgap) can truncate to
            # j - 1 because of floating-point rounding.
            start_points = [j for j in range(tscale) if start_bins[j] == 1]
            end_points = [j for j in range(tscale) if end_bins[j] == 1]

            # Enumerate the combinations of start and end boundaries.
            new_props = []
            for end_point in end_points:
                # Boundaries sit at the midpoint of their temporal bin.
                tmp_xmax = tgap / 2 + tgap * end_point
                tmp_xmax_score = end_scores[end_point]
                for start_point in start_points:
                    # start_points is ascending, so no later start can
                    # precede this end.
                    if start_point >= end_point:
                        break
                    tmp_xmin = tgap / 2 + tgap * start_point
                    tmp_xmin_score = start_scores[start_point]
                    duration = end_point - start_point
                    clr_score = clr_confidence[duration, start_point]
                    reg_score = reg_confidence[duration, start_point]
                    # Fix: combine the START score with the END score (the
                    # end score used to be multiplied by itself).
                    score = tmp_xmin_score * tmp_xmax_score * np.sqrt(
                        clr_score * reg_score)
                    if score == 0:
                        print(video_name, tmp_xmin, tmp_xmax, tmp_xmin_score,
                              tmp_xmax_score, clr_score, reg_score, score,
                              confidence_map[0, 0, duration, start_point],
                              duration, start_point)
                    new_props.append([
                        tmp_xmin, tmp_xmax, tmp_xmin_score, tmp_xmax_score,
                        clr_score, reg_score, score
                    ])
            new_props = np.stack(new_props)

            new_df = pd.DataFrame(new_props, columns=col_name)
            new_df.to_csv("./output/BMN_results/" + video_name + ".csv",
                          index=False)
def main():
    """Train or evaluate a ``TemporalModel`` video classifier from CLI arguments.

    Parses ``parser`` into the module-level ``args``, builds the model and
    the train/validation loaders, optionally resumes from ``args.resume``,
    and then either runs a single validation pass (``args.evaluate``) or the
    full training loop with periodic validation and checkpointing. The
    module-level ``best_prec1`` must already be defined; it is updated with
    the best validation precision.
    """
    global args, best_prec1, store_name
    args = parser.parse_args()
    check_rootfolders()

    categories, train_list, val_list, root_path, prefix = \
        datasets_video.return_dataset(args.dataset, args.root_path)
    num_class = len(categories)

    store_name = '_'.join([
        args.type, args.dataset, args.arch,
        'segment%d' % args.num_segments, args.store_name
    ])
    print(('storing name: ' + store_name))

    if args.dataset == 'somethingv1' or args.dataset == 'somethingv2':
        # Label transformation for left/right categories; see the
        # labels.json file of something-something v2 for details.
        target_transforms = {
            86: 87,
            87: 86,
            93: 94,
            94: 93,
            166: 167,
            167: 166
        }
    else:
        target_transforms = None

    model = TemporalModel(num_class,
                          args.num_segments,
                          model=args.type,
                          backbone=args.arch,
                          alpha=args.alpha,
                          beta=args.beta,
                          dropout=args.dropout,
                          target_transforms=target_transforms)

    # Read these from the bare model before it may get wrapped below.
    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = get_optim_policies(model)
    train_augmentation = model.get_augmentation()

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = torch.nn.DataParallel(model).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            # The model is only wrapped when CUDA is available, so unwrap
            # conditionally instead of assuming `.module` exists.
            bare_model = (model.module
                          if isinstance(model, torch.nn.DataParallel)
                          else model)
            bare_model.load_state_dict(checkpoint['state_dict'])
            # Fix: report the checkpoint path (args.resume); the message
            # used to print args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    normalize = GroupNormalize(input_mean, input_std)
    is_inception = args.arch in ['BNInception', 'InceptionV3']

    train_loader = torch.utils.data.DataLoader(
        VideoDataSet(root_path,
                     train_list,
                     num_segments=args.num_segments,
                     image_tmpl=prefix,
                     transform=torchvision.transforms.Compose([
                         train_augmentation,
                         Stack(roll=is_inception),
                         ToTorchFormatTensor(div=not is_inception),
                         normalize,
                     ])),
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        VideoDataSet(root_path,
                     val_list,
                     num_segments=args.num_segments,
                     image_tmpl=prefix,
                     random_shift=False,
                     transform=torchvision.transforms.Compose([
                         GroupScale(int(scale_size)),
                         GroupCenterCrop(crop_size),
                         Stack(roll=is_inception),
                         ToTorchFormatTensor(div=not is_inception),
                         normalize,
                     ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # Define the loss function (criterion) and the optimizer. The criterion
    # follows the model: it is moved to CUDA only when CUDA is in use.
    criterion = torch.nn.CrossEntropyLoss()
    if use_cuda:
        criterion = criterion.cuda()

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        prec1 = validate(val_loader, model, criterion, 0)
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': args.start_epoch,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)
        return

    log_path = os.path.join(args.checkpoint_dir, 'log',
                            '%s.csv' % store_name)
    # The context manager closes the training log even if an epoch raises.
    with open(log_path, 'w') as log_training:
        for epoch in range(args.start_epoch, args.epochs):
            # adjust learning rate
            adjust_learning_rate(optimizer, epoch, args.lr_steps)

            # train for one epoch
            train(train_loader, model, criterion, optimizer, epoch,
                  log_training)

            # evaluate on the validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                prec1 = validate(val_loader, model, criterion,
                                 (epoch + 1) * len(train_loader),
                                 log_training)

                # remember best prec@1 and save checkpoint
                is_best = prec1 > best_prec1
                best_prec1 = max(prec1, best_prec1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_prec1': best_prec1,
                    }, is_best)
def main():
    """Train or evaluate a ``TwoStageDetector`` from CLI arguments and saved configs.

    Loads the configs through ``get_and_save_args``, builds the model and
    the data loaders, optionally resumes from ``args.resume``, and then
    either runs a single validation pass (``args.evaluate``) or the training
    loop. During training a "latest" checkpoint is written after every epoch
    and ``save_checkpoint`` is called after each validation pass. The
    module-level ``best_loss`` must already be defined.
    """
    global args, best_loss, writer
    configs = get_and_save_args(parser)
    parser.set_defaults(**configs)
    dataset_configs = configs["dataset_configs"]
    model_configs = configs["model_configs"]
    args = parser.parse_args()

    # Values from the model config override the parsed arguments.
    if 'batch_size' in model_configs:
        args.batch_size = model_configs['batch_size']
    if 'iter_size' in model_configs:
        args.iter_size = model_configs['iter_size']

    model = TwoStageDetector(model_configs,
                             roi_size=dataset_configs['roi_pool_size'])
    # Total number of parameter elements, printed and logged below.
    cnt = sum(p.data.numel() for p in model.parameters())
    print(cnt)

    # Create the directory for saving models and logs.
    if not os.path.isdir(args.snapshot_pref):
        os.makedirs(args.snapshot_pref)

    date = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
    logfile = os.path.join(args.snapshot_pref, date + '_train.log')
    get_logger(args, logfile)
    logging.info(' '.join(sys.argv))
    logging.info('\ncreating folder: ' + args.snapshot_pref)

    logging.info('\nruntime args\n\n{}\n\nconfig\n\n{}'.format(
        args, dataset_configs))
    logging.info(str(model))
    logging.info(str(cnt))
    if 'lr' in model_configs:
        args.lr = model_configs['lr']
        logging.info('Using learning rate {}'.format(args.lr))

    # Construct the model.
    policies = model.get_optim_policies()
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            logging.info(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            # Fix: report the checkpoint path (args.resume); the message
            # used to print args.evaluate.
            logging.info(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            logging.info(
                ("=> no checkpoint found at '{}'".format(args.resume)))

    # Construct the datasets.
    train_dataset = VideoDataSet(
        dataset_configs,
        prop_file=dataset_configs['train_prop_file'],
        ft_path=dataset_configs['train_ft_path'],
        epoch_multiplier=dataset_configs['training_epoch_multiplier'],
        test_mode=False)

    # In training the last incomplete minibatch is dropped.
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               drop_last=True,
                                               shuffle=True)

    # The validation dataset receives the training dataset's `stats` as
    # `reg_stats`.
    val_loader = torch.utils.data.DataLoader(
        VideoDataSet(
            dataset_configs,
            prop_file=dataset_configs['test_prop_file'],
            ft_path=dataset_configs['test_ft_path'],
            epoch_multiplier=dataset_configs['testing_epoch_multiplier'],
            reg_stats=train_loader.dataset.stats,
            test_mode=False),
        batch_size=args.batch_size,
        shuffle=False,
        drop_last=True,
        num_workers=args.workers,
        pin_memory=True)
    logging.info('Dataloaders constructed')

    # Losses and optimizer.
    activity_criterion = torch.nn.CrossEntropyLoss().cuda()
    completeness_criterion = CompletenessLoss().cuda()
    regression_criterion = ClassWiseRegressionLoss().cuda()

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.evaluate:
        validate(val_loader, model, activity_criterion,
                 completeness_criterion, regression_criterion, 0, -1)
        return

    print('Start training loop')

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args.lr_steps)
        train(train_loader, model, activity_criterion,
              completeness_criterion, regression_criterion, optimizer, epoch)

        # Always keep a "latest" checkpoint; it carries a placeholder
        # best_loss of 1000 until a validation pass updates it below.
        latest_ckpt_path = args.snapshot_pref + \
            '_'.join((args.dataset, 'latest', 'checkpoint.pth.tar'))
        ckpt = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_loss': 1000,
            'reg_stats': torch.from_numpy(train_loader.dataset.stats)
        }
        torch.save(ckpt, latest_ckpt_path)

        # Evaluate on the validation set.
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            loss = validate(val_loader, model, activity_criterion,
                            completeness_criterion, regression_criterion,
                            (epoch + 1) * len(train_loader), epoch)
            # Remember the best validation loss and save a checkpoint.
            is_best = loss < best_loss
            best_loss = min(loss, best_loss)
            ckpt['best_loss'] = best_loss
            save_checkpoint(ckpt,
                            is_best,
                            epoch,
                            filename='checkpoint.pth.tar')
# NOTE: per the original comment, the network referred to here (built outside
# this excerpt) only provides setup settings; it is not used for testing.
checkpoint = torch.load(args.weights)
saved_epoch = checkpoint['epoch']
saved_loss = checkpoint['best_loss']
print("model epoch {} loss: {}".format(saved_epoch, saved_loss))

# Drop the first dotted component from every parameter name.
base_dict = {}
for full_key, tensor in list(checkpoint['state_dict'].items()):
    base_dict[full_key.partition('.')[2]] = tensor
stats = checkpoint['reg_stats'].numpy()

prop_file = dataset_configs['test_prop_file']
print('using prop_file ' + prop_file)
dataset = VideoDataSet(dataset_configs,
                       prop_file=prop_file,
                       ft_path=dataset_configs['test_ft_path'],
                       test_mode=True)
print('Dataset Initilized')

index_queue = ctx.Queue()
result_queue = ctx.Queue()

# One process per requested worker; GPUs are assigned round-robin.
workers = []
for worker_no in range(args.workers):
    gpu = gpu_list[worker_no % len(gpu_list)]
    workers.append(
        ctx.Process(target=runner_func,
                    args=(dataset, base_dict, stats, gpu, index_queue,
                          result_queue)))

# Mark every worker as a daemon before starting it.
for proc in workers:
    proc.daemon = True
    proc.start()
def main():
    """Train or evaluate a CSN video classifier from CLI arguments.

    Parses ``parser`` into the module-level ``args``, builds the model
    selected by ``args.arch`` and the train/validation loaders, places the
    model on the requested device(s), optionally resumes from
    ``args.resume``, and then either runs one validation pass
    (``args.evaluate``) or the training loop with periodic validation and
    checkpointing. The module-level ``best_prec1`` must already be defined.
    """
    # ----------------------- Processing data -----------------------
    global args, best_prec1, store_name
    args = parser.parse_args()
    check_rootfolders()

    # Preprocess the Something-Something dataset: load the .txt lists.
    categories, train_list, val_list, root_path, prefix = \
        datasets_video.return_dataset(args.dataset, args.root_path)
    num_class = len(categories)

    if args.dataset == 'somethingv1' or args.dataset == 'somethingv2':
        # Label transformation for left/right categories; see the
        # labels.json file of something-something v2 for details.
        target_transforms = {
            86: 87,
            87: 86,
            93: 94,
            94: 93,
            166: 167,
            167: 166
        }
    else:
        target_transforms = None

    # ------------------------- Create model -------------------------
    model = getattr(CSN, args.arch)(num_class,
                                    target_transforms=target_transforms,
                                    mode=args.mode)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = get_optim_policies(model)
    train_augmentation = model.get_augmentation()

    # ------------------------- Data loading -------------------------
    normalize = GroupNormalize(input_mean, input_std)
    is_inception = args.arch in ['BNInception', 'InceptionV3']

    train_loader = torch.utils.data.DataLoader(
        VideoDataSet(root_path,
                     train_list,
                     num_segments=args.num_segments,
                     image_tmpl=prefix,
                     transform=torchvision.transforms.Compose([
                         train_augmentation,
                         Stack(roll=is_inception),
                         ToTorchFormatTensor(div=not is_inception),
                         normalize,
                     ])),
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=args.workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        VideoDataSet(root_path,
                     val_list,
                     num_segments=args.num_segments,
                     image_tmpl=prefix,
                     random_shift=False,
                     transform=torchvision.transforms.Compose([
                         GroupScale(int(scale_size)),
                         GroupCenterCrop(crop_size),
                         Stack(roll=is_inception),
                         ToTorchFormatTensor(div=not is_inception),
                         normalize,
                     ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    # ------------------------ Training config ------------------------
    if torch.cuda.is_available():
        devices = ['cuda:' + gpu_id.strip()
                   for gpu_id in args.gpus.split(',') if gpu_id.strip()]
        # Put the model on the first requested GPU: DataParallel requires
        # the module to live on device_ids[0].
        device = devices[0] if devices else 'cuda'
        if len(devices) > 1:
            # Single-machine multi-GPU training.
            model = torch.nn.DataParallel(model, device_ids=devices)
    else:
        device = 'cpu'
    model = model.to(device)

    if args.resume:  # continue an interrupted training run
        if os.path.isfile(args.resume):  # train from the given checkpoint
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Fix: report the checkpoint path (args.resume); the message
            # used to print args.evaluate.
            print(("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Define the loss function (cross entropy) and the optimizer. The
    # criterion follows the model's device so the CPU fallback also works.
    criterion = torch.nn.CrossEntropyLoss().to(device)

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'],
            group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # --------------------------- Training ---------------------------
    if args.evaluate:
        prec1 = validate(val_loader, model, criterion, 0)
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': args.start_epoch,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            },
            is_best,
            filename='evaluate')
        return

    # Name under which the model is stored.
    store_name = '_'.join([
        args.type, args.dataset, args.arch,
        'segment%d' % args.num_segments, args.store_name
    ])
    log('storing name: ' + store_name, file=log_stream)

    for epoch in range(args.start_epoch, args.epochs):
        log("********************************\n", file=log_stream)
        log("EPOCH:" + str(epoch + 1) + "\n", file=log_stream)

        # adjust learning rate
        adjust_learning_rate(optimizer, epoch, args.lr_steps)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on the validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            prec1 = validate(val_loader, model, criterion)

            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                },
                is_best,
                filename=str(epoch + 1))
        log("********************************\n", file=log_stream)
source_count = len(score_pickle_list)
print('Merge detection scores from {} sources...'.format(source_count))
# One merged score entry per key of the first score source.
detection_scores = {}
for key in score_pickle_list[0]:
    detection_scores[key] = merge_scores(key)
print('Done.')

# Deploy-specific settings take precedence over the test defaults.
prop_file = (dataset_configs['deploy_prop_file']
             if 'deploy_prop_file' in dataset_configs else
             dataset_configs['test_prop_file'])
online_slice = (dataset_configs['deploy_online_slice']
                if 'deploy_online_slice' in dataset_configs else
                dataset_configs.get('online_slice', False))

dataset = VideoDataSet(dataset_configs,
                       prop_file=prop_file,
                       ft_path=dataset_configs['train_ft_path'],
                       test_mode=True)

from functools import reduce

# Per-video lists of ground-truth lengths (frame span divided by 6),
# concatenated into one flat array.
per_video_lens = [[(x.end_frame - x.start_frame) / 6 for x in v.gt]
                  for v in dataset.video_list]
gt_lens = np.array(reduce(lambda x, y: x + y, per_video_lens))

dataset_detections = [dict() for i in range(num_class)]


# NOTE(review): this definition is cut off in the reviewed excerpt; only the
# visible part is reproduced here, unchanged.
def merge_all_vid_scores(pickle_list):
    def merge_op(arrs, index, weights):
        if arrs[0][index] is not None:
            return np.sum([a[index] * w for a, w in zip(arrs, weights)],
                          axis=0)
def BMN_inference(opt):
    """Generate BMN proposals for each validation video and save them as CSV.

    Candidate starts are position 0, local peaks of the start score and
    positions scoring above half of the maximum; candidate ends are the last
    position plus the same two criteria on the end score. Every pair of a
    candidate start and a later candidate end inside the temporal range is
    written to ``./output/BMN_results/<video_name>.csv``.

    Args:
        opt: Option mapping. ``checkpoint_path`` and ``temporal_scale`` are
            read here; the mapping is also forwarded to ``BMN`` and
            ``VideoDataSet``.
    """
    net = torch.nn.DataParallel(BMN(opt), device_ids=[0, 1]).cuda()
    saved = torch.load(opt["checkpoint_path"] + "/BMN_best.pth.tar")
    net.load_state_dict(saved['state_dict'])
    net.eval()

    loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="validation"),
        batch_size=1,
        shuffle=False,
        num_workers=8,
        pin_memory=True,
        drop_last=False)
    tscale = opt["temporal_scale"]
    columns = ["xmin", "xmax", "xmin_score", "xmax_score", "clr_score",
               "reg_socre", "score"]

    def mark_candidates(scores, forced_position):
        """Return a 0/1 array flagging candidate boundary positions."""
        flags = np.zeros(len(scores))
        flags[forced_position] = 1
        half_of_peak = 0.5 * max(scores)
        for t in range(1, tscale - 1):
            local_peak = (scores[t] > scores[t + 1]
                          and scores[t] > scores[t - 1])
            if local_peak or scores[t] > half_of_peak:
                flags[t] = 1
        return flags

    with torch.no_grad():
        for sample_idx, features in loader:
            video_name = loader.dataset.video_list[sample_idx[0]]
            confidence_map, start, end = net(features.cuda())

            start_scores = start[0].detach().cpu().numpy()
            end_scores = end[0].detach().cpu().numpy()
            clr_confidence = (confidence_map[0][1]).detach().cpu().numpy()
            reg_confidence = (confidence_map[0][0]).detach().cpu().numpy()

            # The first position is always a start candidate and the last
            # position is always an end candidate.
            start_bins = mark_candidates(start_scores, 0)
            end_bins = mark_candidates(end_scores, -1)

            # Walk every proposal duration (row) and, for each of them,
            # every start position (column).
            rows = []
            for dur_idx in range(tscale):
                for start_index in range(tscale):
                    end_index = start_index + dur_idx + 1
                    if end_index >= tscale:
                        continue
                    if start_bins[start_index] != 1:
                        continue
                    if end_bins[end_index] != 1:
                        continue
                    xmin_score = start_scores[start_index]
                    xmax_score = end_scores[end_index]
                    clr_score = clr_confidence[dur_idx, start_index]
                    reg_score = reg_confidence[dur_idx, start_index]
                    rows.append([
                        start_index / tscale,
                        end_index / tscale,
                        xmin_score,
                        xmax_score,
                        clr_score,
                        reg_score,
                        xmin_score * xmax_score * clr_score * reg_score,
                    ])

            table = pd.DataFrame(np.stack(rows), columns=columns)
            table.to_csv("./output/BMN_results/" + video_name + ".csv",
                         index=False)