def main():
    """Train or evaluate a TSM ResNet-50 video classifier on Kinetics.

    Behavior is driven entirely by command-line FLAGS:
      * FLAGS.eval_only -- run a single evaluation pass and return.
      * FLAGS.weights   -- optional checkpoint to load before evaluating.
      * FLAGS.resume    -- optional checkpoint to resume training from.
    """
    device = paddle.set_device(FLAGS.device)
    # Plain statement instead of the side-effecting conditional expression
    # `paddle.disable_static(device) if FLAGS.dynamic else None`.
    if FLAGS.dynamic:
        paddle.disable_static(device)

    # Training pipeline: random multi-scale crop + random crop/flip augmentation.
    train_transform = Compose([
        GroupScale(), GroupMultiScaleCrop(), GroupRandomCrop(),
        GroupRandomFlip(), NormalizeImage()
    ])
    train_dataset = KineticsDataset(
        file_list=os.path.join(FLAGS.data, 'train_10.list'),
        pickle_dir=os.path.join(FLAGS.data, 'train_10'),
        label_list=os.path.join(FLAGS.data, 'label_list'),
        transform=train_transform)

    # Validation pipeline: deterministic scale + center crop only.
    val_transform = Compose(
        [GroupScale(), GroupCenterCrop(), NormalizeImage()])
    val_dataset = KineticsDataset(
        file_list=os.path.join(FLAGS.data, 'val_10.list'),
        pickle_dir=os.path.join(FLAGS.data, 'val_10'),
        label_list=os.path.join(FLAGS.data, 'label_list'),
        mode='val',
        transform=val_transform)

    # Use pretrained weights only when evaluating without an explicit checkpoint.
    pretrained = FLAGS.eval_only and FLAGS.weights is None
    model = tsm_resnet50(num_classes=train_dataset.num_classes,
                         pretrained=pretrained)

    # Pure integer math; avoids the float round-trip of
    # int(len(ds) / batch_size / nranks).
    step_per_epoch = len(train_dataset) // (FLAGS.batch_size *
                                            ParallelEnv().nranks)
    optim = make_optimizer(step_per_epoch, model.parameters())
    model.prepare(optimizer=optim,
                  loss=paddle.nn.CrossEntropyLoss(),
                  metrics=paddle.metric.Accuracy(topk=(1, 5)))

    if FLAGS.eval_only:
        if FLAGS.weights is not None:
            model.load(FLAGS.weights, reset_optimizer=True)
        model.evaluate(val_dataset,
                       batch_size=FLAGS.batch_size,
                       num_workers=FLAGS.num_workers)
        return

    if FLAGS.resume is not None:
        model.load(FLAGS.resume)
    model.fit(train_data=train_dataset,
              eval_data=val_dataset,
              epochs=FLAGS.epoch,
              batch_size=FLAGS.batch_size,
              save_dir=FLAGS.save_dir or 'tsm_checkpoint',
              num_workers=FLAGS.num_workers,
              drop_last=True,
              shuffle=True)
def main():
    """Run single-sample TSM inference and log predicted vs. ground-truth label."""
    device = set_device(FLAGS.device)
    if FLAGS.dynamic:
        fluid.enable_dygraph(device)

    # Deterministic eval-style preprocessing for the single pickled sample.
    eval_transform = Compose([GroupScale(), GroupCenterCrop(), NormalizeImage()])
    infer_dataset = KineticsDataset(
        pickle_file=FLAGS.infer_file,
        label_list=FLAGS.label_list,
        mode='test',
        transform=eval_transform)
    class_names = infer_dataset.label_list

    # Fall back to pretrained weights only when no checkpoint path is given.
    model = tsm_resnet50(
        num_classes=len(class_names),
        pretrained=FLAGS.weights is None)
    image_spec = [Input([None, 8, 3, 224, 224], 'float32', name='image')]
    model.prepare(inputs=image_spec, device=FLAGS.device)
    if FLAGS.weights is not None:
        model.load(FLAGS.weights, reset_optimizer=True)

    sample, truth = infer_dataset[0]
    # Prepend a batch dimension for the single clip.
    logits = model.test_batch([sample[np.newaxis, :]])
    predicted = class_names[np.argmax(logits)]
    logger.info("Sample {} predict label: {}, ground truth label: {}".format(
        FLAGS.infer_file, predicted, class_names[int(truth)]))
def get_data(tfrecords_pattern, batch_size, epoch, width, height, length,
             sample_interval, training, num_threads, num_classes=400):
    """Build batched frame and label tensors from Kinetics tfrecords.

    Returns a ``(frames, labels, labels_one_hot)`` tuple where ``frames``
    has shape ``(batch_size, length, height, width, 3)`` and
    ``labels_one_hot`` has shape ``(batch_size, num_classes)``.
    """
    source = KineticsDataset(tfrecords_pattern, batch_size, epoch, width,
                             height, length, sample_interval, training,
                             num_threads)
    frames, labels = source.input_fn()
    # Fix the static batch/clip geometry on the frame tensor.
    frames = tf.reshape(frames, (batch_size, length, height, width, 3))
    # One-hot encode and pin the (batch, classes) shape in one expression.
    labels_one_hot = tf.reshape(tf.one_hot(labels, num_classes),
                                (batch_size, num_classes))
    return frames, labels, labels_one_hot
def main():
    """Predict the label of one pickled Kinetics sample with TSM ResNet-50."""
    # Static-graph mode is opt-in; plain `if` instead of the side-effecting
    # conditional expression `paddle.enable_static() if FLAGS.static else None`.
    if FLAGS.static:
        paddle.enable_static()
    # Return value unused; set_device is called for its side effect only.
    paddle.set_device(FLAGS.device)

    # Deterministic eval-style preprocessing.
    transform = Compose([GroupScale(), GroupCenterCrop(), NormalizeImage()])
    dataset = KineticsDataset(pickle_file=FLAGS.infer_file,
                              label_list=FLAGS.label_list,
                              mode='test',
                              transform=transform)
    labels = dataset.label_list

    # Fall back to pretrained weights only when no checkpoint path is given.
    model = tsm_resnet50(num_classes=len(labels),
                         pretrained=FLAGS.weights is None)
    model.prepare()
    if FLAGS.weights is not None:
        model.load(FLAGS.weights, reset_optimizer=True)

    imgs, label = dataset[0]
    # Prepend a batch dimension for the single clip.
    pred = model.predict_batch([imgs[np.newaxis, :]])
    pred = labels[np.argmax(pred)]
    print("Sample {} predict label: {}, ground truth label: {}".format(
        FLAGS.infer_file, pred, labels[int(label)]))
def test_slowfast(args):
    """Multi-view/multi-crop evaluation of a SlowFast model on Kinetics.

    Each video is sampled as num_ensemble_views * num_spatial_crops clips;
    per-clip softmax predictions are summed per video before computing
    top-1 / top-5 accuracy.
    """
    config = parse_config(args.config_file)
    test_config = merge_configs(config, 'test', vars(args))
    print_configs(test_config, "Test")

    # Device selection: CPU, a single GPU, or the per-process GPU under
    # data-parallel launch.
    if not args.use_gpu:
        place = fluid.CPUPlace()
    elif not args.use_data_parallel:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)

    _nranks = ParallelEnv().nranks  # number of GPUs (trainer processes)
    # batch_size of each GPU: global batch split evenly across ranks
    bs_single = int(test_config.TEST.batch_size / _nranks)

    with fluid.dygraph.guard(place):
        # build model
        slowfast = SlowFast(cfg=test_config, num_classes=400)
        if args.weights:
            assert os.path.exists(args.weights + '.pdparams'),\
                "Given weight dir {} not exist.".format(args.weights)
            logger.info('load test weights from {}'.format(args.weights))
            model_dict, _ = fluid.load_dygraph(args.weights)
            slowfast.set_dict(model_dict)

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
            slowfast = fluid.dygraph.parallel.DataParallel(slowfast, strategy)

        # create reader
        test_data = KineticsDataset(mode="test", cfg=test_config)
        test_sampler = DistributedBatchSampler(test_data,
                                               batch_size=bs_single,
                                               shuffle=False,
                                               drop_last=False)
        test_loader = DataLoader(test_data,
                                 batch_sampler=test_sampler,
                                 places=place,
                                 feed_list=None,
                                 num_workers=8,
                                 return_list=True)

        # start eval: accumulators for the per-video prediction ensemble
        num_ensemble_views = test_config.TEST.num_ensemble_views
        num_spatial_crops = test_config.TEST.num_spatial_crops
        num_cls = test_config.MODEL.num_classes
        num_clips = num_ensemble_views * num_spatial_crops
        num_videos = len(test_data) // num_clips
        video_preds = np.zeros((num_videos, num_cls))
        video_labels = np.zeros((num_videos, 1), dtype="int64")
        clip_count = {}  # vid_id -> list of clip indices already seen

        print(
            "[EVAL] eval start, number of videos {}, total number of clips {}".
            format(num_videos, num_clips * num_videos))
        slowfast.eval()
        for batch_id, data in enumerate(test_loader):
            # call net
            model_inputs = [data[0], data[1]]
            preds = slowfast(model_inputs, training=False)
            labels = data[2]
            clip_ids = data[3]

            # gather multi card; results of the following process are then
            # identical on every card.
            if _nranks > 1:
                preds = _all_gather(preds, _nranks)
                labels = _all_gather(labels, _nranks)
                clip_ids = _all_gather(clip_ids, _nranks)

            # to numpy
            preds = preds.numpy()
            labels = labels.numpy().astype("int64")
            clip_ids = clip_ids.numpy()

            # preds ensemble: clip id encodes (video index, clip index)
            for ind in range(preds.shape[0]):
                vid_id = int(clip_ids[ind]) // num_clips
                ts_idx = int(clip_ids[ind]) % num_clips
                if vid_id not in clip_count:
                    clip_count[vid_id] = []
                if ts_idx in clip_count[vid_id]:
                    # duplicate clip (can happen when the sampler pads the
                    # last batch) -- skip so it is not counted twice
                    print(
                        "[EVAL] Passed!! read video {} clip index {} / {} repeatedly."
                        .format(vid_id, ts_idx, clip_ids[ind]))
                else:
                    clip_count[vid_id].append(ts_idx)
                    video_preds[vid_id] += preds[ind]  # ensemble method: sum
                    # all clips of a video must carry the same label
                    if video_labels[vid_id].sum() > 0:
                        assert video_labels[vid_id] == labels[ind]
                    video_labels[vid_id] = labels[ind]
            if batch_id % args.log_interval == 0:
                print("[EVAL] Processing batch {}/{} ...".format(
                    batch_id, len(test_data) // test_config.TEST.batch_size))

        # check clip index of each video: every video should have seen clip
        # indices 0..num_clips-1 exactly once (sum check 0+1+...+n-1)
        for key in clip_count.keys():
            if len(clip_count[key]) != num_clips or sum(
                    clip_count[key]) != num_clips * (num_clips - 1) / 2:
                print(
                    "[EVAL] Warning!! video [{}] clip count [{}] not match number clips {}"
                    .format(key, clip_count[key], num_clips))

        video_preds = to_variable(video_preds)
        video_labels = to_variable(video_labels)
        acc_top1 = fluid.layers.accuracy(input=video_preds,
                                         label=video_labels,
                                         k=1)
        acc_top5 = fluid.layers.accuracy(input=video_preds,
                                         label=video_labels,
                                         k=5)
        print('[EVAL] eval finished, avg_acc1= {}, avg_acc5= {} '.format(
            acc_top1.numpy(), acc_top5.numpy()))
def main():
    """Train or evaluate a TSM ResNet-50 on Kinetics (legacy hapi API).

    Uses the older high-level API (Input specs, CrossEntropy, Accuracy).
    FLAGS.eval_only runs a single evaluation pass; FLAGS.resume continues
    training from a checkpoint.
    """
    device = set_device(FLAGS.device)
    # enable dygraph mode only when requested; otherwise stay in static mode
    fluid.enable_dygraph(device) if FLAGS.dynamic else None

    # training pipeline: random multi-scale crop + random crop/flip augmentation
    train_transform = Compose([
        GroupScale(), GroupMultiScaleCrop(), GroupRandomCrop(),
        GroupRandomFlip(), NormalizeImage()
    ])
    train_dataset = KineticsDataset(
        file_list=os.path.join(FLAGS.data, 'train_10.list'),
        pickle_dir=os.path.join(FLAGS.data, 'train_10'),
        label_list=os.path.join(FLAGS.data, 'label_list'),
        transform=train_transform)

    # validation pipeline: deterministic scale + center crop only
    val_transform = Compose(
        [GroupScale(), GroupCenterCrop(), NormalizeImage()])
    val_dataset = KineticsDataset(
        file_list=os.path.join(FLAGS.data, 'val_10.list'),
        pickle_dir=os.path.join(FLAGS.data, 'val_10'),
        label_list=os.path.join(FLAGS.data, 'label_list'),
        mode='val',
        transform=val_transform)

    # use pretrained weights only when evaluating without an explicit checkpoint
    pretrained = FLAGS.eval_only and FLAGS.weights is None
    model = tsm_resnet50(num_classes=train_dataset.num_classes,
                         pretrained=pretrained)

    # steps per epoch across all ranks (used for the LR schedule)
    step_per_epoch = int(len(train_dataset) / FLAGS.batch_size \
                         / ParallelEnv().nranks)
    optim = make_optimizer(step_per_epoch, model.parameters())

    # static input specs: 8-segment clips of 3x224x224 frames, int64 labels
    inputs = [Input([None, 8, 3, 224, 224], 'float32', name='image')]
    labels = [Input([None, 1], 'int64', name='label')]

    model.prepare(optim,
                  CrossEntropy(),
                  metrics=Accuracy(topk=(1, 5)),
                  inputs=inputs,
                  labels=labels,
                  device=FLAGS.device)

    if FLAGS.eval_only:
        if FLAGS.weights is not None:
            model.load(FLAGS.weights, reset_optimizer=True)
        model.evaluate(val_dataset,
                       batch_size=FLAGS.batch_size,
                       num_workers=FLAGS.num_workers)
        return

    if FLAGS.resume is not None:
        model.load(FLAGS.resume)

    model.fit(train_data=train_dataset,
              eval_data=val_dataset,
              epochs=FLAGS.epoch,
              batch_size=FLAGS.batch_size,
              save_dir='tsm_checkpoint',
              num_workers=FLAGS.num_workers,
              drop_last=True,
              shuffle=True)
def train(args):
    """Train a SlowFast model on Kinetics in fluid dygraph mode.

    Supports single-GPU, CPU, and data-parallel multi-GPU training, optional
    checkpoint resume, optional VisualDL logging, precise-BN recalibration,
    per-epoch checkpointing, and per-epoch validation via `val()`.
    """
    config = parse_config(args.config)
    train_config = merge_configs(config, 'train', vars(args))
    valid_config = merge_configs(config, 'valid', vars(args))
    print_configs(train_config, 'Train')

    # visual dl to visualize training process
    local_rank = fluid.dygraph.parallel.Env().local_rank
    if args.use_visualdl:
        try:
            from visualdl import LogWriter
            # one log sub-directory per rank
            vdl_writer = LogWriter(args.vd_logdir + '/' + str(local_rank))
        except:
            print(
                "visualdl is not installed, please install visualdl if you want to use"
            )

    # device selection: CPU, single GPU, or per-process GPU for data-parallel
    if not args.use_gpu:
        place = fluid.CPUPlace()
    elif not args.use_data_parallel:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)

    # fix seeds for reproducibility
    random.seed(0)
    np.random.seed(0)
    paddle.framework.seed(0)

    with fluid.dygraph.guard(place):
        # 1. init net
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        video_model = SlowFast(cfg=train_config, num_classes=400)
        if args.use_data_parallel:
            video_model = fluid.dygraph.parallel.DataParallel(
                video_model, strategy)

        # per-GPU batch size = global batch size / number of visible GPUs
        bs_denominator = 1
        if args.use_gpu:
            gpus = os.getenv("CUDA_VISIBLE_DEVICES", "")
            if gpus == "":
                pass
            else:
                gpus = gpus.split(",")
                num_gpus = len(gpus)
                assert num_gpus == train_config.TRAIN.num_gpus, \
                    "num_gpus({}) set by CUDA_VISIBLE_DEVICES" \
                    "shoud be the same as that" \
                    "set in {}({})".format(
                        num_gpus, args.config, train_config.TRAIN.num_gpus)
            bs_denominator = train_config.TRAIN.num_gpus

        # 2. reader and optimizer
        bs_train_single = int(train_config.TRAIN.batch_size / bs_denominator)
        bs_val_single = int(train_config.VALID.batch_size / bs_denominator)
        train_data = KineticsDataset(mode="train", cfg=train_config)
        valid_data = KineticsDataset(mode="valid", cfg=valid_config)
        train_sampler = DistributedBatchSampler(train_data,
                                                batch_size=bs_train_single,
                                                shuffle=True,
                                                drop_last=True)
        train_loader = DataLoader(train_data,
                                  batch_sampler=train_sampler,
                                  places=place,
                                  feed_list=None,
                                  num_workers=8,
                                  return_list=True)
        valid_sampler = DistributedBatchSampler(valid_data,
                                                batch_size=bs_val_single,
                                                shuffle=False,
                                                drop_last=False)
        valid_loader = DataLoader(valid_data,
                                  batch_sampler=valid_sampler,
                                  places=place,
                                  feed_list=None,
                                  num_workers=8,
                                  return_list=True)

        train_iter_num = len(train_loader)
        optimizer = create_optimizer(train_config.TRAIN, train_iter_num,
                                     video_model.parameters())

        # 3. load checkpoint
        if args.resume:
            saved_path = "slowfast_epoch"  # default
            # NOTE(review): args.resume_epoch is concatenated as a string here
            model_path = saved_path + args.resume_epoch
            assert os.path.exists(model_path + ".pdparams"), \
                "Given dir {}.pdparams not exist.".format(model_path)
            assert os.path.exists(model_path + ".pdopt"), \
                "Given dir {}.pdopt not exist.".format(model_path)
            para_dict, opti_dict = fluid.dygraph.load_dygraph(model_path)
            video_model.set_dict(para_dict)
            optimizer.set_dict(opti_dict)
            if args.use_visualdl:
                # change log path otherwise log history will be overwritten
                vdl_writer = LogWriter(args.vd_logdir + args.resume_epoch +
                                       '/' + str(local_rank))

        # 4. train loop
        reader_cost_averager = TimeAverager()
        batch_cost_averager = TimeAverager()
        for epoch in range(train_config.TRAIN.epoch):
            epoch_start = time.time()

            # skip epochs already covered by the resumed checkpoint
            if args.resume and epoch <= args.resume_epoch:
                print("epoch:{}<=args.resume_epoch:{}, pass".format(
                    epoch, args.resume_epoch))
                continue

            video_model.train()
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0

            print('start for, Epoch {}/{} '.format(epoch,
                                                   train_config.TRAIN.epoch))
            batch_start = time.time()
            for batch_id, data in enumerate(train_loader):
                reader_cost_averager.record(time.time() - batch_start)

                y_data = data[2]
                labels = to_variable(y_data)
                labels.stop_gradient = True
                # data[0]/data[1] are the slow and fast pathway inputs
                model_inputs = [data[0], data[1]]

                # 4.1.1 call net()
                preds = video_model(model_inputs, training=True)
                loss_out = fluid.layers.softmax_with_cross_entropy(
                    logits=preds, label=labels)
                avg_loss = fluid.layers.mean(loss_out)
                acc_top1 = fluid.layers.accuracy(input=preds,
                                                 label=labels,
                                                 k=1)
                acc_top5 = fluid.layers.accuracy(input=preds,
                                                 label=labels,
                                                 k=5)

                # 4.1.2 call backward() (with loss scaling + grad all-reduce
                # under data-parallel)
                if args.use_data_parallel:
                    avg_loss = video_model.scale_loss(avg_loss)
                    avg_loss.backward()
                    video_model.apply_collective_grads()
                else:
                    avg_loss.backward()

                # 4.1.3 call minimize()
                optimizer.minimize(avg_loss)
                video_model.clear_gradients()

                avg_loss_value = avg_loss.numpy()[0]
                acc_top1_value = acc_top1.numpy()[0]
                acc_top5_value = acc_top5.numpy()[0]
                total_loss += avg_loss_value
                total_acc1 += acc_top1_value
                total_acc5 += acc_top5_value
                total_sample += 1

                if args.use_visualdl:
                    vdl_writer.add_scalar(tag="train/loss",
                                          step=epoch * train_iter_num +
                                          batch_id,
                                          value=avg_loss.numpy())
                    vdl_writer.add_scalar(tag="train/err1",
                                          step=epoch * train_iter_num +
                                          batch_id,
                                          value=1.0 - acc_top1.numpy())
                    vdl_writer.add_scalar(tag="train/err5",
                                          step=epoch * train_iter_num +
                                          batch_id,
                                          value=1.0 - acc_top5.numpy())

                batch_cost_averager.record(time.time() - batch_start,
                                           num_samples=bs_train_single)
                if batch_id % args.log_interval == 0:
                    print(
                        "[Epoch %d, batch %d] loss %.5f, err1 %.5f, err5 %.5f, batch_cost: %.5f sec, reader_cost: %.5f sec, ips: %.5f samples/sec"
                        % (epoch, batch_id, avg_loss_value,
                           1.0 - acc_top1_value, 1. - acc_top5_value,
                           batch_cost_averager.get_average(),
                           reader_cost_averager.get_average(),
                           batch_cost_averager.get_ips_average()))
                    reader_cost_averager.reset()
                    batch_cost_averager.reset()

                batch_start = time.time()

            train_epoch_cost = time.time() - epoch_start
            print(
                '[Epoch %d end] avg_loss %.5f, avg_err1 %.5f, avg_err5= %.5f, epoch_cost: %.5f sec' % \
                (epoch, total_loss / total_sample, 1. - total_acc1 / total_sample, 1. - total_acc5 / total_sample, train_epoch_cost))
            if args.use_visualdl:
                vdl_writer.add_scalar(tag="train_epoch/loss",
                                      step=epoch,
                                      value=total_loss / total_sample)
                vdl_writer.add_scalar(tag="train_epoch/err1",
                                      step=epoch,
                                      value=1. - total_acc1 / total_sample)
                vdl_writer.add_scalar(tag="train_epoch/err5",
                                      step=epoch,
                                      value=1. - total_acc5 / total_sample)

            # 4.3 do preciseBN: recompute BN statistics over training batches
            if valid_config.VALID.use_preciseBN and epoch % valid_config.VALID.preciseBN_interval == 0:
                print("do precise BN in epoch {} ...".format(epoch))
                precise_BN(
                    video_model, train_loader,
                    min(valid_config.VALID.num_batches_preciseBN,
                        len(train_loader)))

            # 4.3 save checkpoint (rank 0 only)
            if local_rank == 0:
                if not os.path.isdir(args.save_dir):
                    os.makedirs(args.save_dir)
                model_path = os.path.join(args.save_dir,
                                          "slowfast_epoch{}".format(epoch))
                fluid.dygraph.save_dygraph(video_model.state_dict(),
                                           model_path)
                fluid.dygraph.save_dygraph(optimizer.state_dict(), model_path)
                print('save_dygraph End, Epoch {}/{} '.format(
                    epoch, train_config.TRAIN.epoch))

            # 4.4 validation
            video_model.eval()
            val(epoch, video_model, valid_loader, args.use_visualdl)

        logger.info('[TRAIN] training finished')
def infer_slowfast(args):
    """Run SlowFast inference over a file list and write top-1/top-5 JSON results.

    Each video is sampled as num_ensemble_views * num_spatial_crops clips;
    per-clip predictions are summed per video, then averaged, and the
    per-video top-1/top-5 classes are saved to INFER.save_path/result.json.
    """
    config = parse_config(args.config_file)
    infer_config = merge_configs(config, 'infer', vars(args))
    print_configs(infer_config, "Infer")

    if not os.path.isdir(infer_config.INFER.save_path):
        os.makedirs(infer_config.INFER.save_path)

    # device selection: CPU, single GPU, or per-process GPU for data-parallel
    if not args.use_gpu:
        place = fluid.CPUPlace()
    elif not args.use_data_parallel:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)

    _nranks = ParallelEnv().nranks  # number of GPUs (trainer processes)
    # batch_size of each GPU: global batch split evenly across ranks
    bs_single = int(infer_config.INFER.batch_size / _nranks)

    with fluid.dygraph.guard(place):
        # build model
        slowfast = SlowFast(cfg=infer_config, num_classes=400)
        if args.weights:
            assert os.path.exists(args.weights + '.pdparams'),\
                "Given weight dir {} not exist.".format(args.weights)
            logger.info('load test weights from {}'.format(args.weights))
            model_dict, _ = fluid.load_dygraph(args.weights)
            slowfast.set_dict(model_dict)

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
            slowfast = fluid.dygraph.parallel.DataParallel(
                slowfast, strategy, find_unused_parameters=False)

        # create reader
        infer_data = KineticsDataset(mode="infer", cfg=infer_config)
        infer_sampler = DistributedBatchSampler(infer_data,
                                                batch_size=bs_single,
                                                shuffle=False,
                                                drop_last=False)
        infer_loader = DataLoader(infer_data,
                                  batch_sampler=infer_sampler,
                                  places=place,
                                  feed_list=None,
                                  num_workers=0,
                                  return_list=True)

        # start infer: accumulators for the per-video prediction ensemble
        num_ensemble_views = infer_config.INFER.num_ensemble_views
        num_spatial_crops = infer_config.INFER.num_spatial_crops
        num_cls = infer_config.MODEL.num_classes
        num_clips = num_ensemble_views * num_spatial_crops
        num_videos = len(infer_data) // num_clips
        video_preds = np.zeros((num_videos, num_cls))
        clip_count = {}  # vid_id -> list of clip indices already seen

        # video paths (one per line) are reported alongside predictions
        video_paths = []
        with open(infer_config.INFER.filelist, "r") as f:
            for path in f.read().splitlines():
                video_paths.append(path)
        print(
            "[INFER] infer start, number of videos {}, number of clips {}, total number of clips {}"
            .format(num_videos, num_clips, num_clips * num_videos))

        slowfast.eval()
        for batch_id, data in enumerate(infer_loader):
            # call net
            model_inputs = [data[0], data[1]]
            preds = slowfast(model_inputs, training=False)
            clip_ids = data[3]

            # gather multi card; results of the following process are then
            # identical on every card.
            if _nranks > 1:
                preds = _all_gather(preds, _nranks)
                clip_ids = _all_gather(clip_ids, _nranks)

            # to numpy
            preds = preds.numpy()
            clip_ids = clip_ids.numpy()

            # preds ensemble: clip id encodes (video index, clip index)
            for ind in range(preds.shape[0]):
                vid_id = int(clip_ids[ind]) // num_clips
                ts_idx = int(clip_ids[ind]) % num_clips
                if vid_id not in clip_count:
                    clip_count[vid_id] = []
                if ts_idx in clip_count[vid_id]:
                    # duplicate clip (e.g. sampler padding) -- skip
                    print(
                        "[INFER] Passed!! read video {} clip index {} / {} repeatedly."
                        .format(vid_id, ts_idx, clip_ids[ind]))
                else:
                    clip_count[vid_id].append(ts_idx)
                    video_preds[vid_id] += preds[ind]  # ensemble method: sum
            if batch_id % args.log_interval == 0:
                print("[INFER] Processing batch {}/{} ...".format(
                    batch_id,
                    len(infer_data) // infer_config.INFER.batch_size))

        # check clip index of each video: every video should have seen clip
        # indices 0..num_clips-1 exactly once (sum check 0+1+...+n-1)
        for key in clip_count.keys():
            if len(clip_count[key]) != num_clips or sum(
                    clip_count[key]) != num_clips * (num_clips - 1) / 2:
                print(
                    "[INFER] Warning!! video [{}] clip count [{}] not match number clips {}"
                    .format(key, clip_count[key], num_clips))

        res_list = []
        for j in range(video_preds.shape[0]):
            pred = to_variable(video_preds[j] / num_clips)  # mean prob
            video_path = video_paths[j]
            # NOTE(review): `pred` is already a Variable at this point; this
            # second to_variable call looks redundant -- confirm and remove.
            pred = to_variable(pred)
            top1_values, top1_indices = fluid.layers.topk(pred, k=1)
            top5_values, top5_indices = fluid.layers.topk(pred, k=5)
            top1_values = top1_values.numpy().astype("float64")[0]
            top1_indices = int(top1_indices.numpy()[0])
            top5_values = list(top5_values.numpy().astype("float64"))
            top5_indices = [int(item) for item in top5_indices.numpy()
                            ]  # np.int is not JSON serializable
            print(
                "[INFER] video id [{}], top1 value {}, top1 indices {}".format(
                    video_path, top1_values, top1_indices))
            print(
                "[INFER] video id [{}], top5 value {}, top5 indices {}".format(
                    video_path, top5_values, top5_indices))
            save_dict = {
                'video_id': video_path,
                'top1_values': top1_values,
                'top1_indices': top1_indices,
                'top5_values': top5_values,
                'top5_indices': top5_indices
            }
            res_list.append(save_dict)

        with open(
                os.path.join(infer_config.INFER.save_path,
                             'result' + '.json'), 'w') as f:
            json.dump(res_list, f)
        print('[INFER] infer finished, results saved in {}'.format(
            infer_config.INFER.save_path))