def test_mul(self):
    """Prune a conv layer that feeds two stacked fc layers and verify that
    the first fc weight's input dimension shrinks with the conv channels
    (8 -> 4 channels over a 16x16 map gives 4*16*16 = 1024 rows)."""
    train_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        image = fluid.data(name="image", shape=[None, 3, 16, 16])
        conv1 = conv_bn_layer(image, 8, 3, "conv1")
        fc_0 = paddle.fluid.layers.fc(conv1, size=10)
        fc_1 = paddle.fluid.layers.fc(fc_0, size=10)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.Scope()
    exe.run(startup_prog, scope=scope)
    # test backward search of concat
    pruned_program, _, _ = Pruner().prune(
        train_prog,
        scope,
        params=["conv1_weights"],
        ratios=[0.5],
        place=place,
        lazy=False,
        only_graph=True,
        param_backup=None,
        param_shape_backup=None)
    expected = {
        "conv1_weights": (4, 3, 3, 3),
        "fc_0.w_0": (1024, 10),
        "fc_1.w_0": (10, 10)
    }
    for param in pruned_program.global_block().all_parameters():
        if param.name in expected:
            self.assertTrue(expected[param.name] == param.shape)
def static_prune(self, net, ratios):
    """Build ``net`` as a static graph, prune it according to the
    parameter-name -> ratio mapping in ``ratios``, and return a dict of
    every remaining parameter name to its (possibly pruned) shape."""
    paddle.enable_static()
    main_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.unique_name.guard():
        with fluid.program_guard(main_prog, startup_prog):
            image = fluid.data(name="image", shape=[None, 3, 16, 16])
            model = net(pretrained=False)
            out = model(image)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.Scope()
    exe.run(startup_prog, scope=scope)
    pruner = Pruner()
    main_prog, _, _ = pruner.prune(
        main_prog,
        scope,
        params=ratios.keys(),
        ratios=ratios.values(),
        place=place,
        lazy=False,
        only_graph=False,
        param_backup=None,
        param_shape_backup=None)
    # Collect post-prune shapes for the caller to assert against.
    return {
        param.name: param.shape
        for param in main_prog.global_block().all_parameters()
    }
def main():
    """Export a pruned inference model: build the test graph, prune the
    requested parameters (graph-only), load weights and save the model."""
    cfg = load_config(FLAGS.config)
    if 'architecture' in cfg:
        main_arch = cfg.architecture
    else:
        raise ValueError("'architecture' not specified in config file.")
    merge_config(FLAGS.opt)
    # Use CPU for exporting inference model instead of GPU
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    model = create(main_arch)
    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['TestReader']['inputs_def']
            inputs_def['use_dataloader'] = False
            feed_vars, _ = model.build_inputs(**inputs_def)
            test_fetches = model.test(feed_vars)
    infer_prog = infer_prog.clone(True)
    pruned_params = FLAGS.pruned_params
    assert (
        FLAGS.pruned_params is not None
    ), "FLAGS.pruned_params is empty!!! Please set it by '--pruned_params' option."
    pruned_params = FLAGS.pruned_params.strip().split(",")
    logger.info("pruned params: {}".format(pruned_params))
    pruned_ratios = [float(n) for n in FLAGS.pruned_ratios.strip().split(",")]
    logger.info("pruned ratios: {}".format(pruned_ratios))
    assert (len(pruned_params) == len(pruned_ratios)
            ), "The length of pruned params and pruned ratios should be equal."
    # BUGFIX: the old check compared lists (`ratios > [0] * n`), which is
    # lexicographic and only inspects the first differing element, so e.g.
    # [0.5, 1.5] passed. Validate every ratio explicitly instead.
    assert all(0.0 < ratio < 1.0 for ratio in pruned_ratios), \
        "The elements of pruned ratios should be in range (0, 1)."
    base_flops = flops(infer_prog)
    pruner = Pruner()
    # only_graph=True: exported graph is reshaped, weights are loaded below.
    infer_prog, _, _ = pruner.prune(
        infer_prog,
        fluid.global_scope(),
        params=pruned_params,
        ratios=pruned_ratios,
        place=place,
        only_graph=True)
    pruned_flops = flops(infer_prog)
    logger.info("pruned FLOPS: {}".format(
        float(base_flops - pruned_flops) / base_flops))
    exe.run(startup_prog)
    checkpoint.load_checkpoint(exe, infer_prog, cfg.weights)
    save_infer_model(FLAGS, exe, feed_vars, test_fetches, infer_prog)
def test_prune(self):
    """Prune "conv4_weights" (ratio 0.5) with the 'bn_scale' criterion and
    check how pruning propagates through the two elementwise-add skips.

    Fixes Python 2 long literals (``4L`` etc.) in the expected-shape table,
    which are a SyntaxError under Python 3, removes the unused
    ``idx_selector`` local, and actually asserts the shapes instead of only
    printing them.
    """
    main_program = fluid.Program()
    startup_program = fluid.Program()
    #   X       X              O       X              O
    # conv1-->conv2-->sum1-->conv3-->conv4-->sum2-->conv5-->conv6
    #   |              ^ |                    ^
    #   |______________| |____________________|
    #
    # X: prune output channels
    # O: prune input channels
    with fluid.program_guard(main_program, startup_program):
        input = fluid.data(name="image", shape=[None, 3, 16, 16])
        conv1 = conv_bn_layer(input, 8, 3, "conv1")
        conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
        sum1 = conv1 + conv2
        conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
        conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
        sum2 = conv4 + sum1
        conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
        conv6 = conv_bn_layer(conv5, 8, 3, "conv6")
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.Scope()
    exe.run(startup_program, scope=scope)
    criterion = 'bn_scale'
    pruner = Pruner(criterion)
    main_program, _, _ = pruner.prune(
        main_program,
        scope,
        params=["conv4_weights"],
        ratios=[0.5],
        place=place,
        lazy=False,
        only_graph=False,
        param_backup=None,
        param_shape_backup=None)
    # Expected shapes: pruning conv4's output (8 -> 4) back-propagates
    # through the adds into conv1/conv2 outputs and conv3/conv5 inputs.
    shapes = {
        "conv1_weights": (4, 3, 3, 3),
        "conv2_weights": (4, 4, 3, 3),
        "conv3_weights": (8, 4, 3, 3),
        "conv4_weights": (4, 8, 3, 3),
        "conv5_weights": (8, 4, 3, 3),
        "conv6_weights": (8, 8, 3, 3)
    }
    for param in main_program.global_block().all_parameters():
        if "weights" in param.name:
            print("param: {}; param shape: {}".format(param.name,
                                                      param.shape))
            if param.name in shapes:
                self.assertTrue(shapes[param.name] == param.shape)
def channel_prune(program, prune_names, prune_ratios, place, only_graph=False):
    """Prune channels of a Program.

    Args:
        program (paddle.fluid.Program): the Program to prune; see
            https://paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/basic_concept/program.html#program.
        prune_names (list): names of the parameters to prune.
        prune_ratios (list): prune ratios, aligned one-to-one with
            ``prune_names``. NOTE: entries may be adjusted in place (see
            below), so the caller's list is mutated.
        place (paddle.fluid.CUDAPlace/paddle.fluid.CPUPlace): device to run on.
        only_graph (bool): if True only the graph is modified; if False both
            the graph and the parameters in the global scope are modified.
            Defaults to False.

    Returns:
        paddle.fluid.Program: the pruned Program.
    """
    # Snapshot the shape of every variable that has one; some vars
    # (e.g. non-tensor vars) raise on .shape access, hence the try/except.
    prog_var_shape_dict = {}
    for var in program.list_vars():
        try:
            prog_var_shape_dict[var.name] = var.shape
        except Exception:
            pass
    index = 0
    for param, ratio in zip(prune_names, prune_ratios):
        # Output-channel count is dim 0 of the parameter.
        origin_num = prog_var_shape_dict[param][0]
        pruned_num = int(round(origin_num * ratio))
        # Guard against pruning every channel: back the ratio off in 0.1
        # steps until at least one output channel survives, and write the
        # adjusted ratio back into the caller's list.
        while origin_num == pruned_num:
            ratio -= 0.1
            pruned_num = int(round(origin_num * (ratio)))
            prune_ratios[index] = ratio
        index += 1
    scope = fluid.global_scope()
    pruner = Pruner()
    # NOTE(review): other call sites in this codebase pass None for
    # param_backup/param_shape_backup; False appears equivalent (falsy) —
    # confirm against Pruner.prune's signature.
    program, _, _ = pruner.prune(
        program,
        scope,
        params=prune_names,
        ratios=prune_ratios,
        place=place,
        lazy=False,
        only_graph=only_graph,
        param_backup=False,
        param_shape_backup=False)
    return program
def test_split(self):
    """Prune "conv2_weights" and verify the pruning back-propagates through
    the elementwise add into the first split half of conv1 (and forward into
    conv3's input channels).

    Fixes the final check: the old filter also required "conv2d" in the
    parameter name, which no parameter here matches (they are named
    "convN_weights"), so the assertions never executed. Also drops the dead
    pre-prune ``shapes`` loop whose result was immediately overwritten.
    """
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        input = fluid.data(name="image", shape=[None, 3, 16, 16])
        conv1 = conv_bn_layer(input, 8, 3, "conv1")
        conv2 = conv_bn_layer(input, 4, 3, "conv2")
        split_0, split_1 = paddle.split(conv1, 2, axis=1)
        add = split_0 + conv2
        out = conv_bn_layer(add, 4, 3, "conv3")
        out1 = conv_bn_layer(split_1, 4, 4, "conv4")
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.Scope()
    exe.run(startup_program, scope=scope)
    pruner = Pruner()
    # test backward search of concat
    pruned_program, _, _ = pruner.prune(
        main_program,
        scope,
        params=["conv2_weights"],
        ratios=[0.5],
        place=place,
        lazy=False,
        only_graph=True,
        param_backup=None,
        param_shape_backup=None)
    # conv2 loses half its filters (4 -> 2); the add forces the first half
    # of conv1's split to shrink too (8 -> 6 overall), and conv3 consumes
    # the pruned 2-channel sum. conv4 (fed by the untouched half) keeps its
    # 4 input channels.
    shapes = {
        "conv1_weights": (6, 3, 3, 3),
        "conv2_weights": (2, 3, 3, 3),
        "conv3_weights": (4, 2, 3, 3),
        "conv4_weights": (4, 4, 3, 3),
    }
    for param in pruned_program.global_block().all_parameters():
        if param.name in shapes:
            self.assertTrue(shapes[param.name] == param.shape)
def test_prune(self):
    """End-to-end prune of a graph that exercises many op kinds (add, sub,
    mul, sum, depthwise conv, floor, scale, concat, reshape, fc) plus a real
    optimizer: build the net, run one training step, then prune every
    conv-named parameter by 0.5 in the global scope."""
    main_program = fluid.Program()
    startup_program = fluid.Program()
    #   X       X              O       X              O
    # conv1-->conv2-->sum1-->conv3-->conv4-->sum2-->conv5-->conv6
    #   |              ^ |                    ^
    #   |______________| |____________________|
    #
    # X: prune output channels
    # O: prune input channels
    with fluid.program_guard(main_program, startup_program):
        input = fluid.data(name="image", shape=[None, 3, 16, 16])
        label = fluid.data(name='label', shape=[None, 1], dtype='int64')
        conv1 = conv_bn_layer(input, 8, 3, "conv1", act='relu')
        conv2 = conv_bn_layer(conv1, 8, 3, "conv2", act='leaky_relu')
        sum1 = conv1 + conv2
        conv3 = conv_bn_layer(sum1, 8, 3, "conv3", act='relu6')
        conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
        sum2 = conv4 + sum1
        conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
        # sum op (not elementwise_add) — exercises multi-input sum pruning.
        sum3 = fluid.layers.sum([sum2, conv5])
        conv6 = conv_bn_layer(sum3, 8, 3, "conv6")
        sub1 = conv6 - sum3
        mult = sub1 * sub1
        # Depthwise conv: groups == channels, cudnn disabled.
        conv7 = conv_bn_layer(
            mult, 8, 3, "Depthwise_Conv7", groups=8, use_cudnn=False)
        floored = fluid.layers.floor(conv7)
        scaled = fluid.layers.scale(floored)
        concated = fluid.layers.concat([scaled, mult], axis=1)
        conv8 = conv_bn_layer(concated, 8, 3, "conv8")
        # Flatten 8ch x 16 x 16 into [-1, 128, 16] for the fc head.
        feature = fluid.layers.reshape(conv8, [-1, 128, 16])
        predict = fluid.layers.fc(input=feature, size=10, act='softmax')
        cost = fluid.layers.cross_entropy(input=predict, label=label)
        adam_optimizer = fluid.optimizer.AdamOptimizer(0.01)
        avg_cost = fluid.layers.mean(cost)
        adam_optimizer.minimize(avg_cost)
    # Prune every parameter whose name mentions 'conv'.
    params = []
    for param in main_program.all_parameters():
        if 'conv' in param.name:
            params.append(param.name)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_program)
    x = np.random.random(size=(10, 3, 16, 16)).astype('float32')
    # NOTE: rebinds `label` from the graph Variable above to a numpy batch.
    label = np.random.random(size=(10, 1)).astype('int64')
    # One real training step so the global scope holds trained weights
    # (including optimizer state) before pruning.
    loss_data, = exe.run(main_program,
                         feed={"image": x,
                               "label": label},
                         fetch_list=[cost.name])
    pruner = Pruner()
    main_program, _, _ = pruner.prune(
        main_program,
        fluid.global_scope(),
        params=params,
        ratios=[0.5] * len(params),
        place=place,
        lazy=False,
        only_graph=False,
        param_backup=None,
        param_shape_backup=None)
def main():
    """
    Main evaluate function: build the eval graph, prune the requested
    parameters in the global scope, load weights and run evaluation.
    """
    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()
    main_arch = cfg.architecture
    multi_scale_test = getattr(cfg, 'MultiScaleTEST', None)
    # define executor
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # build program
    model = create(main_arch)
    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['EvalReader']['inputs_def']
            feed_vars, loader = model.build_inputs(**inputs_def)
            if multi_scale_test is None:
                fetches = model.eval(feed_vars)
            else:
                fetches = model.eval(feed_vars, multi_scale_test)
    eval_prog = eval_prog.clone(True)
    exe.run(startup_prog)
    reader = create_reader(cfg.EvalReader)
    # When iterable mode, set set_sample_list_generator(reader, place)
    loader.set_sample_list_generator(reader)
    dataset = cfg['EvalReader']['dataset']
    # eval already exists json file
    if FLAGS.json_eval:
        logger.info(
            "In json_eval mode, PaddleDetection will evaluate json files in "
            "output_eval directly. And proposal.json, bbox.json and mask.json "
            "will be detected by default.")
        json_eval_results(
            cfg.metric, json_directory=FLAGS.output_eval, dataset=dataset)
        return
    pruned_params = FLAGS.pruned_params
    assert (
        FLAGS.pruned_params is not None
    ), "FLAGS.pruned_params is empty!!! Please set it by '--pruned_params' option."
    pruned_params = FLAGS.pruned_params.strip().split(",")
    logger.info("pruned params: {}".format(pruned_params))
    pruned_ratios = [float(n) for n in FLAGS.pruned_ratios.strip().split(",")]
    logger.info("pruned ratios: {}".format(pruned_ratios))
    assert (len(pruned_params) == len(pruned_ratios)
            ), "The length of pruned params and pruned ratios should be equal."
    # BUGFIX: the old check compared lists (`ratios > [0] * n`), which is
    # lexicographic (only the first differing element counts), so invalid
    # ratios could pass. Validate every element explicitly.
    assert all(0.0 < ratio < 1.0 for ratio in pruned_ratios), \
        "The elements of pruned ratios should be in range (0, 1)."
    base_flops = flops(eval_prog)
    pruner = Pruner()
    # only_graph=False: the weights loaded below must match the pruned graph.
    eval_prog, _, _ = pruner.prune(
        eval_prog,
        fluid.global_scope(),
        params=pruned_params,
        ratios=pruned_ratios,
        place=place,
        only_graph=False)
    pruned_flops = flops(eval_prog)
    logger.info("pruned FLOPS: {}".format(
        float(base_flops - pruned_flops) / base_flops))
    compile_program = fluid.CompiledProgram(eval_prog).with_data_parallel()
    assert cfg.metric != 'OID', "eval process of OID dataset is not supported."
    if cfg.metric == "WIDERFACE":
        raise ValueError("metric type {} does not support in tools/eval.py, "
                         "please use tools/face_eval.py".format(cfg.metric))
    assert cfg.metric in ['COCO', 'VOC'], \
        "unknown metric type {}".format(cfg.metric)
    extra_keys = []
    if cfg.metric == 'COCO':
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg.metric == 'VOC':
        extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
    keys, values, cls = parse_fetches(fetches, eval_prog, extra_keys)
    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()
    sub_eval_prog = None
    sub_keys = None
    sub_values = None
    # build sub-program (mask branch for multi-scale Mask models)
    if 'Mask' in main_arch and multi_scale_test:
        sub_eval_prog = fluid.Program()
        with fluid.program_guard(sub_eval_prog, startup_prog):
            with fluid.unique_name.guard():
                inputs_def = cfg['EvalReader']['inputs_def']
                inputs_def['mask_branch'] = True
                feed_vars, eval_loader = model.build_inputs(**inputs_def)
                sub_fetches = model.eval(
                    feed_vars, multi_scale_test, mask_branch=True)
                assert cfg.metric == 'COCO'
                extra_keys = ['im_id', 'im_shape']
        sub_keys, sub_values, _ = parse_fetches(sub_fetches, sub_eval_prog,
                                                extra_keys)
        sub_eval_prog = sub_eval_prog.clone(True)
    # load model
    if 'weights' in cfg:
        checkpoint.load_checkpoint(exe, eval_prog, cfg.weights)
    resolution = None
    if 'Mask' in cfg.architecture:
        resolution = model.mask_head.resolution
    results = eval_run(
        exe,
        compile_program,
        loader,
        keys,
        values,
        cls,
        cfg,
        sub_eval_prog,
        sub_keys,
        sub_values,
        resolution=resolution)
    # if map_type not set, use default 11point, only use in VOC eval
    map_type = cfg.map_type if 'map_type' in cfg else '11point'
    eval_results(
        results,
        cfg.metric,
        cfg.num_classes,
        resolution,
        is_bbox_normalized,
        FLAGS.output_eval,
        map_type,
        dataset=dataset)
def main():
    """Train (and optionally evaluate) a detection model with channel
    pruning applied to the train/eval programs before compilation."""
    env = os.environ
    FLAGS.dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env
    if FLAGS.dist:
        trainer_id = int(env['PADDLE_TRAINER_ID'])
        import random
        local_seed = (99 + trainer_id)
        random.seed(local_seed)
        np.random.seed(local_seed)
    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()
    main_arch = cfg.architecture
    if cfg.use_gpu:
        devices_num = fluid.core.get_cuda_device_count()
    else:
        devices_num = int(os.environ.get('CPU_NUM', 1))
    if 'FLAGS_selected_gpus' in env:
        device_id = int(env['FLAGS_selected_gpus'])
    else:
        device_id = 0
    place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    lr_builder = create('LearningRate')
    optim_builder = create('OptimizerBuilder')
    # build program
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = create(main_arch)
            if FLAGS.fp16:
                assert (getattr(model.backbone, 'norm_type', None)
                        != 'affine_channel'), \
                    '--fp16 currently does not support affine channel, ' \
                    ' please modify backbone settings to use batch norm'
            with mixed_precision_context(FLAGS.loss_scale, FLAGS.fp16) as ctx:
                inputs_def = cfg['TrainReader']['inputs_def']
                feed_vars, train_loader = model.build_inputs(**inputs_def)
                train_fetches = model.train(feed_vars)
                loss = train_fetches['loss']
                if FLAGS.fp16:
                    loss *= ctx.get_loss_scale_var()
                lr = lr_builder()
                optimizer = optim_builder(lr)
                optimizer.minimize(loss)
                if FLAGS.fp16:
                    loss /= ctx.get_loss_scale_var()
    # parse train fetches
    train_keys, train_values, _ = parse_fetches(train_fetches)
    train_values.append(lr)
    if FLAGS.print_params:
        param_delimit_str = '-' * 20 + "All parameters in current graph" + '-' * 20
        print(param_delimit_str)
        for block in train_prog.blocks:
            for param in block.all_parameters():
                print("parameter name: {}\tshape: {}".format(param.name,
                                                             param.shape))
        print('-' * len(param_delimit_str))
        return
    if FLAGS.eval:
        eval_prog = fluid.Program()
        with fluid.program_guard(eval_prog, startup_prog):
            with fluid.unique_name.guard():
                model = create(main_arch)
                inputs_def = cfg['EvalReader']['inputs_def']
                feed_vars, eval_loader = model.build_inputs(**inputs_def)
                fetches = model.eval(feed_vars)
        eval_prog = eval_prog.clone(True)
        eval_reader = create_reader(cfg.EvalReader)
        eval_loader.set_sample_list_generator(eval_reader, place)
        # parse eval fetches
        extra_keys = []
        if cfg.metric == 'COCO':
            extra_keys = ['im_info', 'im_id', 'im_shape']
        if cfg.metric == 'VOC':
            extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
        if cfg.metric == 'WIDERFACE':
            extra_keys = ['im_id', 'im_shape', 'gt_bbox']
        eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog,
                                                         extra_keys)
    # compile program for multi-devices
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    # only enable sync_bn in multi GPU devices
    sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn'
    build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \
        and cfg.use_gpu
    exec_strategy = fluid.ExecutionStrategy()
    # iteration number when CompiledProgram tries to drop local execution scopes.
    # Set it to be 1 to save memory usages, so that unused variables in
    # local execution scopes can be deleted after each iteration.
    exec_strategy.num_iteration_per_drop_scope = 1
    if FLAGS.dist:
        dist_utils.prepare_for_multi_process(exe, build_strategy, startup_prog,
                                             train_prog)
        exec_strategy.num_threads = 1
    exe.run(startup_prog)
    fuse_bn = getattr(model.backbone, 'norm_type', None) == 'affine_channel'
    start_iter = 0
    # Pretrained weights must be loaded BEFORE pruning so the pruner can
    # rank channels on real values.
    if cfg.pretrain_weights:
        checkpoint.load_params(exe, train_prog, cfg.pretrain_weights)
    pruned_params = FLAGS.pruned_params
    assert FLAGS.pruned_params is not None, \
        "FLAGS.pruned_params is empty!!! Please set it by '--pruned_params' option."
    pruned_params = FLAGS.pruned_params.strip().split(",")
    logger.info("pruned params: {}".format(pruned_params))
    pruned_ratios = [float(n) for n in FLAGS.pruned_ratios.strip().split(",")]
    logger.info("pruned ratios: {}".format(pruned_ratios))
    assert len(pruned_params) == len(pruned_ratios), \
        "The length of pruned params and pruned ratios should be equal."
    # BUGFIX: the old check compared lists (`ratios > [0] * n`), which is
    # lexicographic and only inspects the first differing element, letting
    # out-of-range ratios through. Validate each element instead.
    assert all(0.0 < ratio < 1.0 for ratio in pruned_ratios), \
        "The elements of pruned ratios should be in range (0, 1)."
    assert FLAGS.prune_criterion in ['l1_norm', 'geometry_median'], \
        "unsupported prune criterion {}".format(FLAGS.prune_criterion)
    pruner = Pruner(criterion=FLAGS.prune_criterion)
    train_prog = pruner.prune(
        train_prog,
        fluid.global_scope(),
        params=pruned_params,
        ratios=pruned_ratios,
        place=place,
        only_graph=False)[0]
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)
    if FLAGS.eval:
        base_flops = flops(eval_prog)
        # Eval graph shares scope with the pruned train graph: graph-only.
        eval_prog = pruner.prune(
            eval_prog,
            fluid.global_scope(),
            params=pruned_params,
            ratios=pruned_ratios,
            place=place,
            only_graph=True)[0]
        pruned_flops = flops(eval_prog)
        logger.info("FLOPs -{}; total FLOPs: {}; pruned FLOPs: {}".format(
            float(base_flops - pruned_flops) / base_flops, base_flops,
            pruned_flops))
        compiled_eval_prog = fluid.compiler.CompiledProgram(eval_prog)
    if FLAGS.resume_checkpoint:
        checkpoint.load_checkpoint(exe, train_prog, FLAGS.resume_checkpoint)
        start_iter = checkpoint.global_step()
    train_reader = create_reader(cfg.TrainReader,
                                 (cfg.max_iters - start_iter) * devices_num,
                                 cfg)
    train_loader.set_sample_list_generator(train_reader, place)
    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()
    # if map_type not set, use default 11point, only use in VOC eval
    map_type = cfg.map_type if 'map_type' in cfg else '11point'
    train_stats = TrainingStats(cfg.log_smooth_window, train_keys)
    train_loader.start()
    start_time = time.time()
    end_time = time.time()
    cfg_name = os.path.basename(FLAGS.config).split('.')[0]
    save_dir = os.path.join(cfg.save_dir, cfg_name)
    time_stat = deque(maxlen=cfg.log_smooth_window)
    best_box_ap_list = [0.0, 0]  # [map, iter]
    # use tb-paddle to log data
    if FLAGS.use_tb:
        from tb_paddle import SummaryWriter
        tb_writer = SummaryWriter(FLAGS.tb_log_dir)
        tb_loss_step = 0
        tb_mAP_step = 0
    if FLAGS.eval:
        # evaluation of the pruned (untrained) model as a baseline
        results = eval_run(exe, compiled_eval_prog, eval_loader, eval_keys,
                           eval_values, eval_cls, cfg)
        resolution = None
        if 'mask' in results[0]:
            resolution = model.mask_head.resolution
        dataset = cfg['EvalReader']['dataset']
        box_ap_stats = eval_results(
            results, cfg.metric, cfg.num_classes, resolution,
            is_bbox_normalized, FLAGS.output_eval, map_type, dataset=dataset)
    for it in range(start_iter, cfg.max_iters):
        start_time = end_time
        end_time = time.time()
        time_stat.append(end_time - start_time)
        time_cost = np.mean(time_stat)
        eta_sec = (cfg.max_iters - it) * time_cost
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        outs = exe.run(compiled_train_prog, fetch_list=train_values)
        stats = {k: np.array(v).mean() for k, v in zip(train_keys, outs[:-1])}
        # use tb-paddle to log loss
        if FLAGS.use_tb:
            if it % cfg.log_iter == 0:
                for loss_name, loss_value in stats.items():
                    tb_writer.add_scalar(loss_name, loss_value, tb_loss_step)
                tb_loss_step += 1
        train_stats.update(stats)
        logs = train_stats.log()
        if it % cfg.log_iter == 0 and (not FLAGS.dist or trainer_id == 0):
            strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format(
                it, np.mean(outs[-1]), logs, time_cost, eta)
            logger.info(strs)
        if (it > 0 and it % cfg.snapshot_iter == 0 or it == cfg.max_iters - 1) \
                and (not FLAGS.dist or trainer_id == 0):
            save_name = str(it) if it != cfg.max_iters - 1 else "model_final"
            checkpoint.save(exe, train_prog, os.path.join(save_dir, save_name))
            if FLAGS.eval:
                # evaluation
                results = eval_run(
                    exe,
                    compiled_eval_prog,
                    eval_loader,
                    eval_keys,
                    eval_values,
                    eval_cls,
                    cfg=cfg)
                resolution = None
                if 'mask' in results[0]:
                    resolution = model.mask_head.resolution
                box_ap_stats = eval_results(
                    results, cfg.metric, cfg.num_classes, resolution,
                    is_bbox_normalized, FLAGS.output_eval, map_type,
                    dataset=dataset)
                # use tb_paddle to log mAP
                if FLAGS.use_tb:
                    tb_writer.add_scalar("mAP", box_ap_stats[0], tb_mAP_step)
                    tb_mAP_step += 1
                if box_ap_stats[0] > best_box_ap_list[0]:
                    best_box_ap_list[0] = box_ap_stats[0]
                    best_box_ap_list[1] = it
                    checkpoint.save(exe, train_prog,
                                    os.path.join(save_dir, "best_model"))
                logger.info("Best test box ap: {}, in iter: {}".format(
                    best_box_ap_list[0], best_box_ap_list[1]))
    train_loader.reset()
def compress(args):
    """Iteratively prune an ImageNet classifier per ``ratiolist``, restore
    matching pretrained weights, then fine-tune and evaluate the pruned
    programs for ``args.num_epochs`` epochs."""
    class_dim = 1000
    image_shape = "3,224,224"
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone(for_test=True)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    if args.pretrained_model:

        def if_exist(var):
            # Only load variables that exist as files in the pretrained dir.
            exist = os.path.exists(
                os.path.join(args.pretrained_model, var.name))
            print("exist", exist)
            return exist

        #fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)
    val_reader = paddle.fluid.io.batch(
        reader.val(), batch_size=args.batch_size)
    train_reader = paddle.fluid.io.batch(
        reader.train(), batch_size=args.batch_size, drop_last=True)
    train_feeder = feeder = fluid.DataFeeder([image, label], place)
    val_feeder = feeder = fluid.DataFeeder(
        [image, label], place, program=val_program)

    def test(epoch, program):
        # Evaluate `program` over the whole validation reader.
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=train_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            print(
                "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                .format(epoch, batch_id,
                        np.mean(acc_top1_n),
                        np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1
        print("Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
            epoch,
            np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns))))

    def train(epoch, program):
        # One epoch of data-parallel training on `program`.
        build_strategy = fluid.BuildStrategy()
        exec_strategy = fluid.ExecutionStrategy()
        train_program = fluid.compiler.CompiledProgram(
            program).with_data_parallel(
                loss_name=avg_cost.name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n, lr_n = exe.run(
                train_program,
                feed=train_feeder.feed(data),
                fetch_list=[
                    avg_cost.name, acc_top1.name, acc_top5.name,
                    "learning_rate"
                ])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            lr_n = np.mean(lr_n)
            print(
                "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {};lrn: {}; time: {}"
                .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n, lr_n,
                        end_time - start_time))
            batch_id += 1

    # Prune only the depthwise-separable pointwise weights.
    params = []
    for param in fluid.default_main_program().global_block().all_parameters():
        #if "_weights" in param.name and "conv1_weights" not in param.name:
        if "_sep_weights" in param.name:
            params.append(param.name)
    print("fops before pruning: {}".format(
        flops(fluid.default_main_program())))
    pruned_program_iter = fluid.default_main_program()
    pruned_val_program_iter = val_program
    for ratios in ratiolist:
        pruner = Pruner()
        # BUGFIX: Pruner.prune returns (program, param_backup,
        # param_shape_backup); keep only the pruned program so that
        # flops() and the executor below receive a Program, not a tuple.
        pruned_val_program_iter = pruner.prune(
            pruned_val_program_iter,
            fluid.global_scope(),
            params=params,
            ratios=ratios,
            place=place,
            only_graph=True)[0]
        pruned_program_iter = pruner.prune(
            pruned_program_iter,
            fluid.global_scope(),
            params=params,
            ratios=ratios,
            place=place)[0]
        print("fops after pruning: {}".format(flops(pruned_program_iter)))
        """ do not inherit learning rate """
        if (os.path.exists(args.pretrained_model + "/learning_rate")):
            os.remove(args.pretrained_model + "/learning_rate")
        if (os.path.exists(args.pretrained_model + "/@LR_DECAY_COUNTER@")):
            os.remove(args.pretrained_model + "/@LR_DECAY_COUNTER@")
        # Reload the pretrained vars that still exist for the pruned graph.
        fluid.io.load_vars(
            exe,
            args.pretrained_model,
            main_program=pruned_program_iter,
            predicate=if_exist)
    pruned_program = pruned_program_iter
    pruned_val_program = pruned_val_program_iter
    for i in range(args.num_epochs):
        train(i, pruned_program)
        test(i, pruned_val_program)
        save_model(args, exe, pruned_program, pruned_val_program, i)
def test_prune(self):
    """Prune conv4 and the transposed-conv weight and assert every
    resulting parameter shape, including propagation through both
    elementwise-add skip connections."""
    main_program = fluid.Program()
    startup_program = fluid.Program()
    #   X       X              O       X              O
    # conv1-->conv2-->sum1-->conv3-->conv4-->sum2-->conv5-->conv6
    #   |              ^ |                    ^
    #   |______________| |____________________|
    #
    # X: prune output channels
    # O: prune input channels
    with fluid.program_guard(main_program, startup_program):
        image = fluid.data(name="image", shape=[None, 3, 16, 16])
        conv1 = conv_bn_layer(image, 8, 3, "conv1")
        conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
        sum1 = conv1 + conv2
        conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
        conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
        sum2 = conv4 + sum1
        conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
        conv6 = conv_bn_layer(conv5, 8, 3, "conv6")
        conv7 = fluid.layers.conv2d_transpose(
            input=conv6, num_filters=16, filter_size=2, stride=2)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.Scope()
    exe.run(startup_program, scope=scope)
    pruner = Pruner()
    main_program, _, _ = pruner.prune(
        main_program,
        scope,
        params=["conv4_weights", "conv2d_transpose_0.w_0"],
        ratios=[0.5, 0.6],
        place=place,
        lazy=False,
        only_graph=False,
        param_backup=None,
        param_shape_backup=None)
    expected = {
        "conv1_weights": (4, 3, 3, 3),
        "conv2_weights": (4, 4, 3, 3),
        "conv3_weights": (8, 4, 3, 3),
        "conv4_weights": (4, 8, 3, 3),
        "conv5_weights": (8, 4, 3, 3),
        "conv6_weights": (8, 8, 3, 3),
        "conv2d_transpose_0.w_0": (8, 16, 2, 2),
    }
    for param in main_program.global_block().all_parameters():
        if param.name not in expected:
            continue
        print("param: {}; param shape: {}".format(param.name, param.shape))
        self.assertTrue(param.shape == expected[param.name])
def main():
    """Run inference with a pruned model over a set of images and save the
    visualized detection results."""
    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()
    main_arch = cfg.architecture
    dataset = cfg.TestReader['dataset']
    test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
    dataset.set_images(test_images)
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    model = create(main_arch)
    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['TestReader']['inputs_def']
            inputs_def['iterable'] = True
            feed_vars, loader = model.build_inputs(**inputs_def)
            test_fetches = model.test(feed_vars)
    infer_prog = infer_prog.clone(True)
    pruned_params = FLAGS.pruned_params
    assert (
        FLAGS.pruned_params is not None
    ), "FLAGS.pruned_params is empty!!! Please set it by '--pruned_params' option."
    pruned_params = FLAGS.pruned_params.strip().split(",")
    logger.info("pruned params: {}".format(pruned_params))
    pruned_ratios = [float(n) for n in FLAGS.pruned_ratios.strip().split(",")]
    logger.info("pruned ratios: {}".format(pruned_ratios))
    assert (len(pruned_params) == len(pruned_ratios)
            ), "The length of pruned params and pruned ratios should be equal."
    # BUGFIX: the old check compared lists (`ratios > [0] * n`), which is
    # lexicographic and only inspects the first differing element, letting
    # out-of-range ratios through. Validate each element instead.
    assert all(0.0 < ratio < 1.0 for ratio in pruned_ratios), \
        "The elements of pruned ratios should be in range (0, 1)."
    base_flops = flops(infer_prog)
    pruner = Pruner()
    # only_graph=True: the pruned checkpoint is loaded below.
    infer_prog, _, _ = pruner.prune(
        infer_prog,
        fluid.global_scope(),
        params=pruned_params,
        ratios=pruned_ratios,
        place=place,
        only_graph=True)
    pruned_flops = flops(infer_prog)
    logger.info("pruned FLOPS: {}".format(
        float(base_flops - pruned_flops) / base_flops))
    reader = create_reader(cfg.TestReader, devices_num=1)
    loader.set_sample_list_generator(reader, place)
    exe.run(startup_prog)
    if cfg.weights:
        checkpoint.load_checkpoint(exe, infer_prog, cfg.weights)
    # parse infer fetches
    assert cfg.metric in ['COCO', 'VOC', 'OID', 'WIDERFACE'], \
        "unknown metric type {}".format(cfg.metric)
    extra_keys = []
    if cfg['metric'] in ['COCO', 'OID']:
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg['metric'] == 'VOC' or cfg['metric'] == 'WIDERFACE':
        extra_keys = ['im_id', 'im_shape']
    keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys)
    # parse dataset category
    if cfg.metric == 'COCO':
        from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info
    if cfg.metric == 'OID':
        from ppdet.utils.oid_eval import bbox2out, get_category_info
    if cfg.metric == "VOC":
        from ppdet.utils.voc_eval import bbox2out, get_category_info
    if cfg.metric == "WIDERFACE":
        from ppdet.utils.widerface_eval_utils import bbox2out, get_category_info
    anno_file = dataset.get_anno()
    with_background = dataset.with_background
    use_default_label = dataset.use_default_label
    clsid2catid, catid2name = get_category_info(anno_file, with_background,
                                                use_default_label)
    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()
    imid2path = dataset.get_imid2path()
    for iter_id, data in enumerate(loader()):
        outs = exe.run(infer_prog,
                       feed=data,
                       fetch_list=values,
                       return_numpy=False)
        res = {
            k: (np.array(v), v.recursive_sequence_lengths())
            for k, v in zip(keys, outs)
        }
        logger.info('Infer iter {}'.format(iter_id))
        bbox_results = None
        mask_results = None
        if 'bbox' in res:
            bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)
        if 'mask' in res:
            mask_results = mask2out([res], clsid2catid,
                                    model.mask_head.resolution)
        # visualize result
        im_ids = res['im_id'][0]
        for im_id in im_ids:
            image_path = imid2path[int(im_id)]
            image = Image.open(image_path).convert('RGB')
            image = visualize_results(image,
                                      int(im_id), catid2name,
                                      FLAGS.draw_threshold, bbox_results,
                                      mask_results)
            save_name = get_save_image_name(FLAGS.output_dir, image_path)
            logger.info("Detection bbox results save in {}".format(save_name))
            image.save(save_name, quality=95)
def test_prune(self):
    """Prune ``conv4_weights`` of a conv/BN net with an fc head, run one
    train step, save the pruned parameters, rebuild the same topology in a
    fresh program (plus a ``clone(for_test=True)`` copy), reload, and check
    every conv weight shape in both programs."""
    train_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        input = fluid.data(name="image", shape=[None, 3, 16, 16])
        # conv1..conv6 with two elementwise-add skip connections:
        #   sum1 = conv1 + conv2, sum2 = conv4 + sum1
        conv1 = conv_bn_layer(input, 8, 3, "conv1")
        conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
        sum1 = conv1 + conv2
        conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
        conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
        sum2 = conv4 + sum1
        conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
        conv6 = conv_bn_layer(conv5, 8, 3, "conv6")
        feature = fluid.layers.reshape(conv6, [-1, 128, 16])
        predict = fluid.layers.fc(input=feature, size=10, act='softmax')
        label = fluid.data(name='label', shape=[None, 1], dtype='int64')
        print(label.shape)
        print(predict.shape)
        cost = fluid.layers.cross_entropy(input=predict, label=label)
        avg_cost = fluid.layers.mean(cost)
        adam_optimizer = fluid.optimizer.AdamOptimizer(0.01)
        adam_optimizer.minimize(avg_cost)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.global_scope()
    exe.run(startup_program, scope=scope)
    criterion = 'bn_scale'
    pruner = Pruner(criterion)
    # Prune half of conv4's filters; only_graph=False so the parameter
    # tensors held in ``scope`` are actually resized, not just the graph.
    main_program, _, _ = pruner.prune(train_program,
                                      scope,
                                      params=["conv4_weights"],
                                      ratios=[0.5],
                                      place=place,
                                      lazy=False,
                                      only_graph=False,
                                      param_backup=None,
                                      param_shape_backup=None)
    x = numpy.random.random(size=(10, 3, 16, 16)).astype('float32')
    label = numpy.random.random(size=(10, 1)).astype('int64')
    # One forward/backward pass to confirm the pruned graph still runs.
    loss_data, = exe.run(train_program,
                         feed={
                             "image": x,
                             "label": label
                         },
                         fetch_list=[cost.name])
    save_model(exe, main_program, 'model_file')
    # Rebuild the same topology in a fresh program pair and reload the
    # pruned parameters into it (and into a test-mode clone).
    pruned_program = fluid.Program()
    pruned_startup_program = fluid.Program()
    with fluid.program_guard(pruned_program, pruned_startup_program):
        input = fluid.data(name="image", shape=[None, 3, 16, 16])
        conv1 = conv_bn_layer(input, 8, 3, "conv1")
        conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
        sum1 = conv1 + conv2
        conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
        conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
        sum2 = conv4 + sum1
        conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
        conv6 = conv_bn_layer(conv5, 8, 3, "conv6")
    pruned_test_program = pruned_program.clone(for_test=True)
    exe.run(pruned_startup_program)
    load_model(exe, pruned_program, 'model_file')
    load_model(exe, pruned_test_program, 'model_file')
    # NOTE(review): conv1/conv2 are also reduced to 4 filters — presumably
    # the pruning of conv4 propagates through the elementwise-add groups
    # (sum1/sum2); confirm against the Pruner channel-group logic.
    shapes = {
        "conv1_weights": (4, 3, 3, 3),
        "conv2_weights": (4, 4, 3, 3),
        "conv3_weights": (8, 4, 3, 3),
        "conv4_weights": (4, 8, 3, 3),
        "conv5_weights": (8, 4, 3, 3),
        "conv6_weights": (8, 8, 3, 3)
    }
    for param in pruned_program.global_block().all_parameters():
        if "weights" in param.name:
            print("param: {}; param shape: {}".format(
                param.name, param.shape))
            self.assertTrue(param.shape == shapes[param.name])
    for param in pruned_test_program.global_block().all_parameters():
        if "weights" in param.name:
            print("param: {}; param shape: {}".format(
                param.name, param.shape))
            self.assertTrue(param.shape == shapes[param.name])
def test_prune(self):
    """Prune ``conv4_weights`` by 50% on a residual-style conv/BN network,
    persist the pruned parameters, rebuild the identical topology in a
    fresh program, reload, and verify every conv weight shape."""

    def _make_backbone():
        # conv1..conv6 with two elementwise-add skip connections; returns
        # the final conv output variable.
        image = fluid.data(name="image", shape=[None, 3, 16, 16])
        c1 = conv_bn_layer(image, 8, 3, "conv1")
        c2 = conv_bn_layer(c1, 8, 3, "conv2")
        skip_a = c1 + c2
        c3 = conv_bn_layer(skip_a, 8, 3, "conv3")
        c4 = conv_bn_layer(c3, 8, 3, "conv4")
        skip_b = c4 + skip_a
        c5 = conv_bn_layer(skip_b, 8, 3, "conv5")
        return conv_bn_layer(c5, 8, 3, "conv6")

    base_prog = fluid.Program()
    base_startup = fluid.Program()
    with fluid.program_guard(base_prog, base_startup):
        last_conv = _make_backbone()

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.global_scope()
    exe.run(base_startup, scope=scope)

    # Prune with the bn_scale criterion; parameter tensors in ``scope``
    # are resized for real (only_graph=False).
    pruner = Pruner('bn_scale')
    pruned_main, _, _ = pruner.prune(base_prog,
                                     scope,
                                     params=["conv4_weights"],
                                     ratios=[0.5],
                                     place=place,
                                     lazy=False,
                                     only_graph=False,
                                     param_backup=None,
                                     param_shape_backup=None)

    # One forward pass to make sure the pruned graph still executes.
    batch = numpy.random.random(size=(10, 3, 16, 16)).astype('float32')
    loss_data, = exe.run(base_prog,
                         feed={"image": batch},
                         fetch_list=[last_conv.name])
    save_model(exe, pruned_main, 'model_file')

    # Rebuild the unmodified topology and load the pruned weights back.
    reload_prog = fluid.Program()
    reload_startup = fluid.Program()
    with fluid.program_guard(reload_prog, reload_startup):
        _make_backbone()
    exe.run(reload_startup)
    load_model(exe, reload_prog, 'model_file')

    expected = {
        "conv1_weights": (4, 3, 3, 3),
        "conv2_weights": (4, 4, 3, 3),
        "conv3_weights": (8, 4, 3, 3),
        "conv4_weights": (4, 8, 3, 3),
        "conv5_weights": (8, 4, 3, 3),
        "conv6_weights": (8, 8, 3, 3)
    }
    for param in reload_prog.global_block().all_parameters():
        if "weights" not in param.name:
            continue
        print("param: {}; param shape: {}".format(param.name, param.shape))
        self.assertTrue(param.shape == expected[param.name])
def test_prune(self):
    """Prune every conv weight (except conv8, which feeds the fc layer) by
    50% on a graph that mixes skips, a ``cond`` sub-block, depthwise conv,
    concat and elementwise ops, then re-run one step to check the pruned
    graph executes. Requires a GPU (CUDAPlace(0))."""
    main_program = fluid.Program()
    startup_program = fluid.Program()
    #     X        X                    O       X              O
    # conv1-->conv2-->sum1-->conv3-->conv4-->sum2-->conv5-->conv6
    #     |         ^        |                  ^
    #     |_________|        |__________________|
    #
    # X: prune output channels
    # O: prune input channels
    with fluid.unique_name.guard():
        with fluid.program_guard(main_program, startup_program):
            input = fluid.data(name="image", shape=[None, 3, 16, 16])
            label = fluid.data(name='label', shape=[None, 1], dtype='int64')
            conv1 = conv_bn_layer(input, 8, 3, "conv1", act='relu')
            conv2 = conv_bn_layer(conv1, 8, 3, "conv2", act='leaky_relu')
            sum1 = conv1 + conv2
            conv3 = conv_bn_layer(sum1, 8, 3, "conv3", act='relu6')
            conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
            sum2 = conv4 + sum1
            conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
            # Data-dependent branch: at run time either one or two extra
            # convs are applied to conv5, writing into ``cond_output``.
            flag = fluid.layers.fill_constant([1], value=1, dtype='int32')
            rand_flag = paddle.randint(2, dtype='int32')
            cond = fluid.layers.less_than(x=flag, y=rand_flag)
            cond_output = fluid.layers.create_global_var(
                shape=[1],
                value=0.0,
                dtype='float32',
                persistable=False,
                name='cond_output')

            def cond_block1():
                cond_conv = conv_bn_layer(conv5, 8, 3, "conv_cond1_1")
                fluid.layers.assign(input=cond_conv, output=cond_output)

            def cond_block2():
                cond_conv1 = conv_bn_layer(conv5, 8, 3, "conv_cond2_1")
                cond_conv2 = conv_bn_layer(cond_conv1, 8, 3, "conv_cond2_2")
                fluid.layers.assign(input=cond_conv2, output=cond_output)

            fluid.layers.cond(cond, cond_block1, cond_block2)
            sum3 = fluid.layers.sum([sum2, cond_output])
            conv6 = conv_bn_layer(sum3, 8, 3, "conv6")
            sub1 = conv6 - sum3
            mult = sub1 * sub1
            # Depthwise conv (groups == channels), plus floor/scale/concat
            # to exercise more op types in the pruning walker.
            conv7 = conv_bn_layer(
                mult, 8, 3, "Depthwise_Conv7", groups=8, use_cudnn=False)
            floored = fluid.layers.floor(conv7)
            scaled = fluid.layers.scale(floored)
            concated = fluid.layers.concat([scaled, mult], axis=1)
            conv8 = conv_bn_layer(concated, 8, 3, "conv8")
            predict = fluid.layers.fc(input=conv8, size=10, act='softmax')
            cost = fluid.layers.cross_entropy(input=predict, label=label)
            adam_optimizer = fluid.optimizer.AdamOptimizer(0.01)
            avg_cost = fluid.layers.mean(cost)
            adam_optimizer.minimize(avg_cost)
    # Collect every conv parameter (weights and BN params alike match
    # 'conv' here) as pruning targets.
    params = []
    for param in main_program.all_parameters():
        if 'conv' in param.name:
            params.append(param.name)
    #TODO: To support pruning convolution before fc layer.
    params.remove('conv8_weights')
    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_program)
    x = np.random.random(size=(10, 3, 16, 16)).astype('float32')
    label = np.random.random(size=(10, 1)).astype('int64')
    # Baseline step on the unpruned graph.
    loss_data, = exe.run(main_program,
                         feed={
                             "image": x,
                             "label": label
                         },
                         fetch_list=[cost.name])
    pruner = Pruner()
    main_program, _, _ = pruner.prune(
        main_program,
        fluid.global_scope(),
        params=params,
        ratios=[0.5] * len(params),
        place=place,
        lazy=False,
        only_graph=False,
        param_backup=None,
        param_shape_backup=None)
    # The test's only assertion is implicit: the pruned program must still
    # run a train step without raising.
    loss_data, = exe.run(main_program,
                         feed={
                             "image": x,
                             "label": label
                         },
                         fetch_list=[cost.name])
def compress(args):
    """Train-and-prune driver (paddle.static API): builds the model on the
    default programs, evaluates a baseline, prunes the selected parameters
    by ``args.pruned_ratio``, fine-tunes for ``args.num_epochs`` epochs and
    periodically evaluates/saves the pruned eval program.

    Args:
        args: parsed CLI namespace; reads data, model, batch_size, use_gpu,
            pretrained_model, log_period, criterion, pruned_ratio,
            num_epochs, test_period, model_path, save_inference.
    """
    train_reader = None
    test_reader = None
    # Dataset selection fixes class count and input shape.
    if args.data == "mnist":
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.MNIST(mode='train',
                                                     backend="cv2",
                                                     transform=transform)
        val_dataset = paddle.vision.datasets.MNIST(mode='test',
                                                   backend="cv2",
                                                   transform=transform)
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()
    place = places[0]
    exe = paddle.static.Executor(place)
    image = paddle.static.data(name='image',
                               shape=[None] + image_shape,
                               dtype='float32')
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
    # Global batch is split evenly across devices.
    batch_size_per_card = int(args.batch_size / len(places))
    train_loader = paddle.io.DataLoader(train_dataset,
                                        places=places,
                                        feed_list=[image, label],
                                        drop_last=True,
                                        batch_size=batch_size_per_card,
                                        shuffle=True,
                                        return_list=False,
                                        use_shared_memory=True,
                                        num_workers=16)
    valid_loader = paddle.io.DataLoader(val_dataset,
                                        places=place,
                                        feed_list=[image, label],
                                        drop_last=False,
                                        return_list=False,
                                        use_shared_memory=True,
                                        batch_size=batch_size_per_card,
                                        shuffle=False)
    step_per_epoch = int(np.ceil(len(train_dataset) * 1. / args.batch_size))
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
    avg_cost = paddle.mean(x=cost)
    acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
    # Clone the eval program before the optimizer adds backward ops.
    val_program = paddle.static.default_main_program().clone(for_test=True)
    opt, learning_rate = create_optimizer(args, step_per_epoch)
    opt.minimize(avg_cost)
    exe.run(paddle.static.default_startup_program())
    if args.pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(args.pretrained_model,
                                               var.name))

        _logger.info("Load pretrained model from {}".format(
            args.pretrained_model))
        paddle.static.load(paddle.static.default_main_program(),
                           args.pretrained_model, exe)

    def test(epoch, program):
        # Evaluate ``program`` over the whole validation loader and log
        # mean top-1/top-5 accuracy.
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program, feed=data, fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))

    def train(epoch, program):
        # NOTE(review): the ``program`` argument is never used — the run
        # below reads the enclosing ``train_program`` (the compiled pruned
        # program defined later in compress); confirm this is intended.
        for batch_id, data in enumerate(train_loader):
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                train_program,
                feed=data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, learning_rate.get_lr(), loss_n,
                            acc_top1_n, acc_top5_n, end_time - start_time))
            learning_rate.step()
            # NOTE(review): redundant — ``batch_id`` comes from enumerate
            # and is overwritten on the next iteration.
            batch_id += 1

    # Baseline accuracy before pruning.
    test(0, val_program)
    params = get_pruned_params(args, paddle.static.default_main_program())
    _logger.info("FLOPs before pruning: {}".format(
        flops(paddle.static.default_main_program())))
    pruner = Pruner(args.criterion)
    # Eval program is pruned graph-only; the train program prune also
    # resizes the parameter tensors in the global scope.
    pruned_val_program, _, _ = pruner.prune(val_program,
                                            paddle.static.global_scope(),
                                            params=params,
                                            ratios=[args.pruned_ratio] *
                                            len(params),
                                            place=place,
                                            only_graph=True)
    pruned_program, _, _ = pruner.prune(paddle.static.default_main_program(),
                                        paddle.static.global_scope(),
                                        params=params,
                                        ratios=[args.pruned_ratio] *
                                        len(params),
                                        place=place)
    _logger.info("FLOPs after pruning: {}".format(flops(pruned_program)))
    build_strategy = paddle.static.BuildStrategy()
    exec_strategy = paddle.static.ExecutionStrategy()
    train_program = paddle.static.CompiledProgram(
        pruned_program).with_data_parallel(loss_name=avg_cost.name,
                                           build_strategy=build_strategy,
                                           exec_strategy=exec_strategy)
    # Fine-tune the pruned network; evaluate/save every test_period epochs.
    for i in range(args.num_epochs):
        train(i, train_program)
        if (i + 1) % args.test_period == 0:
            test(i, pruned_val_program)
            save_model(exe, pruned_val_program,
                       os.path.join(args.model_path, str(i)))
        if args.save_inference:
            infer_model_path = os.path.join(args.model_path, "infer_models",
                                            str(i))
            paddle.static.save_inference_model(infer_model_path, [image],
                                               [out],
                                               exe,
                                               program=pruned_val_program)
            _logger.info(
                "Saved inference model into [{}]".format(infer_model_path))
def sensitivity(program,
                place,
                param_names,
                eval_func,
                sensitivities_file=None,
                pruned_ratios=None):
    """Compute pruning sensitivity for each parameter in ``param_names``.

    For every (param, ratio) pair the parameter is pruned lazily (so the
    original values can be restored), ``eval_func`` is re-run, and the
    relative metric drop ``(baseline - pruned) / baseline`` is recorded.

    Args:
        program: the fluid Program to analyse.
        place: device place used to set/restore parameter tensors.
        param_names: names of the parameters to analyse.
        eval_func: callable taking a program and returning a scalar metric.
        sensitivities_file: optional path used both to resume previously
            computed results and to checkpoint partial results after every
            evaluation. When None, nothing is persisted.
        pruned_ratios: iterable of prune ratios to try; defaults to
            ``np.arange(0.1, 1, step=0.1)``.

    Returns:
        dict: ``{param_name: {ratio: metric_loss}}``.
    """
    scope = fluid.global_scope()
    graph = GraphWrapper(program)
    # Resume from previously stored results so finished (param, ratio)
    # pairs are skipped below.
    sensitivities = load_sensitivities(sensitivities_file)
    if pruned_ratios is None:
        pruned_ratios = np.arange(0.1, 1, step=0.1)
    # Count only the evaluations still to be done (for progress/ETA logs).
    total_evaluate_iters = 0
    for name in param_names:
        if name not in sensitivities:
            sensitivities[name] = {}
            total_evaluate_iters += len(list(pruned_ratios))
        else:
            total_evaluate_iters += (len(list(pruned_ratios)) -
                                     len(sensitivities[name]))
    # Time a baseline evaluation once; its duration is used to estimate
    # the remaining time for the progress logs.
    start_time = time.time()
    baseline = eval_func(graph.program)
    cost = time.time() - start_time
    current_iter = 1
    for name in sensitivities:
        for ratio in pruned_ratios:
            if ratio in sensitivities[name]:
                logging.debug('{}, {} has computed.'.format(name, ratio))
                continue
            progress = float(current_iter) / total_evaluate_iters
            progress = "%.2f%%" % (progress * 100)
            logging.info(
                "Total evaluate iters={}, current={}, progress={}, eta={}".
                format(
                    total_evaluate_iters, current_iter, progress,
                    seconds_to_hms(
                        int(cost * (total_evaluate_iters - current_iter)))),
                use_color=True)
            current_iter += 1
            pruner = Pruner()
            logging.info("sensitive - param: {}; ratios: {}".format(
                name, ratio))
            # lazy=True zeroes the pruned channels instead of removing them,
            # and param_backup=True records the original values so the
            # parameter can be restored after the evaluation.
            pruned_program, param_backup, _ = pruner.prune(
                program=graph.program,
                scope=scope,
                params=[name],
                ratios=[ratio],
                place=place,
                lazy=True,
                only_graph=False,
                param_backup=True)
            pruned_metric = eval_func(pruned_program)
            loss = (baseline - pruned_metric) / baseline
            logging.info("pruned param: {}; {}; loss={}".format(
                name, ratio, loss))
            sensitivities[name][ratio] = loss
            # Fix: only checkpoint partial results when a path was actually
            # given — the previous unconditional open() raised TypeError
            # with the default sensitivities_file=None.
            if sensitivities_file is not None:
                with open(sensitivities_file, 'wb') as f:
                    pickle.dump(sensitivities, f)
            # Restore the lazily-pruned parameter before the next trial.
            for param_name in param_backup.keys():
                param_t = scope.find_var(param_name).get_tensor()
                param_t.set(param_backup[param_name], place)
    return sensitivities
def compress(args):
    """AutoPruner search driver (legacy fluid API): builds the model on the
    default programs, applies a fixed sequence of prunings (``ratiolist``,
    defined at module level — TODO confirm), then runs a simulated-annealing
    ratio search with ``AutoPruner``, rewarding each candidate with its
    validation top-1 accuracy.

    Args:
        args: parsed CLI namespace; reads data, model, batch_size, use_gpu,
            pretrained_model and log_period.
    """
    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape,
                              dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    # Clone the eval program before the optimizer adds backward ops.
    val_program = fluid.default_main_program().clone(for_test=True)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    if args.pretrained_model:

        def if_exist(var):
            # Load only variables that exist as files in the checkpoint dir.
            return os.path.exists(os.path.join(args.pretrained_model,
                                               var.name))

        # fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.batch(train_reader,
                                batch_size=args.batch_size,
                                drop_last=True)
    # NOTE(review): the intermediate ``feeder`` alias is redundant — both
    # statements also bind the same object to train_feeder/val_feeder.
    train_feeder = feeder = fluid.DataFeeder([image, label], place)
    val_feeder = feeder = fluid.DataFeeder([image, label],
                                           place,
                                           program=val_program)

    def test(epoch, program):
        # Evaluate ``program`` over the whole validation reader and return
        # the mean top-1 accuracy (used as the AutoPruner reward).
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=train_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1
        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, program):
        # Compile ``program`` for data-parallel execution and run one epoch.
        build_strategy = fluid.BuildStrategy()
        exec_strategy = fluid.ExecutionStrategy()
        train_program = fluid.compiler.CompiledProgram(
            program).with_data_parallel(loss_name=avg_cost.name,
                                        build_strategy=build_strategy,
                                        exec_strategy=exec_strategy)
        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                train_program,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))
            batch_id += 1

    # Only depthwise-separable pointwise weights are pruning candidates.
    params = []
    for param in fluid.default_main_program().global_block().all_parameters():
        if "_sep_weights" in param.name:
            params.append(param.name)
    pruned_program_iter = fluid.default_main_program()
    pruned_val_program_iter = val_program
    # Apply each preset ratio set in sequence before the auto search.
    # NOTE(review): ``ratiolist`` is not defined in this function —
    # presumably a module-level constant; verify. Also note these two
    # assignments keep Pruner.prune's full return value (elsewhere in this
    # file it is unpacked as a 3-tuple) — confirm downstream callees accept
    # that.
    for ratios in ratiolist:
        pruner = Pruner()
        pruned_val_program_iter = pruner.prune(pruned_val_program_iter,
                                               fluid.global_scope(),
                                               params=params,
                                               ratios=ratios,
                                               place=place,
                                               only_graph=True)
        pruned_program_iter = pruner.prune(pruned_program_iter,
                                           fluid.global_scope(),
                                           params=params,
                                           ratios=ratios,
                                           place=place)
        print("fops after pruning: {}".format(flops(pruned_program_iter)))
        fluid.io.load_vars(exe,
                           args.pretrained_model,
                           main_program=pruned_program_iter,
                           predicate=if_exist)
    # Simulated-annealing search server over the candidate prune ratios.
    pruner = AutoPruner(pruned_val_program_iter,
                        fluid.global_scope(),
                        place,
                        params=params,
                        init_ratios=[0.1] * len(params),
                        pruned_flops=0.1,
                        pruned_latency=None,
                        server_addr=("", 0),
                        init_temperature=100,
                        reduce_rate=0.85,
                        max_try_times=300,
                        max_client_num=10,
                        search_steps=100,
                        max_ratios=0.2,
                        min_ratios=0.,
                        is_server=True,
                        key="auto_pruner")
    # NOTE(review): loop has no break — presumably AutoPruner terminates
    # the process after search_steps; range(0) skips fine-tuning entirely.
    while True:
        pruned_program, pruned_val_program = pruner.prune(
            pruned_program_iter, pruned_val_program_iter)
        for i in range(0):
            train(i, pruned_program)
        score = test(0, pruned_val_program)
        pruner.reward(score)
def test_concat(self):
    """Exercise the pruner's backward and forward traversal across a
    ``concat`` op: pruning conv3 must propagate backwards into both concat
    inputs (conv1/conv2), and pruning conv1/conv2 must propagate forwards
    through the concat into conv3's consumers/BN params."""
    main_program = fluid.Program()
    startup_program = fluid.Program()
    #                                 X
    # conv1   conv2-->concat   conv3-->sum-->out
    #   |              ^                ^
    #   |______________|                |
    #
    with fluid.program_guard(main_program, startup_program):
        input = fluid.data(name="image", shape=[None, 3, 16, 16])
        conv1 = conv_bn_layer(input, 8, 3, "conv1")
        conv2 = conv_bn_layer(input, 8, 3, "conv2", sync_bn=True)
        tmp = fluid.layers.concat([conv1, conv2], axis=1)
        conv3 = conv_bn_layer(input, 16, 3, "conv3", bias=None)
        out = conv3 + tmp
    shapes = {}
    for param in main_program.global_block().all_parameters():
        shapes[param.name] = param.shape
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.Scope()
    exe.run(startup_program, scope=scope)
    pruner = Pruner()
    # test backward search of concat
    pruned_program, _, _ = pruner.prune(main_program,
                                        scope,
                                        params=["conv3_weights"],
                                        ratios=[0.5],
                                        place=place,
                                        lazy=False,
                                        only_graph=True,
                                        param_backup=None,
                                        param_shape_backup=None)
    shapes = {
        "conv3_weights": (8, 3, 3, 3),
        "conv2_weights": (4, 3, 3, 3),
        "conv1_weights": (4, 3, 3, 3)
    }
    # NOTE(review): names like "conv1_weights" never contain "conv2d", so
    # this condition appears to skip every parameter and the assertions may
    # never execute — confirm the intended filter.
    for param in pruned_program.global_block().all_parameters():
        if "weights" in param.name and "conv2d" in param.name:
            self.assertTrue(shapes[param.name] == param.shape)
    # test forward search of concat
    pruned_program, _, _ = pruner.prune(
        main_program,
        scope,
        params=["conv1_weights", "conv2_weights"],
        ratios=[0.5, 0.5],
        place=place,
        lazy=False,
        only_graph=False,
        param_backup=None,
        param_shape_backup=None)
    shapes = {
        "conv1_weights": (4, 3, 3, 3),
        "conv1_bn_scale": (4, ),
        "conv1_bn_variance": (4, ),
        "conv1_bn_mean": (4, ),
        "conv1_bn_offset": (4, ),
        "conv2_weights": (4, 3, 3, 3),
        "sync_batch_norm_0.w_0": (4, ),
        "sync_batch_norm_0.w_1": (4, ),
        "conv2_bn_scale": (4, ),
        "conv2_bn_offset": (4, ),
        "conv3_weights": (8, 3, 3, 3),
        "conv3_bn_mean": (8, ),
        "conv3_bn_offset": (8, ),
        "conv3_bn_scale": (8, ),
        "conv3_bn_variance": (8, ),
        "conv3_out.b_0": (8, ),
    }
    for param in pruned_program.global_block().all_parameters():
        if "weights" in param.name and "conv2d" in param.name:
            self.assertTrue(shapes[param.name] == param.shape)
def train(cfg):
    """Prune-and-finetune training loop for the segmentation model.

    Builds the train program, optionally restores a checkpoint or loads
    shape-matched pretrained parameters, prunes the channels named in
    ``cfg.SLIM.PRUNE_PARAMS`` by ``cfg.SLIM.PRUNE_RATIOS``, then trains for
    ``cfg.SOLVER.NUM_EPOCHS`` epochs with periodic checkpointing,
    optional evaluation, and optional TensorBoard logging.

    Args:
        cfg: global segmentation config (DATASET/DATALOADER/TRAIN/SOLVER/
            SLIM sections). Also reads the module-level ``args`` namespace.
    """
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    drop_last = True
    dataset = SegDataset(file_list=cfg.DATASET.TRAIN_FILE_LIST,
                         mode=ModelPhase.TRAIN,
                         shuffle=True,
                         data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        # Yield (image, label, mask) samples, re-batched so each rank sees
        # BATCH_SIZE // NUM_TRAINERS samples per global batch.
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()
        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If use sync batch norm strategy, drop last batch if number of samples
        # in batch_data is less then cfg.BATCH_SIZE to avoid NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    # places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    # place = places[0]
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    # Get number of GPU
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))
    # Make sure BATCH_SIZE can divided by GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisble by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # If use multi-gpu training mode, batch data will allocated to each GPU evenly
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))
    data_loader, avg_loss, lr, pred, grts, masks = build_model(
        train_prog, startup_prog, phase=ModelPhase.TRAIN)
    data_loader.set_sample_generator(data_generator,
                                     batch_size=batch_size_per_dev,
                                     drop_last=drop_last)
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    exec_strategy = fluid.ExecutionStrategy()
    # Clear temporary variables every 100 iteration
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
        exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()
    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
        exec_strategy.num_threads = 1
    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply sync batch norm strategy
            print_info("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print_info(
                "Sync BatchNorm strategy will not be effective if GPU device"
                " count <= 1")
    # Normalize PRUNE_RATIOS to one ratio per pruned parameter.
    pruned_params = cfg.SLIM.PRUNE_PARAMS.strip().split(',')
    pruned_ratios = cfg.SLIM.PRUNE_RATIOS
    if isinstance(pruned_ratios, float):
        pruned_ratios = [pruned_ratios] * len(pruned_params)
    elif isinstance(pruned_ratios, (list, tuple)):
        pruned_ratios = list(pruned_ratios)
    else:
        raise ValueError(
            'expect SLIM.PRUNE_RATIOS type is float, list, tuple, '
            'but received {}'.format(type(pruned_ratios)))
    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, train_prog)
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR)
        load_vars = []
        load_fail_vars = []

        def var_shape_matched(var, shape):
            """
            Check whehter persitable variable shape is match with current network
            """
            var_exist = os.path.exists(
                os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
            if var_exist:
                var_shape = parse_shape_from_file(
                    os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
                return var_shape == shape
            return False

        # Partition parameters into loadable vs shape-mismatched/missing.
        for x in train_prog.list_vars():
            if isinstance(x, fluid.framework.Parameter):
                shape = tuple(fluid.global_scope().find_var(
                    x.name).get_tensor().shape())
                if var_shape_matched(x, shape):
                    load_vars.append(x)
                else:
                    load_fail_vars.append(x)
        fluid.io.load_vars(exe,
                           dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR,
                           vars=load_vars)
        for var in load_vars:
            print_info("Parameter[{}] loaded sucessfully!".format(var.name))
        for var in load_fail_vars:
            print_info(
                "Parameter[{}] don't exist or shape does not match current network, skip"
                " to load it.".format(var.name))
        print_info("{}/{} pretrained parameters loaded successfully!".format(
            len(load_vars),
            len(load_vars) + len(load_fail_vars)))
    else:
        print_info(
            'Pretrained model dir {} not exists, training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))
    fetch_list = [avg_loss.name, lr.name]
    if args.debug:
        # Fetch more variable info and use streaming confusion matrix to
        # calculate IoU results if in debug mode
        np.set_printoptions(precision=4,
                            suppress=True,
                            linewidth=160,
                            floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)
    if args.use_tb:
        if not args.tb_log_dir:
            print_info("Please specify the log directory by --tb_log_dir.")
            exit(1)
        from tb_paddle import SummaryWriter
        log_writer = SummaryWriter(args.tb_log_dir)
    # Prune AFTER weights are loaded so the pruned tensors keep the
    # pretrained values; only the pruned program is kept ([0]).
    pruner = Pruner()
    train_prog = pruner.prune(train_prog,
                              fluid.global_scope(),
                              params=pruned_params,
                              ratios=pruned_ratios,
                              place=place,
                              only_graph=False)[0]
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=avg_loss.name,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)
    # Total step count across all remaining epochs (for the ETA display).
    global_step = 0
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)
    avg_loss = 0.0
    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError(
            ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]"
             ).format(begin_epoch, cfg.SOLVER.NUM_EPOCHS))
    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")
    # Epoch loop: the inner while-loop drains the data loader until the
    # executor raises EOFException for the epoch.
    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        data_loader.start()
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # traning process is corresponed to expectation
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1
                    if global_step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()
                        print_info((
                            "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss,
                                 mean_acc, mean_iou, speed,
                                 calculate_eta(all_step - global_step,
                                               speed)))
                        print_info("Category IoU: ", category_iou)
                        print_info("Category Acc: ", category_acc)
                        if args.use_tb:
                            log_writer.add_scalar('Train/mean_iou', mean_iou,
                                                  global_step)
                            log_writer.add_scalar('Train/mean_acc', mean_acc,
                                                  global_step)
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  global_step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  global_step)
                            log_writer.add_scalar('Train/step/sec', speed,
                                                  global_step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnessary log and calculate
                    loss, lr = exe.run(program=compiled_train_prog,
                                       fetch_list=fetch_list,
                                       return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1
                    if global_step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, speed,
                                 calculate_eta(all_step - global_step,
                                               speed)))
                        if args.use_tb:
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  global_step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  global_step)
                            log_writer.add_scalar('Train/speed', speed,
                                                  global_step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()
            except fluid.core.EOFException:
                # Normal end of epoch: reset the loader and move on.
                data_loader.reset()
                break
            except Exception as e:
                # NOTE(review): any other executor error is printed and the
                # loop continues — errors are not fatal here; confirm this
                # best-effort behavior is intended.
                print(e)
        # Snapshot (and optionally evaluate/visualize) on rank 0 only.
        if epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0 and cfg.TRAINER_ID == 0:
            ckpt_dir = save_prune_checkpoint(exe, train_prog, epoch)
            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(cfg=cfg,
                                                    ckpt_dir=ckpt_dir,
                                                    use_gpu=args.use_gpu,
                                                    use_mpio=args.use_mpio)
                if args.use_tb:
                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou,
                                          global_step)
                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc,
                                          global_step)
            # Use Tensorboard to visualize results
            if args.use_tb and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(cfg=cfg,
                          use_gpu=args.use_gpu,
                          vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                          vis_dir="visual",
                          ckpt_dir=ckpt_dir,
                          log_writer=log_writer)
    # save final model
    if cfg.TRAINER_ID == 0:
        save_prune_checkpoint(exe, train_prog, 'final')
def main():
    """Train a pruned YOLO student network with knowledge distillation.

    Pipeline: build the student train/eval programs, build a teacher program
    that shares the student's feed variables, freeze the teacher, merge it
    into the student's main program, add a distillation loss, prune both the
    train and eval programs with PaddleSlim's ``Pruner``, then run the
    training loop with periodic evaluation, snapshotting and best-model
    tracking.
    """
    env = os.environ
    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    check_version()

    main_arch = cfg.architecture

    if cfg.use_gpu:
        devices_num = fluid.core.get_cuda_device_count()
    else:
        devices_num = int(os.environ.get('CPU_NUM', 1))

    if 'FLAGS_selected_gpus' in env:
        device_id = int(env['FLAGS_selected_gpus'])
    else:
        device_id = 0
    place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # build student train program (lives in the default main program)
    model = create(main_arch)
    inputs_def = cfg['TrainReader']['inputs_def']
    train_feed_vars, train_loader = model.build_inputs(**inputs_def)
    train_fetches = model.train(train_feed_vars)
    loss = train_fetches['loss']

    start_iter = 0
    train_reader = create_reader(cfg.TrainReader,
                                 (cfg.max_iters - start_iter) * devices_num,
                                 cfg)
    # When iterable mode, set set_sample_list_generator(train_reader, place)
    train_loader.set_sample_list_generator(train_reader)

    # build student eval program in its own Program/namespace
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            model = create(main_arch)
            inputs_def = cfg['EvalReader']['inputs_def']
            test_feed_vars, eval_loader = model.build_inputs(**inputs_def)
            fetches = model.eval(test_feed_vars)
    eval_prog = eval_prog.clone(True)

    eval_reader = create_reader(cfg.EvalReader)
    # When iterable mode, set set_sample_list_generator(eval_reader, place)
    eval_loader.set_sample_list_generator(eval_reader)

    # build the teacher program; it clones the student's feed variables so
    # both networks consume the same input batch
    teacher_cfg = load_config(FLAGS.teacher_config)
    merge_config(FLAGS.opt)
    teacher_arch = teacher_cfg.architecture
    teacher_program = fluid.Program()
    teacher_startup_program = fluid.Program()

    with fluid.program_guard(teacher_program, teacher_startup_program):
        with fluid.unique_name.guard():
            teacher_feed_vars = OrderedDict()
            for name, var in train_feed_vars.items():
                teacher_feed_vars[name] = teacher_program.global_block(
                )._clone_variable(
                    var, force_persistable=False)
            model = create(teacher_arch)
            train_fetches = model.train(teacher_feed_vars)
            teacher_loss = train_fetches['loss']

    exe.run(teacher_startup_program)
    assert FLAGS.teacher_pretrained, "teacher_pretrained should be set"
    checkpoint.load_params(exe, teacher_program, FLAGS.teacher_pretrained)
    # freeze teacher weights: gradients only flow into the student
    teacher_program = teacher_program.clone(for_test=True)

    # one distillation target group per YOLO head scale
    target_number = len(model.yolo_head.anchor_masks)

    data_name_map = {
        'image': 'image',
        'gt_bbox': 'gt_bbox',
        'gt_class': 'gt_class',
        'gt_score': 'gt_score'
    }
    for i in range(target_number):
        data_name_map['target{}'.format(i)] = 'target{}'.format(i)
    merge(teacher_program, fluid.default_main_program(), data_name_map, place)

    # teacher output variable names per scale: x, y, w, h, obj slices plus
    # the class-score transpose (names fixed by the teacher graph layout)
    output_names = [
        [
            'strided_slice_0.tmp_0', 'strided_slice_1.tmp_0',
            'strided_slice_2.tmp_0', 'strided_slice_3.tmp_0',
            'strided_slice_4.tmp_0', 'transpose_0.tmp_0'
        ],
        [
            'strided_slice_5.tmp_0', 'strided_slice_6.tmp_0',
            'strided_slice_7.tmp_0', 'strided_slice_8.tmp_0',
            'strided_slice_9.tmp_0', 'transpose_2.tmp_0'
        ],
        [
            'strided_slice_10.tmp_0', 'strided_slice_11.tmp_0',
            'strided_slice_12.tmp_0', 'strided_slice_13.tmp_0',
            'strided_slice_14.tmp_0', 'transpose_4.tmp_0'
        ],
    ]
    yolo_output_names = []
    for i in range(target_number):
        yolo_output_names.extend(output_names[i])

    assert cfg.use_fine_grained_loss, \
        "Only support use_fine_grained_loss=True, Please set it in config file or '-o use_fine_grained_loss=true'"
    distill_loss = split_distill(yolo_output_names, 1000, target_number)
    loss = distill_loss + loss
    lr_builder = create('LearningRate')
    optim_builder = create('OptimizerBuilder')
    lr = lr_builder()
    opt = optim_builder(lr)
    opt.minimize(loss)

    exe.run(fluid.default_startup_program())
    checkpoint.load_params(exe,
                           fluid.default_main_program(), cfg.pretrain_weights)

    assert FLAGS.pruned_params is not None, \
        "FLAGS.pruned_params is empty!!! Please set it by '--pruned_params' option."
    pruned_params = FLAGS.pruned_params.strip().split(",")
    logger.info("pruned params: {}".format(pruned_params))
    pruned_ratios = [float(n) for n in FLAGS.pruned_ratios.strip().split(",")]
    logger.info("pruned ratios: {}".format(pruned_ratios))
    assert len(pruned_params) == len(pruned_ratios), \
        "The length of pruned params and pruned ratios should be equal."
    # BUGFIX: the previous list comparison
    # `pruned_ratios > [0] * n and pruned_ratios < [1] * n` is lexicographic,
    # so only the first differing element was range-checked (e.g. [0.5, 7.0]
    # slipped through). Validate every ratio explicitly.
    assert all(0 < r < 1 for r in pruned_ratios), \
        "The elements of pruned ratios should be in range (0, 1)."
    assert FLAGS.prune_criterion in ['l1_norm', 'geometry_median'], \
        "unsupported prune criterion {}".format(FLAGS.prune_criterion)

    pruner = Pruner(criterion=FLAGS.prune_criterion)
    # prune the merged (student+teacher) train program in place
    distill_prog = pruner.prune(
        fluid.default_main_program(),
        fluid.global_scope(),
        params=pruned_params,
        ratios=pruned_ratios,
        place=place,
        only_graph=False)[0]

    base_flops = flops(eval_prog)
    # only_graph=True: eval shares parameter tensors with train, which were
    # already physically pruned above
    eval_prog = pruner.prune(
        eval_prog,
        fluid.global_scope(),
        params=pruned_params,
        ratios=pruned_ratios,
        place=place,
        only_graph=True)[0]
    pruned_flops = flops(eval_prog)
    logger.info("FLOPs -{}; total FLOPs: {}; pruned FLOPs: {}".format(
        float(base_flops - pruned_flops) / base_flops, base_flops,
        pruned_flops))

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    # only enable sync_bn in multi GPU devices
    sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn'
    build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \
        and cfg.use_gpu

    exec_strategy = fluid.ExecutionStrategy()
    # iteration number when CompiledProgram tries to drop local execution scopes.
    # Set it to be 1 to save memory usages, so that unused variables in
    # local execution scopes can be deleted after each iteration.
    exec_strategy.num_iteration_per_drop_scope = 1
    parallel_main = fluid.CompiledProgram(distill_prog).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)
    compiled_eval_prog = fluid.CompiledProgram(eval_prog)

    # parse eval fetches
    extra_keys = []
    if cfg.metric == 'COCO':
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg.metric == 'VOC':
        extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
    eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog,
                                                     extra_keys)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()
    map_type = cfg.map_type if 'map_type' in cfg else '11point'
    best_box_ap_list = [0.0, 0]  #[map, iter]
    cfg_name = os.path.basename(FLAGS.config).split('.')[0]
    save_dir = os.path.join(cfg.save_dir, cfg_name)

    train_loader.start()
    for step_id in range(start_iter, cfg.max_iters):
        teacher_loss_np, distill_loss_np, loss_np, lr_np = exe.run(
            parallel_main,
            fetch_list=[
                'teacher_' + teacher_loss.name, distill_loss.name, loss.name,
                lr.name
            ])
        if step_id % cfg.log_iter == 0:
            logger.info(
                "step {} lr {:.6f}, loss {:.6f}, distill_loss {:.6f}, teacher_loss {:.6f}"
                .format(step_id, lr_np[0], loss_np[0], distill_loss_np[0],
                        teacher_loss_np[0]))
        # snapshot periodically (and always at the final step)
        if step_id % cfg.snapshot_iter == 0 and step_id != 0 or step_id == cfg.max_iters - 1:
            save_name = str(
                step_id) if step_id != cfg.max_iters - 1 else "model_final"
            checkpoint.save(exe, distill_prog,
                            os.path.join(save_dir, save_name))
            # eval
            results = eval_run(exe, compiled_eval_prog, eval_loader, eval_keys,
                               eval_values, eval_cls, cfg)
            resolution = None
            box_ap_stats = eval_results(results, cfg.metric, cfg.num_classes,
                                        resolution, is_bbox_normalized,
                                        FLAGS.output_eval, map_type,
                                        cfg['EvalReader']['dataset'])
            # keep the best-mAP snapshot separately from periodic snapshots
            if box_ap_stats[0] > best_box_ap_list[0]:
                best_box_ap_list[0] = box_ap_stats[0]
                best_box_ap_list[1] = step_id
                checkpoint.save(exe, distill_prog,
                                os.path.join("./", "best_model"))
            logger.info("Best test box ap: {}, in step: {}".format(
                best_box_ap_list[0], best_box_ap_list[1]))
    train_loader.reset()
def compress(args):
    """Prune a classification network and fine-tune the result.

    Builds the model for ``args.data`` (mnist or imagenet), optionally loads
    pretrained weights, prunes the parameters selected by
    ``get_pruned_params`` with ratio ``args.pruned_ratio``, then alternates
    fine-tune epochs with periodic evaluation, checkpointing and optional
    inference-model export.

    Args:
        args: parsed command-line namespace (data, model, batch_size,
            use_gpu, pretrained_model, pruned_ratio, criterion, num_epochs,
            test_period, log_period, model_path, save_inference, ...).
    """
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    # clone for evaluation before the optimizer adds backward/update ops
    val_program = fluid.default_main_program().clone(for_test=True)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:

        def if_exist(var):
            # only load variables that actually exist in the checkpoint dir
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))

        _logger.info("Load pretrained model from {}".format(
            args.pretrained_model))
        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.fluid.io.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.fluid.io.batch(
        train_reader, batch_size=args.batch_size, drop_last=True)

    # FIX: the original aliased both feeders through a throwaway `feeder`
    # name and fed validation data through the *train* feeder; bind each
    # feeder to its own program and use it consistently.
    train_feeder = fluid.DataFeeder([image, label], place)
    val_feeder = fluid.DataFeeder([image, label], place, program=val_program)

    def test(epoch, program):
        # Run `program` over the full validation set and log mean accuracies.
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=val_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1
        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))

    def train(epoch, program):
        # One fine-tune epoch over the training set with data parallelism.
        build_strategy = fluid.BuildStrategy()
        exec_strategy = fluid.ExecutionStrategy()
        train_program = fluid.compiler.CompiledProgram(
            program).with_data_parallel(
                loss_name=avg_cost.name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                train_program,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))
            batch_id += 1

    # baseline accuracy before pruning
    test(0, val_program)

    params = get_pruned_params(args, fluid.default_main_program())
    _logger.info("FLOPs before pruning: {}".format(
        flops(fluid.default_main_program())))
    pruner = Pruner(args.criterion)
    # only_graph=True: eval shares parameter tensors with the train program,
    # so the actual tensors are only pruned once (in the call below)
    pruned_val_program, _, _ = pruner.prune(
        val_program,
        fluid.global_scope(),
        params=params,
        ratios=[args.pruned_ratio] * len(params),
        place=place,
        only_graph=True)
    pruned_program, _, _ = pruner.prune(
        fluid.default_main_program(),
        fluid.global_scope(),
        params=params,
        ratios=[args.pruned_ratio] * len(params),
        place=place)
    _logger.info("FLOPs after pruning: {}".format(flops(pruned_program)))

    for i in range(args.num_epochs):
        train(i, pruned_program)
        if i % args.test_period == 0:
            test(i, pruned_val_program)
            save_model(exe, pruned_val_program,
                       os.path.join(args.model_path, str(i)))
        if args.save_inference:
            infer_model_path = os.path.join(args.model_path, "infer_models",
                                            str(i))
            fluid.io.save_inference_model(infer_model_path, ["image"], [out],
                                          exe, pruned_val_program)
            _logger.info(
                "Saved inference model into [{}]".format(infer_model_path))
def get_prune_model(model_file, param_file, ratio, save_path):
    """
    Using the structured pruning algorithm to compress the network.
    This interface is only used to evaluate the latency of the compressed
    network, and does not consider the loss of accuracy.
    Args:
        model_file(str), param_file(str): The inference model to be pruned.
        ratio(float): The ratio to prune the model.
        save_path(str): The save path of pruned model.
    """
    assert os.path.exists(model_file), f'{model_file} does not exist.'
    # BUGFIX: test `param_file is None` first — the original evaluated
    # os.path.exists(param_file) before the None check, which raises
    # TypeError when param_file is None instead of short-circuiting.
    assert param_file is None or os.path.exists(
        param_file), f'{param_file} does not exist.'
    paddle.enable_static()

    # input/feed variables and outputs that must never be pruned
    SKIP = ['image', 'feed', 'pool2d_0.tmp_0']

    folder = os.path.dirname(model_file)
    model_name = os.path.basename(model_file)
    if param_file is None:
        param_name = None
    else:
        param_name = os.path.basename(param_file)

    startup_prog = static.Program()
    place = paddle.CPUPlace()
    # bind the executor to the CPU place used for pruning below
    exe = paddle.static.Executor(place)
    scope = static.global_scope()
    exe.run(startup_prog)

    [inference_program, feed_target_names, fetch_targets] = (
        fluid.io.load_inference_model(
            folder, exe, model_filename=model_name,
            params_filename=param_name))

    # collect prunable parameters: 4-D conv weights, skipping feeds and
    # temporary variables
    prune_params = []
    graph = GraphWrapper(inference_program)
    for op in graph.ops():
        for inp in op.all_inputs():
            name = inp.name()
            if name in SKIP:
                continue
            if 'tmp' in name:
                continue
            cond_conv = len(inp._var.shape) == 4 and 'conv' in name
            # only prune conv
            if cond_conv:
                prune_params.append(name)

    # drop last conv (keeps the output layer's channel count intact)
    prune_params.pop()
    ratios = [ratio] * len(prune_params)

    pruner = Pruner()
    main_program, _, _ = pruner.prune(
        inference_program,
        scope,
        params=prune_params,
        ratios=ratios,
        place=place,
        lazy=False,
        only_graph=False,
        param_backup=None,
        param_shape_backup=None)

    fluid.io.save_inference_model(
        save_path,
        feeded_var_names=feed_target_names,
        target_vars=fetch_targets,
        executor=exe,
        main_program=main_program,
        model_filename=model_name,
        params_filename=param_name)
def main():
    """Prune an OCR model using precomputed sensitivity data, then train.

    Loads the config, builds train/eval programs, drops skip-listed and
    backbone ('conv1'..'conv4') entries from the sensitivity file, derives
    per-parameter prune ratios from an allowed 0.03 accuracy loss, prunes
    both programs, and dispatches to the detection or recognition
    train/eval loop depending on the algorithm.
    """
    config = program.load_config(FLAGS.config)
    program.merge_config(FLAGS.opt)
    logger.info(config)

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    program.check_gpu(use_gpu)

    alg = config['Global']['algorithm']
    assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE']
    if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']:
        # recognition algorithms need a character-set codec
        config['Global']['char_ops'] = CharacterOps(config['Global'])

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    startup_program = fluid.Program()
    train_program = fluid.Program()

    # program.build returns (loader, fetch_name_list, fetch_varname_list,
    # opt_loss_name) — unpacked by index below
    train_build_outputs = program.build(
        config, train_program, startup_program, mode='train')
    train_loader = train_build_outputs[0]
    train_fetch_name_list = train_build_outputs[1]
    train_fetch_varname_list = train_build_outputs[2]
    train_opt_loss_name = train_build_outputs[3]

    eval_program = fluid.Program()
    eval_build_outputs = program.build(
        config, eval_program, startup_program, mode='eval')
    eval_fetch_name_list = eval_build_outputs[1]
    eval_fetch_varname_list = eval_build_outputs[2]
    eval_program = eval_program.clone(for_test=True)

    train_reader = reader_main(config=config, mode="train")
    train_loader.set_sample_list_generator(train_reader, places=place)

    eval_reader = reader_main(config=config, mode="eval")

    exe = fluid.Executor(place)
    exe.run(startup_program)

    # compile program for multi-devices
    init_model(config, train_program, exe)

    # load per-parameter sensitivity scores and drop entries we must not
    # prune: the explicit skip_list plus all backbone conv1_..conv4_ params
    sen = load_sensitivities("sensitivities_0.data")
    for i in skip_list:
        if i in sen.keys():
            sen.pop(i)
    back_bone_list = ['conv' + str(x) for x in range(1, 5)]
    for i in back_bone_list:
        for key in list(sen.keys()):
            if i + '_' in key:
                sen.pop(key)
    # ratios chosen so the estimated accuracy loss stays within 0.03
    ratios = get_ratios_by_loss(sen, 0.03)
    logger.info("FLOPs before pruning: {}".format(flops(eval_program)))
    pruner = Pruner(criterion='geometry_median')
    print("ratios: {}".format(ratios))
    # only_graph=True for eval: parameter tensors are shared with the train
    # program and are physically pruned by the second call below
    pruned_val_program, _, _ = pruner.prune(
        eval_program,
        fluid.global_scope(),
        params=ratios.keys(),
        ratios=ratios.values(),
        place=place,
        only_graph=True)
    pruned_program, _, _ = pruner.prune(
        train_program,
        fluid.global_scope(),
        params=ratios.keys(),
        ratios=ratios.values(),
        place=place)
    logger.info("FLOPs after pruning: {}".format(flops(pruned_val_program)))
    train_compile_program = program.create_multi_devices_program(
        pruned_program, train_opt_loss_name)

    train_info_dict = {'compile_program':train_compile_program,\
        'train_program':pruned_program,\
        'reader':train_loader,\
        'fetch_name_list':train_fetch_name_list,\
        'fetch_varname_list':train_fetch_varname_list}

    eval_info_dict = {'program':pruned_val_program,\
        'reader':eval_reader,\
        'fetch_name_list':eval_fetch_name_list,\
        'fetch_varname_list':eval_fetch_varname_list}

    # detection algorithms use the det loop; the rest use the rec loop
    if alg in ['EAST', 'DB']:
        program.train_eval_det_run(config, exe, train_info_dict,
                                   eval_info_dict, is_slim="prune")
    else:
        program.train_eval_rec_run(config, exe, train_info_dict,
                                   eval_info_dict)