def __call__(self, model): paddleslim = try_import('paddleslim') from paddleslim.analysis import dygraph_flops as flops input_spec = [{ "image": paddle.ones(shape=[1, 3, 640, 640], dtype='float32'), "im_shape": paddle.full([1, 2], 640, dtype='float32'), "scale_factor": paddle.ones(shape=[1, 2], dtype='float32') }] if self.print_params: print_prune_params(model) ori_flops = flops(model, input_spec) / 1000 logger.info("FLOPs before pruning: {}GFLOPs".format(ori_flops)) if self.criterion == 'fpgm': pruner = paddleslim.dygraph.FPGMFilterPruner(model, input_spec) elif self.criterion == 'l1_norm': pruner = paddleslim.dygraph.L1NormFilterPruner(model, input_spec) logger.info("pruned params: {}".format(self.pruned_params)) pruned_ratios = [float(n) for n in self.pruned_ratios] ratios = {} for i, param in enumerate(self.pruned_params): ratios[param] = pruned_ratios[i] pruner.prune_vars(ratios, [0]) pruned_flops = flops(model, input_spec) / 1000 logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format( pruned_flops, (ori_flops - pruned_flops) / ori_flops)) return model
def _prune_model(pruner, config, model): from paddleslim.analysis import dygraph_flops as flops logger.info("FLOPs before pruning: {}GFLOPs".format( flops(model, [1] + config["Global"]["image_shape"]) / 1e9)) model.eval() params = [] for sublayer in model.sublayers(): for param in sublayer.parameters(include_sublayers=False): if isinstance(sublayer, paddle.nn.Conv2D): params.append(param.name) ratios = {} for param in params: ratios[param] = config["Slim"]["prune"]["pruned_ratio"] plan = pruner.prune_vars(ratios, [0]) logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format( flops(model, [1] + config["Global"]["image_shape"]) / 1e9, plan.pruned_flops)) for param in model.parameters(): if "conv2d" in param.name: logger.info("{}\t{}".format(param.name, param.shape)) model.train()
def main(): cfg = load_config(FLAGS.config) if 'architecture' in cfg: main_arch = cfg.architecture else: raise ValueError("'architecture' not specified in config file.") merge_config(FLAGS.opt) # Use CPU for exporting inference model instead of GPU place = fluid.CPUPlace() exe = fluid.Executor(place) model = create(main_arch) startup_prog = fluid.Program() infer_prog = fluid.Program() with fluid.program_guard(infer_prog, startup_prog): with fluid.unique_name.guard(): inputs_def = cfg['TestReader']['inputs_def'] inputs_def['use_dataloader'] = False feed_vars, _ = model.build_inputs(**inputs_def) test_fetches = model.test(feed_vars) infer_prog = infer_prog.clone(True) pruned_params = FLAGS.pruned_params assert ( FLAGS.pruned_params is not None ), "FLAGS.pruned_params is empty!!! Please set it by '--pruned_params' option." pruned_params = FLAGS.pruned_params.strip().split(",") logger.info("pruned params: {}".format(pruned_params)) pruned_ratios = [float(n) for n in FLAGS.pruned_ratios.strip().split(",")] logger.info("pruned ratios: {}".format(pruned_ratios)) assert (len(pruned_params) == len(pruned_ratios) ), "The length of pruned params and pruned ratios should be equal." assert (pruned_ratios > [0] * len(pruned_ratios) and pruned_ratios < [1] * len(pruned_ratios) ), "The elements of pruned ratios should be in range (0, 1)." base_flops = flops(infer_prog) pruner = Pruner() infer_prog, _, _ = pruner.prune( infer_prog, fluid.global_scope(), params=pruned_params, ratios=pruned_ratios, place=place, only_graph=True) pruned_flops = flops(infer_prog) logger.info("pruned FLOPS: {}".format( float(base_flops - pruned_flops) / base_flops)) exe.run(startup_prog) checkpoint.load_checkpoint(exe, infer_prog, cfg.weights) save_infer_model(FLAGS, exe, feed_vars, test_fetches, infer_prog)
def test_prune(self): main_program = fluid.Program() startup_program = fluid.Program() # X X O X O # conv1-->conv2-->sum1-->conv3-->conv4-->sum2-->conv5-->conv6 # | ^ | ^ # |____________| |____________________| # # X: prune output channels # O: prune input channels with fluid.program_guard(main_program, startup_program): input = fluid.data(name="image", shape=[None, 3, 16, 16]) conv1 = conv_bn_layer(input, 8, 3, "conv1") conv2 = conv_bn_layer(conv1, 8, 3, "conv2") sum1 = conv1 + conv2 conv3 = conv_bn_layer(sum1, 8, 3, "conv3") conv4 = conv_bn_layer(conv3, 8, 3, "conv4") sum2 = conv4 + sum1 conv5 = conv_bn_layer(sum2, 8, 3, "conv5") conv6 = conv_bn_layer(conv5, 8, 3, "conv6") shapes = {} for param in main_program.global_block().all_parameters(): shapes[param.name] = param.shape place = fluid.CPUPlace() exe = fluid.Executor(place) scope = fluid.Scope() exe.run(startup_program, scope=scope) pruned_flops = 0.5 pruner = AutoPruner(main_program, scope, place, params=["conv4_weights"], init_ratios=[0.5], pruned_flops=0.5, pruned_latency=None, server_addr=("", 0), init_temperature=100, reduce_rate=0.85, max_try_number=300, max_client_num=10, search_steps=2, max_ratios=[0.9], min_ratios=[0], key="auto_pruner") base_flops = flops(main_program) program = pruner.prune(main_program) self.assertTrue(flops(program) <= base_flops * (1 - pruned_flops)) pruner.reward(1) program = pruner.prune(main_program) self.assertTrue(flops(program) <= base_flops * (1 - pruned_flops)) pruner.reward(1)
def runTest(self): x_shape = (1, 3, 32, 32) y_shape = (1, 3, 16, 16) net = Net2() x = np.random.uniform(-1, 1, x_shape).astype('float32') y = np.random.uniform(-1, 1, y_shape).astype('float32') inputs = [paddle.to_tensor(x), paddle.to_tensor(y)] FLOPs1 = flops(net, inputs) shapes = [x_shape, y_shape] FLOPs2 = flops(net, shapes, dtypes=["float32", "float32"]) self.assertTrue(FLOPs1 == FLOPs2)
def get_ratios_by_sensitivity(self, pruned_flops, align=None, dims=[0], skip_vars=[]): """ Get a group of ratios by sensitivities. Args: pruned_flops(float): The excepted rate of FLOPs to be pruned. It should be in range (0, 1). align(int, optional): Round the size of each pruned dimension to multiple of 'align' if 'align' is not None. Default: None. dims(list, optional): The dims to be pruned on. [0] means pruning channels of output for convolution. Default: [0]. skip_vars(list, optional): The names of tensors whose sensitivity won't be computed. "None" means skip nothing. Default: None. Returns: tuple: A tuple with format ``(ratios, pruned_flops)`` . "ratios" is a dict whose key is name of tensor and value is ratio to be pruned. "pruned_flops" is the ratio of total pruned FLOPs in the model. """ base_flops = flops(self.model, self.inputs) _logger.debug("Base FLOPs: {}".format(base_flops)) low = 0. up = 1.0 history = set() while low < up: loss = (low + up) / 2 ratios = self._get_ratios_by_loss(self._status.sensitivies, loss, skip_vars=skip_vars) _logger.debug("pruning ratios: {}".format(ratios)) if align is not None: ratios = self._round_to(ratios, dims=dims, factor=align) plan = self.prune_vars(ratios, axis=dims) c_flops = flops(self.model, self.inputs) _logger.debug("FLOPs after pruning: {}".format(c_flops)) c_pruned_flops = (base_flops - c_flops) / base_flops plan.restore(self.model) _logger.debug( "Seaching ratios, pruned FLOPs: {}".format(c_pruned_flops)) key = str(round(c_pruned_flops, 4)) if key in history: return ratios, c_pruned_flops history.add(key) if c_pruned_flops < pruned_flops: low = loss elif c_pruned_flops > pruned_flops: up = loss else: return ratios, c_pruned_flops return ratios, c_pruned_flops
def _flops(self, loader): self.model.eval() try: import paddleslim except Exception as e: logger.warning( 'Unable to calculate flops, please install paddleslim, for example: `pip install paddleslim`' ) return from paddleslim.analysis import dygraph_flops as flops input_data = None for data in loader: input_data = data break input_spec = [{ "image": input_data['image'][0].unsqueeze(0), "im_shape": input_data['im_shape'][0].unsqueeze(0), "scale_factor": input_data['scale_factor'][0].unsqueeze(0) }] flops = flops(self.model, input_spec) / (1000**3) logger.info(" Model FLOPs : {:.6f}G. (image shape is {})".format( flops, input_data['image'][0].unsqueeze(0).shape))
def test_prune(self): main_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(main_program, startup_program): input = fluid.data(name="image", shape=[None, 3, 16, 16]) conv1 = conv_bn_layer(input, 8, 3, "conv1") conv2 = conv_bn_layer(conv1, 8, 3, "conv2") sum1 = conv1 + conv2 conv3 = conv_bn_layer(sum1, 8, 3, "conv3") conv4 = conv_bn_layer(conv3, 8, 3, "conv4") sum2 = conv4 + sum1 conv5 = conv_bn_layer(sum2, 8, 3, "conv5") conv6 = conv_bn_layer(conv5, 8, 3, "conv6") self.assertTrue(792576 == flops(main_program))
def runTest(self): x_shape = (1, 3, 32, 32) y_shape = (1, 3, 16, 16) net = Net1() x = np.random.uniform(-1, 1, x_shape).astype('float32') y = np.random.uniform(-1, 1, y_shape).astype('float32') inputs = { "x": paddle.to_tensor(x), "y": paddle.to_tensor(y), "z": "test" } FLOPs = flops(net, [inputs]) self.assertTrue(FLOPs == 59184)
def compute_constraint(self, program): if self.ctype == 'flops': model_status = flops(program) elif self.ctype == 'latency': assert os.path.exists( self.table_file ), "latency constraint must have latency table, please check whether table file exist!" model_latency = TableLatencyEvaluator(self.table_file) model_status = model_latency.latency(program, only_conv=True) else: raise NotImplementedError( "{} constraint is NOT support!!! Now PaddleSlim support flops constraint and latency constraint" .format(self.ctype)) return model_status
def test_nas(self): factory = SearchSpaceFactory() config0 = {'input_size': 224, 'output_size': 7, 'block_num': 5} config1 = {'input_size': 7, 'output_size': 1, 'block_num': 2} configs = [('MobileNetV2Space', config0), ('ResNetSpace', config1)] space = factory.get_search_space([('MobileNetV2Space', config0)]) origin_arch = space.token2arch()[0] main_program = fluid.Program() s_program = fluid.Program() with fluid.program_guard(main_program, s_program): input = fluid.data(name="input", shape=[None, 3, 224, 224], dtype="float32") origin_arch(input) base_flops = flops(main_program) search_steps = 3 sa_nas = SANAS(configs, search_steps=search_steps, server_addr=("", 0), is_server=True) for i in range(search_steps): archs = sa_nas.next_archs() main_program = fluid.Program() s_program = fluid.Program() with fluid.program_guard(main_program, s_program): input = fluid.data(name="input", shape=[None, 3, 224, 224], dtype="float32") archs[0](input) sa_nas.reward(1) self.assertTrue(flops(main_program) < base_flops)
def main(): env = os.environ cfg = load_config(FLAGS.config) merge_config(FLAGS.opt) check_config(cfg) # check if set use_gpu=True in paddlepaddle cpu version check_gpu(cfg.use_gpu) check_version() main_arch = cfg.architecture if cfg.use_gpu: devices_num = fluid.core.get_cuda_device_count() else: devices_num = int(os.environ.get('CPU_NUM', 1)) if 'FLAGS_selected_gpus' in env: device_id = int(env['FLAGS_selected_gpus']) else: device_id = 0 place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) # build program model = create(main_arch) inputs_def = cfg['TrainReader']['inputs_def'] train_feed_vars, train_loader = model.build_inputs(**inputs_def) train_fetches = model.train(train_feed_vars) loss = train_fetches['loss'] start_iter = 0 train_reader = create_reader(cfg.TrainReader, (cfg.max_iters - start_iter) * devices_num, cfg) # When iterable mode, set set_sample_list_generator(train_reader, place) train_loader.set_sample_list_generator(train_reader) eval_prog = fluid.Program() with fluid.program_guard(eval_prog, fluid.default_startup_program()): with fluid.unique_name.guard(): model = create(main_arch) inputs_def = cfg['EvalReader']['inputs_def'] test_feed_vars, eval_loader = model.build_inputs(**inputs_def) fetches = model.eval(test_feed_vars) eval_prog = eval_prog.clone(True) eval_reader = create_reader(cfg.EvalReader) # When iterable mode, set set_sample_list_generator(eval_reader, place) eval_loader.set_sample_list_generator(eval_reader) teacher_cfg = load_config(FLAGS.teacher_config) merge_config(FLAGS.opt) teacher_arch = teacher_cfg.architecture teacher_program = fluid.Program() teacher_startup_program = fluid.Program() with fluid.program_guard(teacher_program, teacher_startup_program): with fluid.unique_name.guard(): teacher_feed_vars = OrderedDict() for name, var in train_feed_vars.items(): teacher_feed_vars[name] = teacher_program.global_block( )._clone_variable(var, force_persistable=False) model = create(teacher_arch) train_fetches = model.train(teacher_feed_vars) teacher_loss = train_fetches['loss'] exe.run(teacher_startup_program) assert FLAGS.teacher_pretrained, "teacher_pretrained should be set" checkpoint.load_params(exe, teacher_program, FLAGS.teacher_pretrained) teacher_program = teacher_program.clone(for_test=True) target_number = len(model.yolo_head.anchor_masks) data_name_map = { 'image': 'image', 'gt_bbox': 'gt_bbox', 'gt_class': 'gt_class', 'gt_score': 'gt_score' } for i in range(target_number): data_name_map['target{}'.format(i)] = 'target{}'.format(i) merge(teacher_program, fluid.default_main_program(), data_name_map, place) output_names = [ [ 'strided_slice_0.tmp_0', 'strided_slice_1.tmp_0', 'strided_slice_2.tmp_0', 'strided_slice_3.tmp_0', 'strided_slice_4.tmp_0', 'transpose_0.tmp_0' ], [ 'strided_slice_5.tmp_0', 'strided_slice_6.tmp_0', 'strided_slice_7.tmp_0', 'strided_slice_8.tmp_0', 'strided_slice_9.tmp_0', 'transpose_2.tmp_0' ], [ 'strided_slice_10.tmp_0', 'strided_slice_11.tmp_0', 'strided_slice_12.tmp_0', 'strided_slice_13.tmp_0', 'strided_slice_14.tmp_0', 'transpose_4.tmp_0' ], ] yolo_output_names = [] for i in range(target_number): yolo_output_names.extend(output_names[i]) assert cfg.use_fine_grained_loss, \ "Only support use_fine_grained_loss=True, Please set it in config file or '-o use_fine_grained_loss=true'" distill_loss = split_distill(yolo_output_names, 1000, target_number) loss = distill_loss + loss lr_builder = create('LearningRate') optim_builder = create('OptimizerBuilder') lr = lr_builder() opt = optim_builder(lr) opt.minimize(loss) exe.run(fluid.default_startup_program()) checkpoint.load_params(exe, fluid.default_main_program(), cfg.pretrain_weights) assert FLAGS.pruned_params is not None, \ "FLAGS.pruned_params is empty!!! Please set it by '--pruned_params' option." pruned_params = FLAGS.pruned_params.strip().split(",") logger.info("pruned params: {}".format(pruned_params)) pruned_ratios = [float(n) for n in FLAGS.pruned_ratios.strip().split(",")] logger.info("pruned ratios: {}".format(pruned_ratios)) assert len(pruned_params) == len(pruned_ratios), \ "The length of pruned params and pruned ratios should be equal." assert pruned_ratios > [0] * len(pruned_ratios) and pruned_ratios < [1] * len(pruned_ratios), \ "The elements of pruned ratios should be in range (0, 1)." assert FLAGS.prune_criterion in ['l1_norm', 'geometry_median'], \ "unsupported prune criterion {}".format(FLAGS.prune_criterion) pruner = Pruner(criterion=FLAGS.prune_criterion) distill_prog = pruner.prune(fluid.default_main_program(), fluid.global_scope(), params=pruned_params, ratios=pruned_ratios, place=place, only_graph=False)[0] base_flops = flops(eval_prog) eval_prog = pruner.prune(eval_prog, fluid.global_scope(), params=pruned_params, ratios=pruned_ratios, place=place, only_graph=True)[0] pruned_flops = flops(eval_prog) logger.info("FLOPs -{}; total FLOPs: {}; pruned FLOPs: {}".format( float(base_flops - pruned_flops) / base_flops, base_flops, pruned_flops)) build_strategy = fluid.BuildStrategy() build_strategy.fuse_all_reduce_ops = False build_strategy.fuse_all_optimizer_ops = False build_strategy.fuse_elewise_add_act_ops = True # only enable sync_bn in multi GPU devices sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn' build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \ and cfg.use_gpu exec_strategy = fluid.ExecutionStrategy() # iteration number when CompiledProgram tries to drop local execution scopes. # Set it to be 1 to save memory usages, so that unused variables in # local execution scopes can be deleted after each iteration. exec_strategy.num_iteration_per_drop_scope = 1 parallel_main = fluid.CompiledProgram(distill_prog).with_data_parallel( loss_name=loss.name, build_strategy=build_strategy, exec_strategy=exec_strategy) compiled_eval_prog = fluid.CompiledProgram(eval_prog) # parse eval fetches extra_keys = [] if cfg.metric == 'COCO': extra_keys = ['im_info', 'im_id', 'im_shape'] if cfg.metric == 'VOC': extra_keys = ['gt_bbox', 'gt_class', 'is_difficult'] eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog, extra_keys) # whether output bbox is normalized in model output layer is_bbox_normalized = False if hasattr(model, 'is_bbox_normalized') and \ callable(model.is_bbox_normalized): is_bbox_normalized = model.is_bbox_normalized() map_type = cfg.map_type if 'map_type' in cfg else '11point' best_box_ap_list = [0.0, 0] #[map, iter] cfg_name = os.path.basename(FLAGS.config).split('.')[0] save_dir = os.path.join(cfg.save_dir, cfg_name) train_loader.start() for step_id in range(start_iter, cfg.max_iters): teacher_loss_np, distill_loss_np, loss_np, lr_np = exe.run( parallel_main, fetch_list=[ 'teacher_' + teacher_loss.name, distill_loss.name, loss.name, lr.name ]) if step_id % cfg.log_iter == 0: logger.info( "step {} lr {:.6f}, loss {:.6f}, distill_loss {:.6f}, teacher_loss {:.6f}" .format(step_id, lr_np[0], loss_np[0], distill_loss_np[0], teacher_loss_np[0])) if step_id % cfg.snapshot_iter == 0 and step_id != 0 or step_id == cfg.max_iters - 1: save_name = str( step_id) if step_id != cfg.max_iters - 1 else "model_final" checkpoint.save(exe, distill_prog, os.path.join(save_dir, save_name)) # eval results = eval_run(exe, compiled_eval_prog, eval_loader, eval_keys, eval_values, eval_cls, cfg) resolution = None box_ap_stats = eval_results(results, cfg.metric, cfg.num_classes, resolution, is_bbox_normalized, FLAGS.output_eval, map_type, cfg['EvalReader']['dataset']) if box_ap_stats[0] > best_box_ap_list[0]: best_box_ap_list[0] = box_ap_stats[0] best_box_ap_list[1] = step_id checkpoint.save(exe, distill_prog, os.path.join("./", "best_model")) logger.info("Best test box ap: {}, in step: {}".format( best_box_ap_list[0], best_box_ap_list[1])) train_loader.reset()
def search_mobilenetv2_block(config, args, image_size): image_shape = [3, image_size, image_size] if args.is_server: sa_nas = SANAS(config, server_addr=(args.server_address, args.port), search_steps=args.search_steps, is_server=True) else: sa_nas = SANAS(config, server_addr=(args.server_address, args.port), search_steps=args.search_steps, is_server=False) for step in range(args.search_steps): archs = sa_nas.next_archs()[0] train_program = fluid.Program() test_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(train_program, startup_program): train_loader, data, label = create_data_loader(image_shape) data = conv_bn_layer(input=data, num_filters=32, filter_size=3, stride=2, padding='SAME', act='relu6', name='mobilenetv2_conv1') data = archs(data)[0] data = conv_bn_layer(input=data, num_filters=1280, filter_size=1, stride=1, padding='SAME', act='relu6', name='mobilenetv2_last_conv') data = fluid.layers.pool2d(input=data, pool_size=7, pool_stride=1, pool_type='avg', global_pooling=True, name='mobilenetv2_last_pool') output = fluid.layers.fc( input=data, size=args.class_dim, param_attr=ParamAttr(name='mobilenetv2_fc_weights'), bias_attr=ParamAttr(name='mobilenetv2_fc_offset')) softmax_out = fluid.layers.softmax(input=output, use_cudnn=False) cost = fluid.layers.cross_entropy(input=softmax_out, label=label) avg_cost = fluid.layers.mean(cost) acc_top1 = fluid.layers.accuracy(input=softmax_out, label=label, k=1) acc_top5 = fluid.layers.accuracy(input=softmax_out, label=label, k=5) test_program = train_program.clone(for_test=True) optimizer = fluid.optimizer.Momentum( learning_rate=0.1, momentum=0.9, regularization=fluid.regularizer.L2Decay(1e-4)) optimizer.minimize(avg_cost) current_flops = flops(train_program) print('step: {}, current_flops: {}'.format(step, current_flops)) if current_flops > int(321208544): continue place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(startup_program) if args.data == 'cifar10': train_reader = paddle.fluid.io.batch(paddle.reader.shuffle( paddle.dataset.cifar.train10(cycle=False), buf_size=1024), batch_size=args.batch_size, drop_last=True) test_reader = paddle.fluid.io.batch( paddle.dataset.cifar.test10(cycle=False), batch_size=args.batch_size, drop_last=False) elif args.data == 'imagenet': train_reader = paddle.fluid.io.batch(imagenet_reader.train(), batch_size=args.batch_size, drop_last=True) test_reader = paddle.fluid.io.batch(imagenet_reader.val(), batch_size=args.batch_size, drop_last=False) test_loader, _, _ = create_data_loader(image_shape) train_loader.set_sample_list_generator( train_reader, places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places()) test_loader.set_sample_list_generator(test_reader, places=place) build_strategy = fluid.BuildStrategy() train_compiled_program = fluid.CompiledProgram( train_program).with_data_parallel(loss_name=avg_cost.name, build_strategy=build_strategy) for epoch_id in range(args.retain_epoch): for batch_id, data in enumerate(train_loader()): fetches = [avg_cost.name] s_time = time.time() outs = exe.run(train_compiled_program, feed=data, fetch_list=fetches)[0] batch_time = time.time() - s_time if batch_id % 10 == 0: _logger.info( 'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms' .format(step, epoch_id, batch_id, outs[0], batch_time)) reward = [] for batch_id, data in enumerate(test_loader()): test_fetches = [avg_cost.name, acc_top1.name, acc_top5.name] batch_reward = exe.run(test_program, feed=data, fetch_list=test_fetches) reward_avg = np.mean(np.array(batch_reward), axis=1) reward.append(reward_avg) _logger.info( 'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}' .format(step, batch_id, batch_reward[0], batch_reward[1], batch_reward[2])) finally_reward = np.mean(np.array(reward), axis=0) _logger.info( 'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format( finally_reward[0], finally_reward[1], finally_reward[2])) sa_nas.reward(float(finally_reward[1]))
def main(): """ Main evaluate function """ cfg = load_config(FLAGS.config) merge_config(FLAGS.opt) check_config(cfg) # check if set use_gpu=True in paddlepaddle cpu version check_gpu(cfg.use_gpu) # check if paddlepaddle version is satisfied check_version() main_arch = cfg.architecture multi_scale_test = getattr(cfg, 'MultiScaleTEST', None) # define executor place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) # build program model = create(main_arch) startup_prog = fluid.Program() eval_prog = fluid.Program() with fluid.program_guard(eval_prog, startup_prog): with fluid.unique_name.guard(): inputs_def = cfg['EvalReader']['inputs_def'] feed_vars, loader = model.build_inputs(**inputs_def) if multi_scale_test is None: fetches = model.eval(feed_vars) else: fetches = model.eval(feed_vars, multi_scale_test) eval_prog = eval_prog.clone(True) exe.run(startup_prog) reader = create_reader(cfg.EvalReader) # When iterable mode, set set_sample_list_generator(reader, place) loader.set_sample_list_generator(reader) dataset = cfg['EvalReader']['dataset'] # eval already exists json file if FLAGS.json_eval: logger.info( "In json_eval mode, PaddleDetection will evaluate json files in " "output_eval directly. And proposal.json, bbox.json and mask.json " "will be detected by default.") json_eval_results( cfg.metric, json_directory=FLAGS.output_eval, dataset=dataset) return pruned_params = FLAGS.pruned_params assert ( FLAGS.pruned_params is not None ), "FLAGS.pruned_params is empty!!! Please set it by '--pruned_params' option." pruned_params = FLAGS.pruned_params.strip().split(",") logger.info("pruned params: {}".format(pruned_params)) pruned_ratios = [float(n) for n in FLAGS.pruned_ratios.strip().split(",")] logger.info("pruned ratios: {}".format(pruned_ratios)) assert (len(pruned_params) == len(pruned_ratios) ), "The length of pruned params and pruned ratios should be equal." assert (pruned_ratios > [0] * len(pruned_ratios) and pruned_ratios < [1] * len(pruned_ratios) ), "The elements of pruned ratios should be in range (0, 1)." base_flops = flops(eval_prog) pruner = Pruner() eval_prog, _, _ = pruner.prune( eval_prog, fluid.global_scope(), params=pruned_params, ratios=pruned_ratios, place=place, only_graph=False) pruned_flops = flops(eval_prog) logger.info("pruned FLOPS: {}".format( float(base_flops - pruned_flops) / base_flops)) compile_program = fluid.CompiledProgram(eval_prog).with_data_parallel() assert cfg.metric != 'OID', "eval process of OID dataset \ is not supported." if cfg.metric == "WIDERFACE": raise ValueError("metric type {} does not support in tools/eval.py, " "please use tools/face_eval.py".format(cfg.metric)) assert cfg.metric in ['COCO', 'VOC'], \ "unknown metric type {}".format(cfg.metric) extra_keys = [] if cfg.metric == 'COCO': extra_keys = ['im_info', 'im_id', 'im_shape'] if cfg.metric == 'VOC': extra_keys = ['gt_bbox', 'gt_class', 'is_difficult'] keys, values, cls = parse_fetches(fetches, eval_prog, extra_keys) # whether output bbox is normalized in model output layer is_bbox_normalized = False if hasattr(model, 'is_bbox_normalized') and \ callable(model.is_bbox_normalized): is_bbox_normalized = model.is_bbox_normalized() sub_eval_prog = None sub_keys = None sub_values = None # build sub-program if 'Mask' in main_arch and multi_scale_test: sub_eval_prog = fluid.Program() with fluid.program_guard(sub_eval_prog, startup_prog): with fluid.unique_name.guard(): inputs_def = cfg['EvalReader']['inputs_def'] inputs_def['mask_branch'] = True feed_vars, eval_loader = model.build_inputs(**inputs_def) sub_fetches = model.eval( feed_vars, multi_scale_test, mask_branch=True) assert cfg.metric == 'COCO' extra_keys = ['im_id', 'im_shape'] sub_keys, sub_values, _ = parse_fetches(sub_fetches, sub_eval_prog, extra_keys) sub_eval_prog = sub_eval_prog.clone(True) # load model if 'weights' in cfg: checkpoint.load_checkpoint(exe, eval_prog, cfg.weights) resolution = None if 'Mask' in cfg.architecture: resolution = model.mask_head.resolution results = eval_run( exe, compile_program, loader, keys, values, cls, cfg, sub_eval_prog, sub_keys, sub_values, resolution=resolution) # if map_type not set, use default 11point, only use in VOC eval map_type = cfg.map_type if 'map_type' in cfg else '11point' eval_results( results, cfg.metric, cfg.num_classes, resolution, is_bbox_normalized, FLAGS.output_eval, map_type, dataset=dataset)
def main(): env = os.environ FLAGS.dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env if FLAGS.dist: trainer_id = int(env['PADDLE_TRAINER_ID']) import random local_seed = (99 + trainer_id) random.seed(local_seed) np.random.seed(local_seed) cfg = load_config(FLAGS.config) merge_config(FLAGS.opt) check_config(cfg) # check if set use_gpu=True in paddlepaddle cpu version check_gpu(cfg.use_gpu) # check if paddlepaddle version is satisfied check_version() main_arch = cfg.architecture if cfg.use_gpu: devices_num = fluid.core.get_cuda_device_count() else: devices_num = int(os.environ.get('CPU_NUM', 1)) if 'FLAGS_selected_gpus' in env: device_id = int(env['FLAGS_selected_gpus']) else: device_id = 0 place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) lr_builder = create('LearningRate') optim_builder = create('OptimizerBuilder') # build program startup_prog = fluid.Program() train_prog = fluid.Program() with fluid.program_guard(train_prog, startup_prog): with fluid.unique_name.guard(): model = create(main_arch) if FLAGS.fp16: assert (getattr(model.backbone, 'norm_type', None) != 'affine_channel'), \ '--fp16 currently does not support affine channel, ' \ ' please modify backbone settings to use batch norm' with mixed_precision_context(FLAGS.loss_scale, FLAGS.fp16) as ctx: inputs_def = cfg['TrainReader']['inputs_def'] feed_vars, train_loader = model.build_inputs(**inputs_def) train_fetches = model.train(feed_vars) loss = train_fetches['loss'] if FLAGS.fp16: loss *= ctx.get_loss_scale_var() lr = lr_builder() optimizer = optim_builder(lr) optimizer.minimize(loss) if FLAGS.fp16: loss /= ctx.get_loss_scale_var() # parse train fetches train_keys, train_values, _ = parse_fetches(train_fetches) train_values.append(lr) if FLAGS.print_params: param_delimit_str = '-' * 20 + "All parameters in current graph" + '-' * 20 print(param_delimit_str) for block in train_prog.blocks: for param in block.all_parameters(): print("parameter name: {}\tshape: {}".format(param.name, param.shape)) print('-' * len(param_delimit_str)) return if FLAGS.eval: eval_prog = fluid.Program() with fluid.program_guard(eval_prog, startup_prog): with fluid.unique_name.guard(): model = create(main_arch) inputs_def = cfg['EvalReader']['inputs_def'] feed_vars, eval_loader = model.build_inputs(**inputs_def) fetches = model.eval(feed_vars) eval_prog = eval_prog.clone(True) eval_reader = create_reader(cfg.EvalReader) eval_loader.set_sample_list_generator(eval_reader, place) # parse eval fetches extra_keys = [] if cfg.metric == 'COCO': extra_keys = ['im_info', 'im_id', 'im_shape'] if cfg.metric == 'VOC': extra_keys = ['gt_bbox', 'gt_class', 'is_difficult'] if cfg.metric == 'WIDERFACE': extra_keys = ['im_id', 'im_shape', 'gt_bbox'] eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog, extra_keys) # compile program for multi-devices build_strategy = fluid.BuildStrategy() build_strategy.fuse_all_optimizer_ops = False build_strategy.fuse_elewise_add_act_ops = True # only enable sync_bn in multi GPU devices sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn' build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \ and cfg.use_gpu exec_strategy = fluid.ExecutionStrategy() # iteration number when CompiledProgram tries to drop local execution scopes. # Set it to be 1 to save memory usages, so that unused variables in # local execution scopes can be deleted after each iteration. exec_strategy.num_iteration_per_drop_scope = 1 if FLAGS.dist: dist_utils.prepare_for_multi_process(exe, build_strategy, startup_prog, train_prog) exec_strategy.num_threads = 1 exe.run(startup_prog) fuse_bn = getattr(model.backbone, 'norm_type', None) == 'affine_channel' start_iter = 0 if cfg.pretrain_weights: checkpoint.load_params(exe, train_prog, cfg.pretrain_weights) pruned_params = FLAGS.pruned_params assert FLAGS.pruned_params is not None, \ "FLAGS.pruned_params is empty!!! Please set it by '--pruned_params' option." pruned_params = FLAGS.pruned_params.strip().split(",") logger.info("pruned params: {}".format(pruned_params)) pruned_ratios = [float(n) for n in FLAGS.pruned_ratios.strip().split(",")] logger.info("pruned ratios: {}".format(pruned_ratios)) assert len(pruned_params) == len(pruned_ratios), \ "The length of pruned params and pruned ratios should be equal." assert (pruned_ratios > [0] * len(pruned_ratios) and pruned_ratios < [1] * len(pruned_ratios) ), "The elements of pruned ratios should be in range (0, 1)." assert FLAGS.prune_criterion in ['l1_norm', 'geometry_median'], \ "unsupported prune criterion {}".format(FLAGS.prune_criterion) pruner = Pruner(criterion=FLAGS.prune_criterion) train_prog = pruner.prune( train_prog, fluid.global_scope(), params=pruned_params, ratios=pruned_ratios, place=place, only_graph=False)[0] compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel( loss_name=loss.name, build_strategy=build_strategy, exec_strategy=exec_strategy) if FLAGS.eval: base_flops = flops(eval_prog) eval_prog = pruner.prune( eval_prog, fluid.global_scope(), params=pruned_params, ratios=pruned_ratios, place=place, only_graph=True)[0] pruned_flops = flops(eval_prog) logger.info("FLOPs -{}; total FLOPs: {}; pruned FLOPs: {}".format( float(base_flops - pruned_flops) / base_flops, base_flops, pruned_flops)) compiled_eval_prog = fluid.compiler.CompiledProgram(eval_prog) if FLAGS.resume_checkpoint: checkpoint.load_checkpoint(exe, train_prog, FLAGS.resume_checkpoint) start_iter = checkpoint.global_step() train_reader = create_reader(cfg.TrainReader, (cfg.max_iters - start_iter) * devices_num, cfg) train_loader.set_sample_list_generator(train_reader, place) # whether output bbox is normalized in model output layer is_bbox_normalized = False if hasattr(model, 'is_bbox_normalized') and \ callable(model.is_bbox_normalized): is_bbox_normalized = model.is_bbox_normalized() # if map_type not set, use default 11point, only use in VOC eval map_type = cfg.map_type if 'map_type' in cfg else '11point' train_stats = TrainingStats(cfg.log_smooth_window, train_keys) train_loader.start() start_time = time.time() end_time = time.time() cfg_name = os.path.basename(FLAGS.config).split('.')[0] save_dir = os.path.join(cfg.save_dir, cfg_name) time_stat = deque(maxlen=cfg.log_smooth_window) best_box_ap_list = [0.0, 0] #[map, iter] # use tb-paddle to log data if FLAGS.use_tb: from tb_paddle import SummaryWriter tb_writer = SummaryWriter(FLAGS.tb_log_dir) tb_loss_step = 0 tb_mAP_step = 0 if FLAGS.eval: # evaluation results = eval_run(exe, compiled_eval_prog, eval_loader, eval_keys, eval_values, eval_cls, cfg) resolution = None if 'mask' in results[0]: resolution = model.mask_head.resolution dataset = cfg['EvalReader']['dataset'] box_ap_stats = eval_results( results, cfg.metric, cfg.num_classes, resolution, is_bbox_normalized, FLAGS.output_eval, map_type, dataset=dataset) for it in range(start_iter, cfg.max_iters): start_time = end_time end_time = time.time() time_stat.append(end_time - start_time) time_cost = np.mean(time_stat) eta_sec = (cfg.max_iters - it) * time_cost eta = str(datetime.timedelta(seconds=int(eta_sec))) outs = exe.run(compiled_train_prog, fetch_list=train_values) stats = {k: np.array(v).mean() for k, v in zip(train_keys, outs[:-1])} # use tb-paddle to log loss if FLAGS.use_tb: if it % cfg.log_iter == 0: for loss_name, loss_value in stats.items(): tb_writer.add_scalar(loss_name, loss_value, tb_loss_step) tb_loss_step += 1 train_stats.update(stats) logs = train_stats.log() if it % cfg.log_iter == 0 and (not FLAGS.dist or trainer_id == 0): strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format( it, np.mean(outs[-1]), logs, time_cost, eta) logger.info(strs) if (it > 0 and it % cfg.snapshot_iter == 0 or it == cfg.max_iters - 1) \ and (not FLAGS.dist or trainer_id == 0): save_name = str(it) if it != cfg.max_iters - 1 else "model_final" checkpoint.save(exe, train_prog, os.path.join(save_dir, save_name)) if FLAGS.eval: # evaluation results = eval_run( exe, compiled_eval_prog, eval_loader, eval_keys, eval_values, eval_cls, cfg=cfg) resolution = None if 'mask' in results[0]: resolution = model.mask_head.resolution box_ap_stats = eval_results( results, cfg.metric, cfg.num_classes, resolution, is_bbox_normalized, FLAGS.output_eval, map_type, dataset=dataset) # use tb_paddle to log mAP if FLAGS.use_tb: tb_writer.add_scalar("mAP", box_ap_stats[0], tb_mAP_step) tb_mAP_step += 1 if box_ap_stats[0] > best_box_ap_list[0]: best_box_ap_list[0] = box_ap_stats[0] best_box_ap_list[1] = it checkpoint.save(exe, train_prog, os.path.join(save_dir, "best_model")) logger.info("Best test box ap: {}, in iter: {}".format( best_box_ap_list[0], best_box_ap_list[1])) train_loader.reset()
def search_mobilenetv2_block(config, args, image_size): image_shape = [3, image_size, image_size] transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])]) if args.data == 'cifar10': train_dataset = paddle.vision.datasets.Cifar10(mode='train', transform=transform, backend='cv2') val_dataset = paddle.vision.datasets.Cifar10(mode='test', transform=transform, backend='cv2') elif args.data == 'imagenet': train_dataset = imagenet_reader.ImageNetDataset(mode='train') val_dataset = imagenet_reader.ImageNetDataset(mode='val') places = static.cuda_places() if args.use_gpu else static.cpu_places() place = places[0] if args.is_server: sa_nas = SANAS(config, server_addr=(args.server_address, args.port), search_steps=args.search_steps, is_server=True) else: sa_nas = SANAS(config, server_addr=(args.server_address, args.port), search_steps=args.search_steps, is_server=False) for step in range(args.search_steps): archs = sa_nas.next_archs()[0] train_program = static.Program() test_program = static.Program() startup_program = static.Program() with static.program_guard(train_program, startup_program): data_shape = [None] + image_shape data = static.data(name='data', shape=data_shape, dtype='float32') label = static.data(name='label', shape=[None, 1], dtype='int64') if args.data == 'cifar10': paddle.assign(paddle.reshape(label, [-1, 1]), label) train_loader = paddle.io.DataLoader(train_dataset, places=places, feed_list=[data, label], drop_last=True, batch_size=args.batch_size, return_list=False, shuffle=True, use_shared_memory=True, num_workers=4) val_loader = paddle.io.DataLoader(val_dataset, places=place, feed_list=[data, label], drop_last=False, batch_size=args.batch_size, return_list=False, shuffle=False) data = conv_bn_layer(input=data, num_filters=32, filter_size=3, stride=2, padding='SAME', act='relu6', name='mobilenetv2_conv1') data = archs(data)[0] data = conv_bn_layer(input=data, num_filters=1280, filter_size=1, stride=1, padding='SAME', act='relu6', name='mobilenetv2_last_conv') data = F.adaptive_avg_pool2d(data, output_size=[1, 1], name='mobilenetv2_last_pool') output = static.nn.fc( x=data, size=args.class_dim, weight_attr=ParamAttr(name='mobilenetv2_fc_weights'), bias_attr=ParamAttr(name='mobilenetv2_fc_offset')) softmax_out = F.softmax(output) cost = F.cross_entropy(softmax_out, label=label) avg_cost = paddle.mean(cost) acc_top1 = paddle.metric.accuracy(input=softmax_out, label=label, k=1) acc_top5 = paddle.metric.accuracy(input=softmax_out, label=label, k=5) test_program = train_program.clone(for_test=True) optimizer = paddle.optimizer.Momentum( learning_rate=0.1, momentum=0.9, weight_decay=paddle.regularizer.L2Decay(1e-4)) optimizer.minimize(avg_cost) current_flops = flops(train_program) print('step: {}, current_flops: {}'.format(step, current_flops)) if current_flops > int(321208544): continue exe = static.Executor(place) exe.run(startup_program) build_strategy = static.BuildStrategy() train_compiled_program = static.CompiledProgram( train_program).with_data_parallel(loss_name=avg_cost.name, build_strategy=build_strategy) for epoch_id in range(args.retain_epoch): for batch_id, data in enumerate(train_loader()): fetches = [avg_cost.name] s_time = time.time() outs = exe.run(train_compiled_program, feed=data, fetch_list=fetches)[0] batch_time = time.time() - s_time if batch_id % 10 == 0: _logger.info( 'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms' .format(step, epoch_id, batch_id, outs[0], batch_time)) reward = [] for batch_id, data in enumerate(val_loader()): test_fetches = [avg_cost.name, acc_top1.name, acc_top5.name] batch_reward = exe.run(test_program, feed=data, fetch_list=test_fetches) reward_avg = np.mean(np.array(batch_reward), axis=1) reward.append(reward_avg) _logger.info( 'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}' .format(step, batch_id, batch_reward[0], batch_reward[1], batch_reward[2])) finally_reward = np.mean(np.array(reward), axis=0) _logger.info( 'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format( finally_reward[0], finally_reward[1], finally_reward[2])) sa_nas.reward(float(finally_reward[1]))
print("Usage: python ./{} <model_dir>".format( os.path.basename(__file__))) exit(0) # input parameters model_dir = sys.argv[1] #whole dir timestamp = time.strftime("%Y%m%d-%H%M%S", time.localtime()) save_model_dir = "".join([model_dir, "/", "saved-", timestamp]) os.mkdir(save_model_dir) if os.path.exists(model_dir + "/model"): model_filename = "model" elif os.path.exists(model_dir + "/__model__"): model_filename = "__model__" else: print("[ERROR] No model file `model` or `__model__` found") exit(0) exe = fluid.Executor(fluid.CPUPlace()) [inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model(dirname=model_dir, executor=exe) blocks = inference_program.blocks op_num = 0 for block in blocks: op_num += len(block.ops) pass # print(inference_program.blocks) print("FLOPs: {:.6e} {:.3f}Mb ops: {}".format( flops(inference_program), get_dir_size(model_dir) / 1024 / 1024, op_num))
def runTest(self): net = self._net(pretrained=False) FLOPs = flops(net, (1, 3, 32, 32)) self.assertTrue(FLOPs == self._gt)
def main(): config = program.load_config(FLAGS.config) program.merge_config(FLAGS.opt) logger.info(config) # check if set use_gpu=True in paddlepaddle cpu version use_gpu = config['Global']['use_gpu'] program.check_gpu(use_gpu) alg = config['Global']['algorithm'] assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE'] if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']: config['Global']['char_ops'] = CharacterOps(config['Global']) place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() startup_program = fluid.Program() train_program = fluid.Program() train_build_outputs = program.build(config, train_program, startup_program, mode='train') train_loader = train_build_outputs[0] train_fetch_name_list = train_build_outputs[1] train_fetch_varname_list = train_build_outputs[2] train_opt_loss_name = train_build_outputs[3] eval_program = fluid.Program() eval_build_outputs = program.build(config, eval_program, startup_program, mode='eval') eval_fetch_name_list = eval_build_outputs[1] eval_fetch_varname_list = eval_build_outputs[2] eval_program = eval_program.clone(for_test=True) train_reader = reader_main(config=config, mode="train") train_loader.set_sample_list_generator(train_reader, places=place) eval_reader = reader_main(config=config, mode="eval") exe = fluid.Executor(place) exe.run(startup_program) # compile program for multi-devices init_model(config, train_program, exe) sen = load_sensitivities("sensitivities_0.data") for i in skip_list: if i in sen.keys(): sen.pop(i) back_bone_list = ['conv' + str(x) for x in range(1, 5)] for i in back_bone_list: for key in list(sen.keys()): if i + '_' in key: sen.pop(key) ratios = get_ratios_by_loss(sen, 0.03) logger.info("FLOPs before pruning: {}".format(flops(eval_program))) pruner = Pruner(criterion='geometry_median') print("ratios: {}".format(ratios)) pruned_val_program, _, _ = pruner.prune(eval_program, fluid.global_scope(), params=ratios.keys(), ratios=ratios.values(), place=place, only_graph=True) pruned_program, _, _ = pruner.prune(train_program, fluid.global_scope(), params=ratios.keys(), ratios=ratios.values(), place=place) logger.info("FLOPs after pruning: {}".format(flops(pruned_val_program))) train_compile_program = program.create_multi_devices_program( pruned_program, train_opt_loss_name) train_info_dict = {'compile_program':train_compile_program,\ 'train_program':pruned_program,\ 'reader':train_loader,\ 'fetch_name_list':train_fetch_name_list,\ 'fetch_varname_list':train_fetch_varname_list} eval_info_dict = {'program':pruned_val_program,\ 'reader':eval_reader,\ 'fetch_name_list':eval_fetch_name_list,\ 'fetch_varname_list':eval_fetch_varname_list} if alg in ['EAST', 'DB']: program.train_eval_det_run(config, exe, train_info_dict, eval_info_dict, is_slim="prune") else: program.train_eval_rec_run(config, exe, train_info_dict, eval_info_dict)
def compress(args): train_reader = None test_reader = None if args.data == "mnist": import paddle.dataset.mnist as reader train_reader = reader.train() val_reader = reader.test() class_dim = 10 image_shape = "1,28,28" elif args.data == "imagenet": import imagenet_reader as reader train_reader = reader.train() val_reader = reader.val() class_dim = 1000 image_shape = "3,224,224" else: raise ValueError("{} is not supported.".format(args.data)) image_shape = [int(m) for m in image_shape.split(",")] assert args.model in model_list, "{} is not in lists: {}".format( args.model, model_list) image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') # model definition model = models.__dict__[args.model]() out = model.net(input=image, class_dim=class_dim) cost = fluid.layers.cross_entropy(input=out, label=label) avg_cost = fluid.layers.mean(x=cost) acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1) acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5) val_program = fluid.default_main_program().clone(for_test=True) opt = create_optimizer(args) opt.minimize(avg_cost) place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) if args.pretrained_model: def if_exist(var): return os.path.exists(os.path.join(args.pretrained_model, var.name)) fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist) val_reader = paddle.fluid.io.batch(val_reader, batch_size=args.batch_size) train_reader = paddle.fluid.io.batch(train_reader, batch_size=args.batch_size, drop_last=True) train_feeder = feeder = fluid.DataFeeder([image, label], place) val_feeder = feeder = fluid.DataFeeder([image, label], place, program=val_program) def test(epoch, program): batch_id = 0 acc_top1_ns = [] acc_top5_ns = [] for data in val_reader(): start_time = time.time() acc_top1_n, acc_top5_n = exe.run( program, feed=train_feeder.feed(data), fetch_list=[acc_top1.name, acc_top5.name]) end_time = time.time() if batch_id % args.log_period == 0: _logger.info( "Eval epoch[{}] batch[{}] - acc_top1: {:.3f}; acc_top5: {:.3f}; time: {:.3f}" .format(epoch, batch_id, np.mean(acc_top1_n), np.mean(acc_top5_n), end_time - start_time)) acc_top1_ns.append(np.mean(acc_top1_n)) acc_top5_ns.append(np.mean(acc_top5_n)) batch_id += 1 _logger.info( "Final eval epoch[{}] - acc_top1: {:.3f}; acc_top5: {:.3f}".format( epoch, np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns)))) return np.mean(np.array(acc_top1_ns)) def train(epoch, program): build_strategy = fluid.BuildStrategy() exec_strategy = fluid.ExecutionStrategy() train_program = fluid.compiler.CompiledProgram( program).with_data_parallel(loss_name=avg_cost.name, build_strategy=build_strategy, exec_strategy=exec_strategy) batch_id = 0 for data in train_reader(): start_time = time.time() loss_n, acc_top1_n, acc_top5_n = exe.run( train_program, feed=train_feeder.feed(data), fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name]) end_time = time.time() loss_n = np.mean(loss_n) acc_top1_n = np.mean(acc_top1_n) acc_top5_n = np.mean(acc_top5_n) if batch_id % args.log_period == 0: _logger.info( "epoch[{}]-batch[{}] - loss: {:.3f}; acc_top1: {:.3f}; acc_top5: {:.3f}; time: {:.3f}" .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n, end_time - start_time)) batch_id += 1 params = [] for param in fluid.default_main_program().global_block().all_parameters(): if "_sep_weights" in param.name: params.append(param.name) def eval_func(program): return test(0, program) if args.data == "mnist": train(0, fluid.default_main_program()) pruner = SensitivePruner(place, eval_func, checkpoints=args.checkpoints) pruned_program, pruned_val_program, iter = pruner.restore() if pruned_program is None: pruned_program = fluid.default_main_program() if pruned_val_program is None: pruned_val_program = val_program base_flops = flops(val_program) start = iter end = args.prune_steps for iter in range(start, end): pruned_program, pruned_val_program = pruner.greedy_prune( pruned_program, pruned_val_program, params, 0.03, topk=1) current_flops = flops(pruned_val_program) print("iter:{}; pruned FLOPS: {}".format( iter, float(base_flops - current_flops) / base_flops)) acc = None for i in range(args.retrain_epoch): train(i, pruned_program) acc = test(i, pruned_val_program) print("iter:{}; pruned FLOPS: {}; acc: {}".format( iter, float(base_flops - current_flops) / base_flops, acc)) pruner.save_checkpoint(pruned_program, pruned_val_program)
def search_mobilenetv2(config, args, image_size, is_server=True): if is_server: ### start a server and a client sa_nas = SANAS(config, server_addr=(args.server_address, args.port), search_steps=args.search_steps, is_server=True) else: ### start a client sa_nas = SANAS(config, server_addr=(args.server_address, args.port), search_steps=args.search_steps, is_server=False) image_shape = [3, image_size, image_size] for step in range(args.search_steps): archs = sa_nas.next_archs()[0] train_program = fluid.Program() test_program = fluid.Program() startup_program = fluid.Program() train_loader, avg_cost, acc_top1, acc_top5 = build_program( train_program, startup_program, image_shape, archs, args) current_flops = flops(train_program) print('step: {}, current_flops: {}'.format(step, current_flops)) if current_flops > int(321208544): continue test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program( test_program, startup_program, image_shape, archs, args, is_test=True) test_program = test_program.clone(for_test=True) place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(startup_program) if args.data == 'cifar10': train_reader = paddle.batch(paddle.reader.shuffle( paddle.dataset.cifar.train10(cycle=False), buf_size=1024), batch_size=args.batch_size, drop_last=True) test_reader = paddle.batch( paddle.dataset.cifar.test10(cycle=False), batch_size=args.batch_size, drop_last=False) elif args.data == 'imagenet': train_reader = paddle.batch(imagenet_reader.train(), batch_size=args.batch_size, drop_last=True) test_reader = paddle.batch(imagenet_reader.val(), batch_size=args.batch_size, drop_last=False) train_loader.set_sample_list_generator( train_reader, places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places()) test_loader.set_sample_list_generator(test_reader, places=place) build_strategy = fluid.BuildStrategy() train_compiled_program = fluid.CompiledProgram( train_program).with_data_parallel(loss_name=avg_cost.name, build_strategy=build_strategy) for epoch_id in range(args.retain_epoch): for batch_id, data in enumerate(train_loader()): fetches = [avg_cost.name] s_time = time.time() outs = exe.run(train_compiled_program, feed=data, fetch_list=fetches)[0] batch_time = time.time() - s_time if batch_id % 10 == 0: _logger.info( 'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms' .format(step, epoch_id, batch_id, outs[0], batch_time)) reward = [] for batch_id, data in enumerate(test_loader()): test_fetches = [ test_avg_cost.name, test_acc_top1.name, test_acc_top5.name ] batch_reward = exe.run(test_program, feed=data, fetch_list=test_fetches) reward_avg = np.mean(np.array(batch_reward), axis=1) reward.append(reward_avg) _logger.info( 'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}' .format(step, batch_id, batch_reward[0], batch_reward[1], batch_reward[2])) finally_reward = np.mean(np.array(reward), axis=0) _logger.info( 'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format( finally_reward[0], finally_reward[1], finally_reward[2])) sa_nas.reward(float(finally_reward[1]))
def search_mobilenetv2(config, args, image_size, is_server=True): image_shape = [3, image_size, image_size] if args.data == 'cifar10': transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])]) train_dataset = paddle.vision.datasets.Cifar10(mode='train', transform=transform, backend='cv2') val_dataset = paddle.vision.datasets.Cifar10(mode='test', transform=transform, backend='cv2') elif args.data == 'imagenet': train_dataset = imagenet_reader.ImageNetDataset(mode='train') val_dataset = imagenet_reader.ImageNetDataset(mode='val') places = static.cuda_places() if args.use_gpu else static.cpu_places() place = places[0] if is_server: ### start a server and a client sa_nas = SANAS(config, server_addr=(args.server_address, args.port), search_steps=args.search_steps, is_server=True) else: ### start a client sa_nas = SANAS(config, server_addr=(args.server_address, args.port), search_steps=args.search_steps, is_server=False) for step in range(args.search_steps): archs = sa_nas.next_archs()[0] train_program = static.Program() test_program = static.Program() startup_program = static.Program() train_loader, avg_cost, acc_top1, acc_top5 = build_program( train_program, startup_program, image_shape, train_dataset, archs, args, places) current_flops = flops(train_program) print('step: {}, current_flops: {}'.format(step, current_flops)) if current_flops > int(321208544): continue test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program( test_program, startup_program, image_shape, val_dataset, archs, args, place, is_test=True) test_program = test_program.clone(for_test=True) exe = static.Executor(place) exe.run(startup_program) build_strategy = static.BuildStrategy() train_compiled_program = static.CompiledProgram( train_program).with_data_parallel(loss_name=avg_cost.name, build_strategy=build_strategy) for epoch_id in range(args.retain_epoch): for batch_id, data in enumerate(train_loader()): fetches = [avg_cost.name] s_time = time.time() outs = exe.run(train_compiled_program, feed=data, fetch_list=fetches)[0] batch_time = time.time() - s_time if batch_id % 10 == 0: _logger.info( 'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms' .format(step, epoch_id, batch_id, outs[0], batch_time)) reward = [] for batch_id, data in enumerate(test_loader()): test_fetches = [ test_avg_cost.name, test_acc_top1.name, test_acc_top5.name ] batch_reward = exe.run(test_program, feed=data, fetch_list=test_fetches) reward_avg = np.mean(np.array(batch_reward), axis=1) reward.append(reward_avg) _logger.info( 'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}' .format(step, batch_id, batch_reward[0], batch_reward[1], batch_reward[2])) finally_reward = np.mean(np.array(reward), axis=0) _logger.info( 'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format( finally_reward[0], finally_reward[1], finally_reward[2])) sa_nas.reward(float(finally_reward[1]))
def train(cfg): startup_prog = fluid.Program() train_prog = fluid.Program() if args.enable_ce: startup_prog.random_seed = 1000 train_prog.random_seed = 1000 drop_last = True dataset = SegDataset(file_list=cfg.DATASET.TRAIN_FILE_LIST, mode=ModelPhase.TRAIN, shuffle=True, data_dir=cfg.DATASET.DATA_DIR) def data_generator(): if args.use_mpio: data_gen = dataset.multiprocess_generator( num_processes=cfg.DATALOADER.NUM_WORKERS, max_queue_size=cfg.DATALOADER.BUF_SIZE) else: data_gen = dataset.generator() batch_data = [] for b in data_gen: batch_data.append(b) if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS): for item in batch_data: yield item[0], item[1], item[2] batch_data = [] # If use sync batch norm strategy, drop last batch if number of samples # in batch_data is less then cfg.BATCH_SIZE to avoid NCCL hang issues if not cfg.TRAIN.SYNC_BATCH_NORM: for item in batch_data: yield item[0], item[1], item[2] # Get device environment # places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places() # place = places[0] gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace() places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places() # Get number of GPU dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places) print_info("#Device count: {}".format(dev_count)) # Make sure BATCH_SIZE can divided by GPU cards assert cfg.BATCH_SIZE % dev_count == 0, ( 'BATCH_SIZE:{} not divisble by number of GPUs:{}'.format( cfg.BATCH_SIZE, dev_count)) # If use multi-gpu training mode, batch data will allocated to each GPU evenly batch_size_per_dev = cfg.BATCH_SIZE // dev_count print_info("batch_size_per_dev: {}".format(batch_size_per_dev)) config_info = {'input_size': 769, 'output_size': 1, 'block_num': 7} config = ([(cfg.SLIM.NAS_SPACE_NAME, config_info)]) factory = SearchSpaceFactory() space = factory.get_search_space(config) port = cfg.SLIM.NAS_PORT server_address = (cfg.SLIM.NAS_ADDRESS, port) sa_nas = SANAS(config, server_addr=server_address, search_steps=cfg.SLIM.NAS_SEARCH_STEPS, is_server=cfg.SLIM.NAS_IS_SERVER) for step in range(cfg.SLIM.NAS_SEARCH_STEPS): arch = sa_nas.next_archs()[0] start_prog = fluid.Program() train_prog = fluid.Program() data_loader, avg_loss, lr, pred, grts, masks = build_model( train_prog, start_prog, arch=arch, phase=ModelPhase.TRAIN) cur_flops = flops(train_prog) print('current step:', step, 'flops:', cur_flops) data_loader.set_sample_generator(data_generator, batch_size=batch_size_per_dev, drop_last=drop_last) exe = fluid.Executor(place) exe.run(start_prog) exec_strategy = fluid.ExecutionStrategy() # Clear temporary variables every 100 iteration if args.use_gpu: exec_strategy.num_threads = fluid.core.get_cuda_device_count() exec_strategy.num_iteration_per_drop_scope = 100 build_strategy = fluid.BuildStrategy() if cfg.NUM_TRAINERS > 1 and args.use_gpu: dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog) exec_strategy.num_threads = 1 if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu: if dev_count > 1: # Apply sync batch norm strategy print_info("Sync BatchNorm strategy is effective.") build_strategy.sync_batch_norm = True else: print_info( "Sync BatchNorm strategy will not be effective if GPU device" " count <= 1") compiled_train_prog = fluid.CompiledProgram( train_prog).with_data_parallel(loss_name=avg_loss.name, exec_strategy=exec_strategy, build_strategy=build_strategy) # Resume training begin_epoch = cfg.SOLVER.BEGIN_EPOCH if cfg.TRAIN.RESUME_MODEL_DIR: begin_epoch = load_checkpoint(exe, train_prog) # Load pretrained model elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR): print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR) load_vars = [] load_fail_vars = [] def var_shape_matched(var, shape): """ Check whehter persitable variable shape is match with current network """ var_exist = os.path.exists( os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name)) if var_exist: var_shape = parse_shape_from_file( os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name)) return var_shape == shape return False for x in train_prog.list_vars(): if isinstance(x, fluid.framework.Parameter): shape = tuple(fluid.global_scope().find_var( x.name).get_tensor().shape()) if var_shape_matched(x, shape): load_vars.append(x) else: load_fail_vars.append(x) fluid.io.load_vars(exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars) for var in load_vars: print_info("Parameter[{}] loaded sucessfully!".format( var.name)) for var in load_fail_vars: print_info( "Parameter[{}] don't exist or shape does not match current network, skip" " to load it.".format(var.name)) print_info( "{}/{} pretrained parameters loaded successfully!".format( len(load_vars), len(load_vars) + len(load_fail_vars))) else: print_info( 'Pretrained model dir {} not exists, training from scratch...'. format(cfg.TRAIN.PRETRAINED_MODEL_DIR)) fetch_list = [avg_loss.name, lr.name] global_step = 0 all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True: all_step += 1 all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1) avg_loss = 0.0 timer = Timer() timer.start() if begin_epoch > cfg.SOLVER.NUM_EPOCHS: raise ValueError( ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]" ).format(begin_epoch, cfg.SOLVER.NUM_EPOCHS)) if args.use_mpio: print_info("Use multiprocess reader") else: print_info("Use multi-thread reader") best_miou = 0.0 for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1): data_loader.start() while True: try: loss, lr = exe.run(program=compiled_train_prog, fetch_list=fetch_list, return_numpy=True) avg_loss += np.mean(np.array(loss)) global_step += 1 if global_step % args.log_steps == 0 and cfg.TRAINER_ID == 0: avg_loss /= args.log_steps speed = args.log_steps / timer.elapsed_time() print(( "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}" ).format(epoch, global_step, lr[0], avg_loss, speed, calculate_eta(all_step - global_step, speed))) sys.stdout.flush() avg_loss = 0.0 timer.restart() except fluid.core.EOFException: data_loader.reset() break except Exception as e: print(e) if epoch > cfg.SLIM.NAS_START_EVAL_EPOCH: ckpt_dir = save_checkpoint(exe, train_prog, '{}_tmp'.format(port)) _, mean_iou, _, mean_acc = evaluate(cfg=cfg, arch=arch, ckpt_dir=ckpt_dir, use_gpu=args.use_gpu, use_mpio=args.use_mpio) if best_miou < mean_iou: print('search step {}, epoch {} best iou {}'.format( step, epoch, mean_iou)) best_miou = mean_iou sa_nas.reward(float(best_miou))
def compress(args): train_reader = None test_reader = None if args.data == "mnist": transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])]) train_dataset = paddle.vision.datasets.MNIST(mode='train', backend="cv2", transform=transform) val_dataset = paddle.vision.datasets.MNIST(mode='test', backend="cv2", transform=transform) class_dim = 10 image_shape = "1,28,28" elif args.data == "imagenet": import imagenet_reader as reader train_dataset = reader.ImageNetDataset(mode='train') val_dataset = reader.ImageNetDataset(mode='val') class_dim = 1000 image_shape = "3,224,224" else: raise ValueError("{} is not supported.".format(args.data)) image_shape = [int(m) for m in image_shape.split(",")] assert args.model in model_list, "{} is not in lists: {}".format( args.model, model_list) places = paddle.static.cuda_places( ) if args.use_gpu else paddle.static.cpu_places() place = places[0] exe = paddle.static.Executor(place) image = paddle.static.data(name='image', shape=[None] + image_shape, dtype='float32') label = paddle.static.data(name='label', shape=[None, 1], dtype='int64') batch_size_per_card = int(args.batch_size / len(places)) train_loader = paddle.io.DataLoader(train_dataset, places=places, feed_list=[image, label], drop_last=True, batch_size=batch_size_per_card, shuffle=True, return_list=False, use_shared_memory=True, num_workers=16) valid_loader = paddle.io.DataLoader(val_dataset, places=place, feed_list=[image, label], drop_last=False, return_list=False, use_shared_memory=True, batch_size=batch_size_per_card, shuffle=False) step_per_epoch = int(np.ceil(len(train_dataset) * 1. / args.batch_size)) # model definition model = models.__dict__[args.model]() out = model.net(input=image, class_dim=class_dim) cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label) avg_cost = paddle.mean(x=cost) acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1) acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5) val_program = paddle.static.default_main_program().clone(for_test=True) opt, learning_rate = create_optimizer(args, step_per_epoch) opt.minimize(avg_cost) exe.run(paddle.static.default_startup_program()) if args.pretrained_model: def if_exist(var): return os.path.exists(os.path.join(args.pretrained_model, var.name)) _logger.info("Load pretrained model from {}".format( args.pretrained_model)) paddle.static.load(paddle.static.default_main_program(), args.pretrained_model, exe) def test(epoch, program): acc_top1_ns = [] acc_top5_ns = [] for batch_id, data in enumerate(valid_loader): start_time = time.time() acc_top1_n, acc_top5_n = exe.run( program, feed=data, fetch_list=[acc_top1.name, acc_top5.name]) end_time = time.time() if batch_id % args.log_period == 0: _logger.info( "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}" .format(epoch, batch_id, np.mean(acc_top1_n), np.mean(acc_top5_n), end_time - start_time)) acc_top1_ns.append(np.mean(acc_top1_n)) acc_top5_ns.append(np.mean(acc_top5_n)) _logger.info( "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format( epoch, np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns)))) def train(epoch, program): for batch_id, data in enumerate(train_loader): start_time = time.time() loss_n, acc_top1_n, acc_top5_n = exe.run( train_program, feed=data, fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name]) end_time = time.time() loss_n = np.mean(loss_n) acc_top1_n = np.mean(acc_top1_n) acc_top5_n = np.mean(acc_top5_n) if batch_id % args.log_period == 0: _logger.info( "epoch[{}]-batch[{}] lr: {:.6f} - loss: {}; acc_top1: {}; acc_top5: {}; time: {}" .format(epoch, batch_id, learning_rate.get_lr(), loss_n, acc_top1_n, acc_top5_n, end_time - start_time)) learning_rate.step() batch_id += 1 test(0, val_program) params = get_pruned_params(args, paddle.static.default_main_program()) _logger.info("FLOPs before pruning: {}".format( flops(paddle.static.default_main_program()))) pruner = Pruner(args.criterion) pruned_val_program, _, _ = pruner.prune(val_program, paddle.static.global_scope(), params=params, ratios=[args.pruned_ratio] * len(params), place=place, only_graph=True) pruned_program, _, _ = pruner.prune(paddle.static.default_main_program(), paddle.static.global_scope(), params=params, ratios=[args.pruned_ratio] * len(params), place=place) _logger.info("FLOPs after pruning: {}".format(flops(pruned_program))) build_strategy = paddle.static.BuildStrategy() exec_strategy = paddle.static.ExecutionStrategy() train_program = paddle.static.CompiledProgram( pruned_program).with_data_parallel(loss_name=avg_cost.name, build_strategy=build_strategy, exec_strategy=exec_strategy) for i in range(args.num_epochs): train(i, train_program) if (i + 1) % args.test_period == 0: test(i, pruned_val_program) save_model(exe, pruned_val_program, os.path.join(args.model_path, str(i))) if args.save_inference: infer_model_path = os.path.join(args.model_path, "infer_models", str(i)) paddle.static.save_inference_model(infer_model_path, [image], [out], exe, program=pruned_val_program) _logger.info( "Saved inference model into [{}]".format(infer_model_path))
def main(): cfg = load_config(FLAGS.config) merge_config(FLAGS.opt) check_config(cfg) # check if set use_gpu=True in paddlepaddle cpu version check_gpu(cfg.use_gpu) # check if paddlepaddle version is satisfied check_version() main_arch = cfg.architecture dataset = cfg.TestReader['dataset'] test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img) dataset.set_images(test_images) place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) model = create(main_arch) startup_prog = fluid.Program() infer_prog = fluid.Program() with fluid.program_guard(infer_prog, startup_prog): with fluid.unique_name.guard(): inputs_def = cfg['TestReader']['inputs_def'] inputs_def['iterable'] = True feed_vars, loader = model.build_inputs(**inputs_def) test_fetches = model.test(feed_vars) infer_prog = infer_prog.clone(True) pruned_params = FLAGS.pruned_params assert ( FLAGS.pruned_params is not None ), "FLAGS.pruned_params is empty!!! Please set it by '--pruned_params' option." pruned_params = FLAGS.pruned_params.strip().split(",") logger.info("pruned params: {}".format(pruned_params)) pruned_ratios = [float(n) for n in FLAGS.pruned_ratios.strip().split(",")] logger.info("pruned ratios: {}".format(pruned_ratios)) assert (len(pruned_params) == len(pruned_ratios) ), "The length of pruned params and pruned ratios should be equal." assert (pruned_ratios > [0] * len(pruned_ratios) and pruned_ratios < [1] * len(pruned_ratios) ), "The elements of pruned ratios should be in range (0, 1)." base_flops = flops(infer_prog) pruner = Pruner() infer_prog, _, _ = pruner.prune(infer_prog, fluid.global_scope(), params=pruned_params, ratios=pruned_ratios, place=place, only_graph=True) pruned_flops = flops(infer_prog) logger.info("pruned FLOPS: {}".format( float(base_flops - pruned_flops) / base_flops)) reader = create_reader(cfg.TestReader, devices_num=1) loader.set_sample_list_generator(reader, place) exe.run(startup_prog) if cfg.weights: checkpoint.load_checkpoint(exe, infer_prog, cfg.weights) # parse infer fetches assert cfg.metric in ['COCO', 'VOC', 'OID', 'WIDERFACE'], \ "unknown metric type {}".format(cfg.metric) extra_keys = [] if cfg['metric'] in ['COCO', 'OID']: extra_keys = ['im_info', 'im_id', 'im_shape'] if cfg['metric'] == 'VOC' or cfg['metric'] == 'WIDERFACE': extra_keys = ['im_id', 'im_shape'] keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys) # parse dataset category if cfg.metric == 'COCO': from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info if cfg.metric == 'OID': from ppdet.utils.oid_eval import bbox2out, get_category_info if cfg.metric == "VOC": from ppdet.utils.voc_eval import bbox2out, get_category_info if cfg.metric == "WIDERFACE": from ppdet.utils.widerface_eval_utils import bbox2out, get_category_info anno_file = dataset.get_anno() with_background = dataset.with_background use_default_label = dataset.use_default_label clsid2catid, catid2name = get_category_info(anno_file, with_background, use_default_label) # whether output bbox is normalized in model output layer is_bbox_normalized = False if hasattr(model, 'is_bbox_normalized') and \ callable(model.is_bbox_normalized): is_bbox_normalized = model.is_bbox_normalized() imid2path = dataset.get_imid2path() for iter_id, data in enumerate(loader()): outs = exe.run(infer_prog, feed=data, fetch_list=values, return_numpy=False) res = { k: (np.array(v), v.recursive_sequence_lengths()) for k, v in zip(keys, outs) } logger.info('Infer iter {}'.format(iter_id)) bbox_results = None mask_results = None if 'bbox' in res: bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized) if 'mask' in res: mask_results = mask2out([res], clsid2catid, model.mask_head.resolution) # visualize result im_ids = res['im_id'][0] for im_id in im_ids: image_path = imid2path[int(im_id)] image = Image.open(image_path).convert('RGB') image = visualize_results(image, int(im_id), catid2name, FLAGS.draw_threshold, bbox_results, mask_results) save_name = get_save_image_name(FLAGS.output_dir, image_path) logger.info("Detection bbox results save in {}".format(save_name)) image.save(save_name, quality=95)
def main(): env = os.environ FLAGS.dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env if FLAGS.dist: trainer_id = int(env['PADDLE_TRAINER_ID']) import random local_seed = (99 + trainer_id) random.seed(local_seed) np.random.seed(local_seed) cfg = load_config(FLAGS.config) if 'architecture' in cfg: main_arch = cfg.architecture else: raise ValueError("'architecture' not specified in config file.") merge_config(FLAGS.opt) if 'log_iter' not in cfg: cfg.log_iter = 20 # check if set use_gpu=True in paddlepaddle cpu version check_gpu(cfg.use_gpu) # check if paddlepaddle version is satisfied check_version() if cfg.use_gpu: devices_num = fluid.core.get_cuda_device_count() else: devices_num = int(os.environ.get('CPU_NUM', 1)) if 'FLAGS_selected_gpus' in env: device_id = int(env['FLAGS_selected_gpus']) else: device_id = 0 place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) lr_builder = create('LearningRate') optim_builder = create('OptimizerBuilder') # add NAS config = ([(cfg.search_space)]) server_address = (cfg.server_ip, cfg.server_port) load_checkpoint = FLAGS.resume_checkpoint if FLAGS.resume_checkpoint else None sa_nas = SANAS(config, server_addr=server_address, init_temperature=cfg.init_temperature, reduce_rate=cfg.reduce_rate, search_steps=cfg.search_steps, save_checkpoint=cfg.save_dir, load_checkpoint=load_checkpoint, is_server=cfg.is_server) start_iter = 0 train_reader = create_reader(cfg.TrainReader, (cfg.max_iters - start_iter) * devices_num, cfg) eval_reader = create_reader(cfg.EvalReader) for step in range(cfg.search_steps): logger.info('----->>> search step: {} <<<------'.format(step)) archs = sa_nas.next_archs()[0] # build program startup_prog = fluid.Program() train_prog = fluid.Program() with fluid.program_guard(train_prog, startup_prog): with fluid.unique_name.guard(): model = create(main_arch) if FLAGS.fp16: assert (getattr(model.backbone, 'norm_type', None) != 'affine_channel'), \ '--fp16 currently does not support affine channel, ' \ ' please modify backbone settings to use batch norm' with mixed_precision_context(FLAGS.loss_scale, FLAGS.fp16) as ctx: inputs_def = cfg['TrainReader']['inputs_def'] feed_vars, train_loader = model.build_inputs(**inputs_def) train_fetches = archs(feed_vars, 'train', cfg) loss = train_fetches['loss'] if FLAGS.fp16: loss *= ctx.get_loss_scale_var() lr = lr_builder() optimizer = optim_builder(lr) optimizer.minimize(loss) if FLAGS.fp16: loss /= ctx.get_loss_scale_var() current_flops = flops(train_prog) logger.info('current steps: {}, flops {}'.format(step, current_flops)) if current_flops > cfg.max_flops: continue # parse train fetches train_keys, train_values, _ = parse_fetches(train_fetches) train_values.append(lr) if FLAGS.eval: eval_prog = fluid.Program() with fluid.program_guard(eval_prog, startup_prog): with fluid.unique_name.guard(): model = create(main_arch) inputs_def = cfg['EvalReader']['inputs_def'] feed_vars, eval_loader = model.build_inputs(**inputs_def) fetches = archs(feed_vars, 'eval', cfg) eval_prog = eval_prog.clone(True) eval_loader.set_sample_list_generator(eval_reader, place) extra_keys = ['im_id', 'im_shape', 'gt_bbox'] eval_keys, eval_values, eval_cls = parse_fetches( fetches, eval_prog, extra_keys) # compile program for multi-devices build_strategy = fluid.BuildStrategy() build_strategy.fuse_all_optimizer_ops = False build_strategy.fuse_elewise_add_act_ops = True exec_strategy = fluid.ExecutionStrategy() # iteration number when CompiledProgram tries to drop local execution scopes. # Set it to be 1 to save memory usages, so that unused variables in # local execution scopes can be deleted after each iteration. exec_strategy.num_iteration_per_drop_scope = 1 if FLAGS.dist: dist_utils.prepare_for_multi_process(exe, build_strategy, startup_prog, train_prog) exec_strategy.num_threads = 1 exe.run(startup_prog) compiled_train_prog = fluid.CompiledProgram( train_prog).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy, exec_strategy=exec_strategy) if FLAGS.eval: compiled_eval_prog = fluid.compiler.CompiledProgram(eval_prog) train_loader.set_sample_list_generator(train_reader, place) train_stats = TrainingStats(cfg.log_smooth_window, train_keys) train_loader.start() end_time = time.time() cfg_name = os.path.basename(FLAGS.config).split('.')[0] save_dir = os.path.join(cfg.save_dir, cfg_name) time_stat = deque(maxlen=cfg.log_smooth_window) ap = 0 for it in range(start_iter, cfg.max_iters): start_time = end_time end_time = time.time() time_stat.append(end_time - start_time) time_cost = np.mean(time_stat) eta_sec = (cfg.max_iters - it) * time_cost eta = str(datetime.timedelta(seconds=int(eta_sec))) outs = exe.run(compiled_train_prog, fetch_list=train_values) stats = { k: np.array(v).mean() for k, v in zip(train_keys, outs[:-1]) } train_stats.update(stats) logs = train_stats.log() if it % cfg.log_iter == 0 and (not FLAGS.dist or trainer_id == 0): strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format( it, np.mean(outs[-1]), logs, time_cost, eta) logger.info(strs) if (it > 0 and it == cfg.max_iters - 1) and (not FLAGS.dist or trainer_id == 0): save_name = str( it) if it != cfg.max_iters - 1 else "model_final" checkpoint.save(exe, train_prog, os.path.join(save_dir, save_name)) if FLAGS.eval: # evaluation results = eval_run(exe, compiled_eval_prog, eval_loader, eval_keys, eval_values, eval_cls) ap = calculate_ap_py(results) train_loader.reset() eval_loader.reset() logger.info('rewards: ap is {}'.format(ap)) sa_nas.reward(float(ap)) current_best_tokens = sa_nas.current_info()['best_tokens'] logger.info("All steps end, the best BlazeFace-NAS structure is: ") sa_nas.tokens2arch(current_best_tokens)
def compress(args): paddle.set_device('gpu' if args.use_gpu else 'cpu') train_reader = None test_reader = None if args.data == "cifar10": transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])]) train_dataset = paddle.vision.datasets.Cifar10(mode="train", backend="cv2", transform=transform) val_dataset = paddle.vision.datasets.Cifar10(mode="test", backend="cv2", transform=transform) class_dim = 10 image_shape = [3, 32, 32] pretrain = False elif args.data == "imagenet": train_dataset = ImageNetDataset("data/ILSVRC2012", mode='train', image_size=224, resize_short_size=256) val_dataset = ImageNetDataset("data/ILSVRC2012", mode='val', image_size=224, resize_short_size=256) class_dim = 1000 image_shape = [3, 224, 224] pretrain = True else: raise ValueError("{} is not supported.".format(args.data)) assert args.model in model_list, "{} is not in lists: {}".format( args.model, model_list) inputs = [Input([None] + image_shape, 'float32', name='image')] labels = [Input([None, 1], 'int64', name='label')] # model definition net = models.__dict__[args.model](pretrained=pretrain, num_classes=class_dim) _logger.info("FLOPs before pruning: {}GFLOPs".format( flops(net, [1] + image_shape) / 1000)) net.eval() if args.criterion == 'fpgm': pruner = paddleslim.dygraph.FPGMFilterPruner(net, [1] + image_shape) elif args.criterion == 'l1_norm': pruner = paddleslim.dygraph.L1NormFilterPruner(net, [1] + image_shape) params = get_pruned_params(args, net) ratios = {} for param in params: ratios[param] = args.pruned_ratio plan = pruner.prune_vars(ratios, [0]) _logger.info("FLOPs after pruning: {}GFLOPs; pruned ratio: {}".format( flops(net, [1] + image_shape) / 1000, plan.pruned_flops)) for param in net.parameters(): if "conv2d" in param.name: print("{}\t{}".format(param.name, param.shape)) net.train() model = paddle.Model(net, inputs, labels) steps_per_epoch = int(np.ceil(len(train_dataset) * 1. / args.batch_size)) opt = create_optimizer(args, net.parameters(), steps_per_epoch) model.prepare(opt, paddle.nn.CrossEntropyLoss(), paddle.metric.Accuracy(topk=(1, 5))) if args.checkpoint is not None: model.load(args.checkpoint) model.fit(train_data=train_dataset, eval_data=val_dataset, epochs=args.num_epochs, batch_size=args.batch_size // ParallelEnv().nranks, verbose=1, save_dir=args.model_path, num_workers=8)
def compress(args): class_dim = 1000 image_shape = "3,224,224" image_shape = [int(m) for m in image_shape.split(",")] assert args.model in model_list, "{} is not in lists: {}".format( args.model, model_list) image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') # model definition model = models.__dict__[args.model]() out = model.net(input=image, class_dim=class_dim) cost = fluid.layers.cross_entropy(input=out, label=label) avg_cost = fluid.layers.mean(x=cost) acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1) acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5) val_program = fluid.default_main_program().clone(for_test=True) opt = create_optimizer(args) opt.minimize(avg_cost) place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) if args.pretrained_model: def if_exist(var): exist = os.path.exists( os.path.join(args.pretrained_model, var.name)) print("exist", exist) return exist #fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist) val_reader = paddle.fluid.io.batch(reader.val(), batch_size=args.batch_size) train_reader = paddle.fluid.io.batch(reader.train(), batch_size=args.batch_size, drop_last=True) train_feeder = feeder = fluid.DataFeeder([image, label], place) val_feeder = feeder = fluid.DataFeeder([image, label], place, program=val_program) def test(epoch, program): batch_id = 0 acc_top1_ns = [] acc_top5_ns = [] for data in val_reader(): start_time = time.time() acc_top1_n, acc_top5_n = exe.run( program, feed=train_feeder.feed(data), fetch_list=[acc_top1.name, acc_top5.name]) end_time = time.time() print( "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}" .format(epoch, batch_id, np.mean(acc_top1_n), np.mean(acc_top5_n), end_time - start_time)) acc_top1_ns.append(np.mean(acc_top1_n)) acc_top5_ns.append(np.mean(acc_top5_n)) batch_id += 1 print("Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format( epoch, np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns)))) def train(epoch, program): build_strategy = fluid.BuildStrategy() exec_strategy = fluid.ExecutionStrategy() train_program = fluid.compiler.CompiledProgram( program).with_data_parallel(loss_name=avg_cost.name, build_strategy=build_strategy, exec_strategy=exec_strategy) batch_id = 0 for data in train_reader(): start_time = time.time() loss_n, acc_top1_n, acc_top5_n, lr_n = exe.run( train_program, feed=train_feeder.feed(data), fetch_list=[ avg_cost.name, acc_top1.name, acc_top5.name, "learning_rate" ]) end_time = time.time() loss_n = np.mean(loss_n) acc_top1_n = np.mean(acc_top1_n) acc_top5_n = np.mean(acc_top5_n) lr_n = np.mean(lr_n) print( "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {};lrn: {}; time: {}" .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n, lr_n, end_time - start_time)) batch_id += 1 params = [] for param in fluid.default_main_program().global_block().all_parameters(): #if "_weights" in param.name and "conv1_weights" not in param.name: if "_sep_weights" in param.name: params.append(param.name) print("fops before pruning: {}".format(flops( fluid.default_main_program()))) pruned_program_iter = fluid.default_main_program() pruned_val_program_iter = val_program for ratios in ratiolist: pruner = Pruner() pruned_val_program_iter = pruner.prune(pruned_val_program_iter, fluid.global_scope(), params=params, ratios=ratios, place=place, only_graph=True) pruned_program_iter = pruner.prune(pruned_program_iter, fluid.global_scope(), params=params, ratios=ratios, place=place) print("fops after pruning: {}".format(flops(pruned_program_iter))) """ do not inherit learning rate """ if (os.path.exists(args.pretrained_model + "/learning_rate")): os.remove(args.pretrained_model + "/learning_rate") if (os.path.exists(args.pretrained_model + "/@LR_DECAY_COUNTER@")): os.remove(args.pretrained_model + "/@LR_DECAY_COUNTER@") fluid.io.load_vars(exe, args.pretrained_model, main_program=pruned_program_iter, predicate=if_exist) pruned_program = pruned_program_iter pruned_val_program = pruned_val_program_iter for i in range(args.num_epochs): train(i, pruned_program) test(i, pruned_val_program) save_model(args, exe, pruned_program, pruned_val_program, i)
def compress(args): train_reader = None test_reader = None if args.data == "mnist": import paddle.dataset.mnist as reader train_reader = reader.train() val_reader = reader.test() class_dim = 10 image_shape = "1,28,28" elif args.data == "imagenet": import imagenet_reader as reader train_reader = reader.train() val_reader = reader.val() class_dim = 1000 image_shape = "3,224,224" else: raise ValueError("{} is not supported.".format(args.data)) image_shape = [int(m) for m in image_shape.split(",")] assert args.model in model_list, "{} is not in lists: {}".format( args.model, model_list) image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') # model definition model = models.__dict__[args.model]() out = model.net(input=image, class_dim=class_dim) cost = fluid.layers.cross_entropy(input=out, label=label) avg_cost = fluid.layers.mean(x=cost) acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1) acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5) val_program = fluid.default_main_program().clone(for_test=True) opt = create_optimizer(args) opt.minimize(avg_cost) place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) if args.pretrained_model: def if_exist(var): return os.path.exists(os.path.join(args.pretrained_model, var.name)) _logger.info("Load pretrained model from {}".format( args.pretrained_model)) fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist) val_reader = paddle.fluid.io.batch(val_reader, batch_size=args.batch_size) train_reader = paddle.fluid.io.batch(train_reader, batch_size=args.batch_size, drop_last=True) train_feeder = feeder = fluid.DataFeeder([image, label], place) val_feeder = feeder = fluid.DataFeeder([image, label], place, program=val_program) def test(epoch, program): batch_id = 0 acc_top1_ns = [] acc_top5_ns = [] for data in val_reader(): start_time = time.time() acc_top1_n, acc_top5_n = exe.run( program, feed=train_feeder.feed(data), fetch_list=[acc_top1.name, acc_top5.name]) end_time = time.time() if batch_id % args.log_period == 0: _logger.info( "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}" .format(epoch, batch_id, np.mean(acc_top1_n), np.mean(acc_top5_n), end_time - start_time)) acc_top1_ns.append(np.mean(acc_top1_n)) acc_top5_ns.append(np.mean(acc_top5_n)) batch_id += 1 _logger.info( "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format( epoch, np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns)))) def train(epoch, program): build_strategy = fluid.BuildStrategy() exec_strategy = fluid.ExecutionStrategy() train_program = fluid.compiler.CompiledProgram( program).with_data_parallel(loss_name=avg_cost.name, build_strategy=build_strategy, exec_strategy=exec_strategy) batch_id = 0 for data in train_reader(): start_time = time.time() loss_n, acc_top1_n, acc_top5_n = exe.run( train_program, feed=train_feeder.feed(data), fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name]) end_time = time.time() loss_n = np.mean(loss_n) acc_top1_n = np.mean(acc_top1_n) acc_top5_n = np.mean(acc_top5_n) if batch_id % args.log_period == 0: _logger.info( "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}" .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n, end_time - start_time)) batch_id += 1 test(0, val_program) params = get_pruned_params(args, fluid.default_main_program()) _logger.info("FLOPs before pruning: {}".format( flops(fluid.default_main_program()))) pruner = Pruner(args.criterion) pruned_val_program, _, _ = pruner.prune(val_program, fluid.global_scope(), params=params, ratios=[args.pruned_ratio] * len(params), place=place, only_graph=True) pruned_program, _, _ = pruner.prune(fluid.default_main_program(), fluid.global_scope(), params=params, ratios=[args.pruned_ratio] * len(params), place=place) _logger.info("FLOPs after pruning: {}".format(flops(pruned_program))) for i in range(args.num_epochs): train(i, pruned_program) if i % args.test_period == 0: test(i, pruned_val_program) save_model(exe, pruned_val_program, os.path.join(args.model_path, str(i))) if args.save_inference: infer_model_path = os.path.join(args.model_path, "infer_models", str(i)) fluid.io.save_inference_model(infer_model_path, ["image"], [out], exe, pruned_val_program) _logger.info( "Saved inference model into [{}]".format(infer_model_path))
def compress(args): train_reader = None test_reader = None if args.data == "mnist": import paddle.dataset.mnist as reader train_reader = reader.train() val_reader = reader.test() class_dim = 10 image_shape = "1,28,28" elif args.data == "imagenet": import imagenet_reader as reader train_reader = reader.train() val_reader = reader.val() class_dim = 1000 image_shape = "3,224,224" else: raise ValueError("{} is not supported.".format(args.data)) image_shape = [int(m) for m in image_shape.split(",")] assert args.model in model_list, "{} is not in lists: {}".format( args.model, model_list) image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') # model definition model = models.__dict__[args.model]() out = model.net(input=image, class_dim=class_dim) cost = fluid.layers.cross_entropy(input=out, label=label) avg_cost = fluid.layers.mean(x=cost) acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1) acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5) val_program = fluid.default_main_program().clone(for_test=True) opt = create_optimizer(args) opt.minimize(avg_cost) place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) if args.pretrained_model: def if_exist(var): return os.path.exists(os.path.join(args.pretrained_model, var.name)) # fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist) val_reader = paddle.batch(val_reader, batch_size=args.batch_size) train_reader = paddle.batch(train_reader, batch_size=args.batch_size, drop_last=True) train_feeder = feeder = fluid.DataFeeder([image, label], place) val_feeder = feeder = fluid.DataFeeder([image, label], place, program=val_program) def test(epoch, program): batch_id = 0 acc_top1_ns = [] acc_top5_ns = [] for data in val_reader(): start_time = time.time() acc_top1_n, acc_top5_n = exe.run( program, feed=train_feeder.feed(data), fetch_list=[acc_top1.name, acc_top5.name]) end_time = time.time() if batch_id % args.log_period == 0: _logger.info( "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}" .format(epoch, batch_id, np.mean(acc_top1_n), np.mean(acc_top5_n), end_time - start_time)) acc_top1_ns.append(np.mean(acc_top1_n)) acc_top5_ns.append(np.mean(acc_top5_n)) batch_id += 1 _logger.info( "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format( epoch, np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns)))) return np.mean(np.array(acc_top1_ns)) def train(epoch, program): build_strategy = fluid.BuildStrategy() exec_strategy = fluid.ExecutionStrategy() train_program = fluid.compiler.CompiledProgram( program).with_data_parallel(loss_name=avg_cost.name, build_strategy=build_strategy, exec_strategy=exec_strategy) batch_id = 0 for data in train_reader(): start_time = time.time() loss_n, acc_top1_n, acc_top5_n = exe.run( train_program, feed=train_feeder.feed(data), fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name]) end_time = time.time() loss_n = np.mean(loss_n) acc_top1_n = np.mean(acc_top1_n) acc_top5_n = np.mean(acc_top5_n) if batch_id % args.log_period == 0: _logger.info( "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}" .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n, end_time - start_time)) batch_id += 1 params = [] for param in fluid.default_main_program().global_block().all_parameters(): if "_sep_weights" in param.name: params.append(param.name) pruned_program_iter = fluid.default_main_program() pruned_val_program_iter = val_program for ratios in ratiolist: pruner = Pruner() pruned_val_program_iter = pruner.prune(pruned_val_program_iter, fluid.global_scope(), params=params, ratios=ratios, place=place, only_graph=True) pruned_program_iter = pruner.prune(pruned_program_iter, fluid.global_scope(), params=params, ratios=ratios, place=place) print("fops after pruning: {}".format(flops(pruned_program_iter))) fluid.io.load_vars(exe, args.pretrained_model, main_program=pruned_program_iter, predicate=if_exist) pruner = AutoPruner(pruned_val_program_iter, fluid.global_scope(), place, params=params, init_ratios=[0.1] * len(params), pruned_flops=0.1, pruned_latency=None, server_addr=("", 0), init_temperature=100, reduce_rate=0.85, max_try_times=300, max_client_num=10, search_steps=100, max_ratios=0.2, min_ratios=0., is_server=True, key="auto_pruner") while True: pruned_program, pruned_val_program = pruner.prune( pruned_program_iter, pruned_val_program_iter) for i in range(0): train(i, pruned_program) score = test(0, pruned_val_program) pruner.reward(score)