def convert(program, place, config=None, scope=None, save_int8=False):
    """
    Convert a quantized and well-trained ``program`` into a final quantized
    ``program`` that can be used to save an ``inference model``.

    Args:
        program(fluid.Program): quantized and well-trained ``test program``.
        place(fluid.CPUPlace or fluid.CUDAPlace): the device on which the
            executor runs.
        config(dict, optional): configs for convert. If set to None, the
            default config will be used. It must be the same config that was
            used in 'quant_aware'. Default: None.
        scope(fluid.Scope, optional): Scope records the mapping between
            variable names and variables, similar to brackets in programming
            languages. Usually users can use
            `fluid.global_scope <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_.
            When ``None``, `fluid.global_scope() <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_
            will be used. Default: ``None``.
        save_int8(bool, optional): Whether to additionally return a
            ``program`` whose model parameters' dtype is ``int8``. This
            parameter can only be used to measure model size.
            Default: ``False``.

    Returns:
        Tuple: the frozen program, which can be used for inference.
        When ``save_int8`` is False, return ``freezed_program(fluid.Program)``.
        When ``save_int8`` is True, return ``freezed_program(fluid.Program)``
        and ``freezed_program_int8(fluid.Program)``.
    """
    scope = fluid.global_scope() if not scope else scope

    if config is None:
        config = _quant_config_default
    else:
        assert isinstance(config, dict), "config must be dict"
        config = _parse_configs(config)
    _logger.info("convert config {}".format(config))

    test_graph = IrGraph(core.Graph(program.desc), for_test=True)
    support_op_types = []
    for op in config['quantize_op_types']:
        if op in QuantizationFreezePass._supported_quantizable_op_type:
            support_op_types.append(op)

    # Freeze the graph after training by adjusting the quantize
    # operators' order for the inference.
    freeze_pass = QuantizationFreezePass(
        scope=scope,
        place=place,
        weight_bits=config['weight_bits'],
        activation_bits=config['activation_bits'],
        weight_quantize_type=config['weight_quantize_type'],
        quantizable_op_type=support_op_types)
    freeze_pass.apply(test_graph)
    freezed_program = test_graph.to_program()

    if save_int8:
        # Use the resolved scope here as well, so a user-supplied scope
        # is honored instead of silently falling back to the global one.
        convert_int8_pass = ConvertToInt8Pass(
            scope=scope,
            place=place,
            quantizable_op_type=support_op_types)
        convert_int8_pass.apply(test_graph)
        freezed_program_int8 = test_graph.to_program()
        return freezed_program, freezed_program_int8
    else:
        return freezed_program
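# A minimal usage sketch for convert(), assuming the PaddleSlim import path
# paddleslim.quant. `val_program`, `quant_config`, `image`, and `out` are
# illustrative names assumed to come from an earlier quant_aware() setup;
# they are not part of this API.
import paddle.fluid as fluid
from paddleslim.quant import quant_aware, convert

place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda() \
    else fluid.CPUPlace()
exe = fluid.Executor(place)
# `val_program` must be the test program previously transformed by
# quant_aware() with the same `quant_config`, then trained or calibrated.
float_prog, int8_prog = convert(
    val_program, place, config=quant_config, save_int8=True)
# The float program is an ordinary inference program and can be saved with
# the usual API; int8_prog is only useful for measuring model size.
fluid.io.save_inference_model(
    dirname='./quant_infer_model',
    feeded_var_names=[image.name],
    target_vars=[out],
    executor=exe,
    main_program=float_prog)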
def freeze_graph(self,
                 use_cuda,
                 seed,
                 activation_quant_type,
                 weight_quant_type='abs_max',
                 for_ci=True,
                 quant_skip_pattern='skip_quant'):
    def build_program(main, startup, is_test):
        main.random_seed = seed
        startup.random_seed = seed
        with fluid.unique_name.guard():
            with fluid.program_guard(main, startup):
                img = fluid.layers.data(
                    name='image', shape=[1, 28, 28], dtype='float32')
                label = fluid.layers.data(
                    name='label', shape=[1], dtype='int64')
                loss = conv_net(img, label, quant_skip_pattern)
                if not is_test:
                    opt = fluid.optimizer.Adam(learning_rate=0.001)
                    opt.minimize(loss)
        return [img, label], loss

    random.seed(0)
    np.random.seed(0)

    main = fluid.Program()
    startup = fluid.Program()
    test_program = fluid.Program()
    feeds, loss = build_program(main, startup, False)
    build_program(test_program, startup, True)
    test_program = test_program.clone(for_test=True)
    main_graph = IrGraph(core.Graph(main.desc), for_test=False)
    test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.Scope()
    with fluid.scope_guard(scope):
        exe.run(startup)
    transform_pass = QuantizationTransformPass(
        scope=scope,
        place=place,
        activation_quantize_type=activation_quant_type,
        weight_quantize_type=weight_quant_type,
        skip_pattern=quant_skip_pattern)
    transform_pass.apply(main_graph)
    transform_pass.apply(test_graph)
    dev_name = '_gpu_' if use_cuda else '_cpu_'
    if not for_ci:
        marked_nodes = set()
        for op in main_graph.all_op_nodes():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        main_graph.draw('.', 'main' + dev_name + activation_quant_type +
                        '_' + weight_quant_type, marked_nodes)
        marked_nodes = set()
        for op in test_graph.all_op_nodes():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        test_graph.draw('.', 'test' + dev_name + activation_quant_type +
                        '_' + weight_quant_type, marked_nodes)

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)
    quantized_test_program = test_graph.to_program()
    iters = 5
    batch_size = 8

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=500),
        batch_size=batch_size)
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=batch_size)
    feeder = fluid.DataFeeder(feed_list=feeds, place=place)
    with fluid.scope_guard(scope):
        for _ in range(iters):
            data = next(train_reader())
            loss_v = exe.run(binary,
                             feed=feeder.feed(data),
                             fetch_list=[loss])
            if not for_ci:
                print('{}: {}'.format('loss' + dev_name +
                                      activation_quant_type + '_' +
                                      weight_quant_type, loss_v))

    test_data = next(test_reader())
    with fluid.program_guard(quantized_test_program):
        w_var = fluid.framework._get_var('conv2d_1.w_0.quantized',
                                         quantized_test_program)
    # Testing
    with fluid.scope_guard(scope):
        test_loss1, w_quant = exe.run(program=quantized_test_program,
                                      feed=feeder.feed(test_data),
                                      fetch_list=[loss, w_var])

    # Freeze graph for inference, but the weight of fc/conv is still float type.
    freeze_pass = QuantizationFreezePass(
        scope=scope, place=place, weight_quantize_type=weight_quant_type)
    freeze_pass.apply(test_graph)
    if not for_ci:
        marked_nodes = set()
        for op in test_graph.all_op_nodes():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        test_graph.draw('.', 'test_freeze' + dev_name +
                        activation_quant_type + '_' + weight_quant_type,
                        marked_nodes)

    server_program = test_graph.to_program()
    with fluid.scope_guard(scope):
        test_loss2, = exe.run(program=server_program,
                              feed=feeder.feed(test_data),
                              fetch_list=[loss])
    self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
    if not for_ci:
        print('{}: {}'.format('test_loss1' + dev_name +
                              activation_quant_type + '_' +
                              weight_quant_type, test_loss1))
        print('{}: {}'.format('test_loss2' + dev_name +
                              activation_quant_type + '_' +
                              weight_quant_type, test_loss2))
    w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
    # Maybe failed, this is due to the calculation precision
    # self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
    if not for_ci:
        print('{}: {}'.format('w_freeze' + dev_name +
                              activation_quant_type + '_' +
                              weight_quant_type, np.sum(w_freeze)))
        print('{}: {}'.format('w_quant' + dev_name +
                              activation_quant_type + '_' +
                              weight_quant_type, np.sum(w_quant)))

    # Convert parameter to 8-bit.
    convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
    convert_int8_pass.apply(test_graph)
    if not for_ci:
        marked_nodes = set()
        for op in test_graph.all_op_nodes():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        test_graph.draw('.', 'test_int8' + dev_name +
                        activation_quant_type + '_' + weight_quant_type,
                        marked_nodes)
    server_program_int8 = test_graph.to_program()
    # Save the 8-bit parameter and model file.
    with fluid.scope_guard(scope):
        fluid.io.save_inference_model(
            'server_int8' + dev_name + activation_quant_type + '_' +
            weight_quant_type, ['image', 'label'], [loss], exe,
            server_program_int8)
        # Test whether the 8-bit parameter and model file can be loaded
        # successfully.
        [infer, feed, fetch] = fluid.io.load_inference_model(
            'server_int8' + dev_name + activation_quant_type + '_' +
            weight_quant_type, exe)
    # Check the loaded 8-bit weight.
    w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor())
    self.assertEqual(w_8bit.dtype, np.int8)
    self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
    if not for_ci:
        print('{}: {}'.format('w_8bit' + dev_name +
                              activation_quant_type + '_' +
                              weight_quant_type, np.sum(w_8bit)))
        print('{}: {}'.format('w_freeze' + dev_name +
                              activation_quant_type + '_' +
                              weight_quant_type, np.sum(w_freeze)))

    mobile_pass = TransformForMobilePass()
    mobile_pass.apply(test_graph)
    if not for_ci:
        marked_nodes = set()
        for op in test_graph.all_op_nodes():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        test_graph.draw('.', 'test_mobile' + dev_name +
                        activation_quant_type + '_' + weight_quant_type,
                        marked_nodes)

    mobile_program = test_graph.to_program()
    with fluid.scope_guard(scope):
        fluid.io.save_inference_model(
            'mobile_int8' + dev_name + activation_quant_type + '_' +
            weight_quant_type, ['image', 'label'], [loss], exe,
            mobile_program)
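# A minimal sketch of how this helper might be driven from individual test
# cases, assuming the enclosing class is a unittest.TestCase. The method
# names, seeds, and quantize-type combinations below are illustrative, not
# the exact set exercised by the real suite.
def test_freeze_graph_cpu_dynamic(self):
    with fluid.unique_name.guard():
        self.freeze_graph(
            False,
            seed=2,
            activation_quant_type='abs_max',
            weight_quant_type='abs_max',
            for_ci=True)

def test_freeze_graph_cuda_static(self):
    if fluid.core.is_compiled_with_cuda():
        with fluid.unique_name.guard():
            self.freeze_graph(
                True,
                seed=1,
                activation_quant_type='range_abs_max',
                weight_quant_type='channel_wise_abs_max',
                for_ci=True)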
def train(args):
    # parameters from arguments
    model_name = args.model
    pretrained_fp32_model = args.pretrained_fp32_model
    checkpoint = args.checkpoint
    model_save_dir = args.model_save_dir
    data_dir = args.data_dir
    activation_quant_type = args.act_quant_type
    weight_quant_type = args.wt_quant_type
    print("Using %s as the activation quantize type." % activation_quant_type)
    print("Using %s as the weight quantize type." % weight_quant_type)

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()

    _, _, train_py_reader, train_cost, train_acc1, train_acc5, global_lr = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    image, out, test_py_reader, test_cost, test_acc1, test_acc5 = build_program(
        is_train=False,
        main_prog=test_prog,
        startup_prog=startup_prog,
        args=args)
    test_prog = test_prog.clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    main_graph = IrGraph(core.Graph(train_prog.desc), for_test=False)
    test_graph = IrGraph(core.Graph(test_prog.desc), for_test=True)

    if pretrained_fp32_model:
        def if_exist(var):
            return os.path.exists(
                os.path.join(pretrained_fp32_model, var.name))
        fluid.io.load_vars(
            exe,
            pretrained_fp32_model,
            main_program=train_prog,
            predicate=if_exist)

    if args.use_gpu:
        visible_device = os.getenv('CUDA_VISIBLE_DEVICES')
        if visible_device:
            device_num = len(visible_device.split(','))
        else:
            device_num = subprocess.check_output(
                ['nvidia-smi', '-L']).decode().count('\n')
    else:
        device_num = 1

    # Per-device batch size; integer division keeps it an int under Python 3.
    train_batch_size = args.batch_size // device_num
    test_batch_size = 1 if activation_quant_type == 'abs_max' else 8
    train_reader = paddle.batch(
        reader.train(data_dir=data_dir),
        batch_size=train_batch_size,
        drop_last=True)
    test_reader = paddle.batch(
        reader.val(data_dir=data_dir), batch_size=test_batch_size)

    train_py_reader.decorate_paddle_reader(train_reader)
    test_py_reader.decorate_paddle_reader(test_reader)

    train_fetch_list = [
        train_cost.name, train_acc1.name, train_acc5.name, global_lr.name
    ]
    test_fetch_list = [test_cost.name, test_acc1.name, test_acc5.name]

    # 1. Make some quantization transforms in the graph before training and
    # testing. According to the weight and activation quantization types,
    # fake quantize and fake dequantize operators will be inserted into
    # the graph.
    transform_pass = QuantizationTransformPass(
        scope=fluid.global_scope(),
        place=place,
        activation_quantize_type=activation_quant_type,
        weight_quantize_type=weight_quant_type)
    transform_pass.apply(main_graph)
    transform_pass.apply(test_graph)

    if checkpoint:
        load_persistable_nodes(exe, checkpoint, main_graph)

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
        loss_name=train_cost.name, build_strategy=build_strategy)
    test_prog = test_graph.to_program()

    params = models.__dict__[args.model]().params
    for pass_id in range(params["num_epochs"]):
        train_py_reader.start()

        train_info = [[], [], []]
        test_info = [[], [], []]
        train_time = []
        batch_id = 0
        try:
            while True:
                t1 = time.time()
                loss, acc1, acc5, lr = exe.run(
                    binary, fetch_list=train_fetch_list)
                t2 = time.time()
                period = t2 - t1
                loss = np.mean(np.array(loss))
                acc1 = np.mean(np.array(acc1))
                acc5 = np.mean(np.array(acc5))
                train_info[0].append(loss)
                train_info[1].append(acc1)
                train_info[2].append(acc5)
                lr = np.mean(np.array(lr))
                train_time.append(period)
                if batch_id % 10 == 0:
                    print("Pass {0}, trainbatch {1}, loss {2}, "
                          "acc1 {3}, acc5 {4}, lr {5}, time {6}".format(
                              pass_id, batch_id, loss, acc1, acc5,
                              "%.6f" % lr, "%2.2f sec" % period))
                    sys.stdout.flush()
                batch_id += 1
        except fluid.core.EOFException:
            train_py_reader.reset()

        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()

        test_py_reader.start()
        test_batch_id = 0
        try:
            while True:
                t1 = time.time()
                loss, acc1, acc5 = exe.run(
                    program=test_prog, fetch_list=test_fetch_list)
                t2 = time.time()
                period = t2 - t1
                loss = np.mean(loss)
                acc1 = np.mean(acc1)
                acc5 = np.mean(acc5)
                test_info[0].append(loss)
                test_info[1].append(acc1)
                test_info[2].append(acc5)
                if test_batch_id % 10 == 0:
                    print("Pass {0}, testbatch {1}, loss {2}, "
                          "acc1 {3}, acc5 {4}, time {5}".format(
                              pass_id, test_batch_id, loss, acc1, acc5,
                              "%2.2f sec" % period))
                    sys.stdout.flush()
                test_batch_id += 1
        except fluid.core.EOFException:
            test_py_reader.reset()

        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()

        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, "
              "test_loss {4}, test_acc1 {5}, test_acc5 {6}".format(
                  pass_id, train_loss, train_acc1, train_acc5, test_loss,
                  test_acc1, test_acc5))
        sys.stdout.flush()

        save_checkpoint_path = os.path.join(model_save_dir, model_name,
                                            str(pass_id))
        if not os.path.isdir(save_checkpoint_path):
            os.makedirs(save_checkpoint_path)
        save_persistable_nodes(exe, save_checkpoint_path, main_graph)

    model_path = os.path.join(model_save_dir, model_name,
                              args.act_quant_type)
    float_path = os.path.join(model_path, 'float')
    int8_path = os.path.join(model_path, 'int8')
    mobile_path = os.path.join(model_path, 'mobile')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    # 2. Freeze the graph after training by adjusting the quantize
    # operators' order for the inference.
    freeze_pass = QuantizationFreezePass(
        scope=fluid.global_scope(),
        place=place,
        weight_quantize_type=weight_quant_type)
    freeze_pass.apply(test_graph)
    server_program = test_graph.to_program()
    fluid.io.save_inference_model(
        dirname=float_path,
        feeded_var_names=[image.name],
        target_vars=[out],
        executor=exe,
        main_program=server_program)

    # 3. Convert the weights into int8_t type.
    # (This step is optional.)
    convert_int8_pass = ConvertToInt8Pass(
        scope=fluid.global_scope(), place=place)
    convert_int8_pass.apply(test_graph)
    server_int8_program = test_graph.to_program()
    fluid.io.save_inference_model(
        dirname=int8_path,
        feeded_var_names=[image.name],
        target_vars=[out],
        executor=exe,
        main_program=server_int8_program)

    # 4. Convert the frozen graph for paddle-mobile execution.
    # (This step is optional.)
    mobile_pass = TransformForMobilePass()
    mobile_pass.apply(test_graph)
    mobile_program = test_graph.to_program()
    fluid.io.save_inference_model(
        dirname=mobile_path,
        feeded_var_names=[image.name],
        target_vars=[out],
        executor=exe,
        main_program=mobile_program)
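# Sanity-check sketch: after step 2, the model under `float_path` is an
# ordinary inference model and can be reloaded to verify the export. The
# helper name `check_float_model` and the input shape (1, 3, 224, 224) are
# illustrative assumptions for an ImageNet-style classifier; adjust the
# shape to the model actually trained.
def check_float_model(float_path):
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    # Reload the frozen float model saved by save_inference_model above.
    [infer_prog, feed_names, fetch_targets] = fluid.io.load_inference_model(
        dirname=float_path, executor=exe)
    # Run one random batch through it and print the output shape.
    fake_image = np.random.random((1, 3, 224, 224)).astype('float32')
    outs = exe.run(infer_prog,
                   feed={feed_names[0]: fake_image},
                   fetch_list=fetch_targets)
    print(outs[0].shape)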
def main():
    cfg = load_config(FLAGS.config)
    if 'architecture' in cfg:
        main_arch = cfg.architecture
    else:
        raise ValueError("'architecture' not specified in config file.")

    merge_config(FLAGS.opt)
    if 'log_iter' not in cfg:
        cfg.log_iter = 20

    # Check that use_gpu=True is not set on a CPU-only PaddlePaddle build.
    check_gpu(cfg.use_gpu)

    if cfg.use_gpu:
        devices_num = fluid.core.get_cuda_device_count()
    else:
        devices_num = int(
            os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    if 'eval_feed' not in cfg:
        eval_feed = create(main_arch + 'EvalFeed')
    else:
        eval_feed = create(cfg.eval_feed)

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    _, test_feed_vars = create_feed(eval_feed, False)

    eval_reader = create_reader(eval_feed, args_path=FLAGS.dataset_dir)
    # eval_pyreader.decorate_sample_list_generator(eval_reader, place)
    test_data_feed = fluid.DataFeeder(test_feed_vars.values(), place)

    assert os.path.exists(FLAGS.model_path)
    infer_prog, feed_names, fetch_targets = fluid.io.load_inference_model(
        dirname=FLAGS.model_path,
        executor=exe,
        model_filename='__model__.infer',
        params_filename='__params__')

    eval_keys = ['bbox', 'gt_box', 'gt_label', 'is_difficult']
    eval_values = [
        'multiclass_nms_0.tmp_0', 'gt_box', 'gt_label', 'is_difficult'
    ]
    eval_cls = []
    eval_values[0] = fetch_targets[0]

    results = eval_run(exe, infer_prog, eval_reader, eval_keys, eval_values,
                       eval_cls, test_data_feed)

    resolution = None
    if 'mask' in results[0]:
        resolution = model.mask_head.resolution
    box_ap_stats = eval_results(results, eval_feed, cfg.metric,
                                cfg.num_classes, resolution, False,
                                FLAGS.output_eval)

    logger.info("freeze the graph for inference")
    test_graph = IrGraph(core.Graph(infer_prog.desc), for_test=True)

    freeze_pass = QuantizationFreezePass(
        scope=fluid.global_scope(),
        place=place,
        weight_quantize_type=FLAGS.weight_quant_type)
    freeze_pass.apply(test_graph)
    server_program = test_graph.to_program()
    fluid.io.save_inference_model(
        dirname=os.path.join(FLAGS.save_path, 'float'),
        feeded_var_names=feed_names,
        target_vars=fetch_targets,
        executor=exe,
        main_program=server_program,
        model_filename='model',
        params_filename='weights')

    logger.info("convert the weights into int8 type")
    convert_int8_pass = ConvertToInt8Pass(
        scope=fluid.global_scope(), place=place)
    convert_int8_pass.apply(test_graph)
    server_int8_program = test_graph.to_program()
    fluid.io.save_inference_model(
        dirname=os.path.join(FLAGS.save_path, 'int8'),
        feeded_var_names=feed_names,
        target_vars=fetch_targets,
        executor=exe,
        main_program=server_int8_program,
        model_filename='model',
        params_filename='weights')
def eval(args):
    # parameters from arguments
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    val_program, feed_names, fetch_targets = fluid.io.load_inference_model(
        args.model_path,
        exe,
        model_filename="__model__.infer",
        params_filename="__params__")
    val_reader = paddle.batch(reader.val(), batch_size=128)
    feeder = fluid.DataFeeder(
        place=place, feed_list=feed_names, program=val_program)

    results = []
    for batch_id, data in enumerate(val_reader()):
        image = [[d[0]] for d in data]
        label = [[d[1]] for d in data]
        feed_data = feeder.feed(image)
        pred = exe.run(val_program,
                       feed=feed_data,
                       fetch_list=fetch_targets)
        pred = np.array(pred[0])
        label = np.array(label)
        sort_array = pred.argsort(axis=1)
        top_1_pred = sort_array[:, -1:][:, ::-1]
        top_1 = np.mean(label == top_1_pred)
        top_5_pred = sort_array[:, -5:][:, ::-1]
        acc_num = 0
        for i in range(len(label)):
            if label[i][0] in top_5_pred[i]:
                acc_num += 1
        # float() guards against integer division under Python 2.
        top_5 = float(acc_num) / len(label)
        results.append([top_1, top_5])
    result = np.mean(np.array(results), axis=0)
    print("top1_acc/top5_acc = {}".format(result))
    sys.stdout.flush()

    _logger.info("freeze the graph for inference")
    test_graph = IrGraph(core.Graph(val_program.desc), for_test=True)

    freeze_pass = QuantizationFreezePass(
        scope=fluid.global_scope(),
        place=place,
        weight_quantize_type=args.weight_quant_type)
    freeze_pass.apply(test_graph)
    server_program = test_graph.to_program()
    fluid.io.save_inference_model(
        dirname=os.path.join(args.save_path, 'float'),
        feeded_var_names=feed_names,
        target_vars=fetch_targets,
        executor=exe,
        main_program=server_program,
        model_filename='model',
        params_filename='weights')

    _logger.info("convert the weights into int8 type")
    convert_int8_pass = ConvertToInt8Pass(
        scope=fluid.global_scope(), place=place)
    convert_int8_pass.apply(test_graph)
    server_int8_program = test_graph.to_program()
    fluid.io.save_inference_model(
        dirname=os.path.join(args.save_path, 'int8'),
        feeded_var_names=feed_names,
        target_vars=fetch_targets,
        executor=exe,
        main_program=server_int8_program,
        model_filename='model',
        params_filename='weights')
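# A minimal sketch of how eval() might be wired up from the command line.
# The argument names mirror the attributes read by eval() above
# (use_gpu, model_path, save_path, weight_quant_type); the defaults and
# help strings are illustrative assumptions.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--use_gpu', action='store_true',
                        help='Evaluate on GPU if set.')
    parser.add_argument('--model_path', type=str, default='./quant_model/',
                        help='Directory holding __model__.infer and '
                             '__params__.')
    parser.add_argument('--save_path', type=str, default='./output/',
                        help='Where to save the float and int8 models.')
    parser.add_argument('--weight_quant_type', type=str, default='abs_max',
                        help='Weight quantization type used in training.')
    args = parser.parse_args()
    eval(args)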