def test_static_graph(self):
    """Run ``adaptive_max_pool2d`` in static-graph mode and compare with references.

    The check is executed on CPU and, when Paddle is compiled with CUDA,
    on GPU 0 as well. Expected values come from ``self.res_*_np``.
    """
    cuda_choices = [False, True] if core.is_compiled_with_cuda() else [False]
    for use_cuda in cuda_choices:
        if use_cuda:
            place = paddle.CUDAPlace(0)
        else:
            place = paddle.CPUPlace()
        paddle.enable_static()
        x = paddle.fluid.data(name="x", shape=[2, 3, 7, 7], dtype="float32")

        pool = paddle.nn.functional.adaptive_max_pool2d
        out_1 = pool(x=x, output_size=[3, 3])
        out_2 = pool(x=x, output_size=5)
        out_3 = pool(x=x, output_size=[2, 5])
        # The NHWC case (out_4 / res_4) is intentionally left disabled:
        # out_4 = pool(x=x, output_size=[3, 3], data_format="NHWC")
        out_5 = pool(x=x, output_size=[None, 3])

        exe = paddle.static.Executor(place=place)
        fetched = exe.run(
            fluid.default_main_program(),
            feed={"x": self.x_np},
            fetch_list=[out_1, out_2, out_3, out_5])
        [res_1, res_2, res_3, res_5] = fetched

        assert np.allclose(res_1, self.res_1_np)
        assert np.allclose(res_2, self.res_2_np)
        assert np.allclose(res_3, self.res_3_np)
        assert np.allclose(res_5, self.res_5_np)
def _run_static_graph_case(self, x_data, y_data):
    """Evaluate ``paddle.inner`` on two arrays through a static-graph executor.

    Args:
        x_data: Array fed as ``x``; its ``shape`` and ``dtype`` define the placeholder.
        y_data: Array fed as ``y``; its ``shape`` and ``dtype`` define the placeholder.

    Returns:
        The first (and only) fetched result of the executor run.
    """
    with program_guard(Program(), Program()):
        paddle.enable_static()
        x_var = paddle.static.data(
            name='x', shape=x_data.shape, dtype=x_data.dtype)
        y_var = paddle.static.data(
            name='y', shape=y_data.shape, dtype=y_data.dtype)
        inner_out = paddle.inner(x_var, y_var)

        # Prefer GPU 0 whenever the build supports CUDA.
        if paddle.is_compiled_with_cuda():
            place = paddle.CUDAPlace(0)
        else:
            place = paddle.CPUPlace()

        executor = paddle.static.Executor(place)
        fetched = executor.run(
            paddle.static.default_main_program(),
            feed={'x': x_data, 'y': y_data},
            fetch_list=[inner_out])
        return fetched[0]
def _test_base(self, run_ipu=True):
    """Build and run a ``pow`` program on IPU or CPU and return its output.

    Args:
        run_ipu (bool): Run the IPU-compiled program on ``IPUPlace`` when
            True; otherwise run the plain program on ``CPUPlace``.

    Returns:
        The first fetched result of the executor run.
    """
    scope = fluid.core.Scope()
    main_prog = paddle.static.Program()
    startup_prog = paddle.static.Program()
    SEED = self.SEED
    main_prog.random_seed = SEED
    startup_prog.random_seed = SEED

    with fluid.scope_guard(scope):
        with paddle.static.program_guard(main_prog, startup_prog):
            # Two placeholders: the base tensor and the exponent tensor.
            base, exponent = [
                paddle.static.data(
                    name=self.feed_list[idx],
                    shape=self.feed_shape[idx],
                    dtype=self.feed_dtype[idx]) for idx in (0, 1)
            ]
            out = paddle.fluid.layers.pow(base, factor=exponent, **self.attrs)
            fetch_list = [out.name]

        place = paddle.IPUPlace() if run_ipu else paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(startup_prog)

        program = main_prog
        if run_ipu:
            feed_list = self.feed_list
            ipu_strategy = compiler.get_ipu_strategy()
            ipu_strategy.is_training = self.is_training
            program = compiler.IPUCompiledProgram(
                main_prog,
                ipu_strategy=ipu_strategy).compile(feed_list, fetch_list)

        result = exe.run(program, feed=self.feed, fetch_list=fetch_list)
        return result[0]
def _scaled_channels(dim, ratio):
    """Return ``dim`` as an int, scaled by ``ratio`` unless ``ratio`` is None."""
    return int(dim) if ratio is None else int(dim * ratio)


def prune_params(model, param_config, super_model_sd=None):
    """Slice every parameter of ``model`` in place according to ``param_config``.

    Args:
        model: Object whose ``named_parameters()`` yields ``(name, param)`` pairs.
        param_config (dict): Maps ``param.name`` to a sequence of ratios.
            With more than one entry, the first ratio scales dimension 0 and
            the second scales dimension 1; with a single entry only
            dimension 0 is scaled. A ratio of ``None`` keeps the full size.
            Parameters whose name is absent are not sliced.
        super_model_sd (dict, optional): When given, the values written back
            are taken (and sliced) from ``super_model_sd[name]`` instead of
            the model's own current values. Target sizes are still computed
            from the model's own parameter shape.
    """
    use_super = super_model_sd is not None
    for name, param in model.named_parameters():
        t_value = param.value().get_tensor()
        value = np.array(t_value).astype("float32")

        # ``source`` is the array the new values are cut from.
        source = value
        if use_super:
            super_t_value = super_model_sd[name].value().get_tensor()
            source = np.array(super_t_value).astype("float32")

        if param.name in param_config:
            ratios = param_config[param.name]
            if len(ratios) > 1:
                in_chn = _scaled_channels(value.shape[0], ratios[0])
                out_chn = _scaled_channels(value.shape[1], ratios[1])
                prune_value = source[:in_chn, :out_chn, ...]
            else:
                out_chn = _scaled_channels(value.shape[0], ratios[0])
                prune_value = source[:out_chn, ...]
        else:
            prune_value = source

        # Write the result back on the same kind of place the tensor lives on.
        p = t_value._place()
        if p.is_cpu_place():
            place = paddle.CPUPlace()
        elif p.is_cuda_pinned_place():
            place = paddle.CUDAPinnedPlace()
        else:
            place = paddle.CUDAPlace(p.gpu_device_id())
        t_value.set(prune_value, place)

        if param.trainable:
            param.clear_gradient()
def run_static(self, use_gpu=False):
    """Compare ``paddle.prod`` in static-graph mode against ``np.prod``.

    Args:
        use_gpu (bool): Execute on ``CUDAPlace(0)`` when True, else on CPU.
    """
    data = paddle.fluid.data(name='input', shape=[10, 10, 5], dtype='float32')

    # Each entry pairs the paddle.prod keyword arguments with the
    # equivalent np.prod keyword arguments.
    cases = (
        ({}, {}),
        ({'axis': 1}, {'axis': 1}),
        ({'axis': -1}, {'axis': -1}),
        ({'axis': [0, 1]}, {'axis': (0, 1)}),
        ({'axis': 1, 'keepdim': True}, {'axis': 1, 'keepdims': True}),
        ({'axis': 1, 'dtype': 'int64'}, {'axis': 1, 'dtype': np.int64}),
        ({'axis': 1, 'keepdim': True, 'dtype': 'int64'},
         {'axis': 1, 'keepdims': True, 'dtype': np.int64}),
    )
    fetches = [paddle.prod(data, **paddle_kwargs) for paddle_kwargs, _ in cases]

    place = paddle.CUDAPlace(0) if use_gpu else paddle.CPUPlace()
    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())
    static_result = exe.run(feed={"input": self.input}, fetch_list=fetches)

    for index, (_, numpy_kwargs) in enumerate(cases):
        expected_result = np.prod(self.input, **numpy_kwargs)
        self.assertTrue(np.allclose(static_result[index], expected_result))
def save_inference_model(self,
                         dirname: str,
                         model_filename: str = None,
                         params_filename: str = None,
                         combined: bool = False,
                         **kwargs):
    '''
    Export the model to Paddle Inference format.

    If ``self.processor`` exists and provides its own ``save_inference_model``,
    the export is delegated to it and its return value is returned. Otherwise
    the program obtained from ``self.context(for_test=True, trainable=False)``
    is saved with a CPU executor.

    Args:
        dirname(str): The directory to save the paddle inference model.
        model_filename(str): The name of the saved model file. Defaults to `__model__`.
        params_filename(str): The name of the saved parameters file, only takes effect when `combined` is True.
            Defaults to `__params__` in that case.
        combined(bool): Whether to save all parameters in a combined file. Defaults to False.
        **kwargs: Accepted for call compatibility; not used by this method.
    '''
    if hasattr(self, 'processor'):
        if hasattr(self.processor, 'save_inference_model'):
            return self.processor.save_inference_model(dirname, model_filename, params_filename, combined)

    model_filename = model_filename or '__model__'
    if combined:
        params_filename = params_filename or '__params__'

    place = paddle.CPUPlace()
    exe = paddle.static.Executor(place)

    feed_dict, fetch_dict, program = self.context(for_test=True, trainable=False)
    paddle.fluid.io.save_inference_model(
        dirname=dirname,
        main_program=program,
        executor=exe,
        feeded_var_names=[var.name for var in feed_dict.values()],
        target_vars=list(fetch_dict.values()),
        model_filename=model_filename,
        params_filename=params_filename)

    log.logger.info('Paddle Inference model saved in {}.'.format(dirname))
def _test(self, run_npu=True):
    """Train a tiny ``expand`` + ``sum`` program for 100 steps and return the last loss.

    Args:
        run_npu (bool): Run on ``NPUPlace(0)`` when True, otherwise on CPU.

    Returns:
        The executor result (fetch of ``loss``) from the final step.
    """
    main_prog = paddle.static.Program()
    startup_prog = paddle.static.Program()
    main_prog.random_seed = SEED
    startup_prog.random_seed = SEED
    np.random.seed(SEED)

    a_np = np.random.random(size=(32, 1)).astype('float32')
    label_np = np.random.randint(2, size=(32, 1)).astype('int64')

    with paddle.static.program_guard(main_prog, startup_prog):
        a = paddle.static.data(name="a", shape=[32, 1], dtype='float32')
        # ``label`` is not consumed by the loss, but the placeholder must
        # exist because ``label_np`` is fed on every step below.
        label = paddle.static.data(
            name="label", shape=[32, 1], dtype='int64')

        res = paddle.fluid.layers.expand(a, [1, 32])
        loss = res.sum()
        sgd = fluid.optimizer.SGD(learning_rate=0.01)
        sgd.minimize(loss)

    if run_npu:
        place = paddle.NPUPlace(0)
    else:
        place = paddle.CPUPlace()

    exe = paddle.static.Executor(place)
    exe.run(startup_prog)

    for epoch in range(100):
        loss_res = exe.run(main_prog,
                           feed={"a": a_np,
                                 "label": label_np},
                           fetch_list=[loss])
        if epoch % 10 == 0:
            # Report the fetched value, not the graph variable ``loss``.
            print("Epoch {} | Loss: {}".format(epoch, loss_res))

    return loss_res
def create_quant_model(model, params, prefix, program_config):
    """Quantize a serialized model with ``quant_post_static`` and return the result.

    Args:
        model (bytes): Serialized model, written to ``prefix + "/model"``.
        params (bytes): Serialized parameters, written to ``prefix + "/params"``.
        prefix (str): Working directory for the original and quantized models.
        program_config: Object whose ``inputs["input_data"].shape`` gives the
            shape of the random calibration samples.

    Returns:
        tuple: ``(model_bytes, params_bytes)`` read from the quantized output.
    """
    quantize_model_path = prefix + "/static_quantized_conv_2d"

    # Step 1: persist the original model so paddleslim can load it from disk.
    for file_name, payload in (("/model", model), ("/params", params)):
        with open(prefix + file_name, "wb") as sink:
            sink.write(payload)

    # Step 2: set up the executor and a random calibration data source.
    paddle.enable_static()
    exe = paddle.static.Executor(paddle.CPUPlace())
    input_shape = program_config.inputs["input_data"].shape

    def _reader():
        produced = 0
        while produced < 200:
            yield np.random.random(input_shape).astype(np.float32)
            produced += 1

    # Step 3: post-training static quantization.
    paddleslim.quant.quant_post_static(
        executor=exe,
        weight_bits=8,
        batch_size=input_shape[0],
        model_dir=prefix,
        quantize_model_path=quantize_model_path,
        sample_generator=_reader,
        weight_quantize_type='abs_max',
        model_filename="model",
        params_filename="params", )

    # Step 4: read the quantized artifacts back.
    with open(quantize_model_path + "/__model__", "rb") as source:
        quant_model = source.read()
    with open(quantize_model_path + "/__params__", "rb") as source:
        quant_params = source.read()
    return quant_model, quant_params
def test_fsp_loss(self):
    """Check which op types ``fsp_loss`` adds to a merged teacher/student program."""

    def _default_program_op_types():
        # Snapshot of every op type currently in the default main program.
        return [
            op.type
            for block in paddle.static.default_main_program().blocks
            for op in block.ops
        ]

    # Student network lives in the default main program.
    input = paddle.static.data(name="image", shape=[None, 3, 224, 224])
    conv1 = conv_bn_layer(input, 8, 3, "conv1")
    conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
    student_predict = conv1 + conv2

    # Teacher network lives in its own program.
    teacher_main = paddle.static.Program()
    teacher_startup = paddle.static.Program()
    with paddle.static.program_guard(teacher_main, teacher_startup):
        input = paddle.static.data(name="image", shape=[None, 3, 224, 224])
        conv1 = conv_bn_layer(input, 8, 3, "conv1")
        conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
        sum1 = conv1 + conv2
        conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
        conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
        sum2 = conv4 + sum1
        conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
        teacher_predict = conv_bn_layer(conv5, 8, 3, "conv6")

    place = paddle.CPUPlace()
    data_name_map = {'image': 'image'}
    merge(teacher_main,
          paddle.static.default_main_program(), data_name_map, place)
    merged_ops = _default_program_op_types()

    distill_loss = fsp_loss(
        'teacher_conv5_bn_output.tmp_2', 'teacher_conv6_bn_output.tmp_2',
        'conv1_bn_output.tmp_2', 'conv2_bn_output.tmp_2')
    loss_ops = _default_program_op_types()

    before, after = set(merged_ops), set(loss_ops)
    self.assertTrue(before.difference(after) == set())
    self.assertTrue(
        after.difference(before) ==
        {'elementwise_sub', 'reduce_mean', 'square', 'fsp'})
def run_gen_ncc_id(attr):
    """Declare the NCCL id variables, append a ``gen_nccl_id`` op and run it on CPU.

    Args:
        attr (dict): Attributes forwarded to the ``gen_nccl_id`` op. The keys
            ``nccl_comm_num`` and ``use_hierarchical_allreduce`` are also read
            here to decide which variables to declare.
    """
    nccl_comm_num = attr['nccl_comm_num']
    use_hallreduce = attr['use_hierarchical_allreduce']

    startup_program = paddle.static.default_startup_program()
    main_program = paddle.static.default_main_program()

    def _declare_raw(var_name):
        # Persistable RAW variable in the startup program's global block.
        return startup_program.global_block().create_var(
            name=var_name, persistable=True, type=core.VarDesc.VarType.RAW)

    with paddle.static.program_guard(main_program, startup_program):
        nccl_id_var = _declare_raw("NCCLID")

        for comm_idx in range(1, nccl_comm_num):
            _declare_raw("NCCLID_{}".format(comm_idx))

        if use_hallreduce:
            for comm_idx in range(nccl_comm_num):
                _declare_raw("Hierarchical_inter_NCCLID_{}".format(comm_idx))
                _declare_raw("Hierarchical_exter_NCCLID_{}".format(comm_idx))

        startup_program.global_block().append_op(
            type="gen_nccl_id",
            inputs={},
            outputs={"NCCLID": nccl_id_var},
            attrs=attr)

    exe = paddle.static.Executor(paddle.CPUPlace())
    exe.run(startup_program)
def test_merge(self):
    """After ``merge``, the student program holds the student ops plus the teacher ops."""

    def _all_ops(program):
        # Flat list of the ops in every block of ``program``.
        return [op for block in program.blocks for op in block.ops]

    student_main = paddle.static.Program()
    student_startup = paddle.static.Program()
    with paddle.static.program_guard(student_main, student_startup):
        input = paddle.static.data(name="image", shape=[None, 3, 224, 224])
        conv1 = conv_bn_layer(input, 8, 3, "conv1")
        conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
        student_predict = conv1 + conv2
    student_ops = _all_ops(student_main)

    teacher_main = paddle.static.Program()
    teacher_startup = paddle.static.Program()
    with paddle.static.program_guard(teacher_main, teacher_startup):
        input = paddle.static.data(name="image", shape=[None, 3, 224, 224])
        conv1 = conv_bn_layer(input, 8, 3, "conv1")
        conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
        sum1 = conv1 + conv2
        conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
        conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
        sum2 = conv4 + sum1
        conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
        teacher_predict = conv_bn_layer(conv5, 8, 3, "conv6")
    teacher_ops = _all_ops(teacher_main)

    place = paddle.CPUPlace()
    data_name_map = {'image': 'image'}
    merge(teacher_main, student_main, data_name_map, place)
    merged_ops = _all_ops(student_main)

    expected_count = len(student_ops) + len(teacher_ops)
    self.assertTrue(expected_count == len(merged_ops))
def _test_api(self):
    """Exercise reshape with list, mixed list/tensor and tensor shape arguments."""
    paddle.enable_static()
    x_np = np.random.random([2, 25]).astype("float32")
    shape = [2, 5, 5]

    main_prog = Program()
    with program_guard(main_prog, Program()):
        positive_five = self.fill_constant([1], "int32", 5)
        x = self.data(name="x", shape=[2, 25], dtype="float32")
        actual_shape = self.data(name="shape", shape=[3], dtype="int32")

        # Case 1: shape is a plain list, no actual_shape tensor.
        out_1 = self.reshape(x, shape)

        # Case 2: shape is a plain list and an actual_shape tensor is given.
        out_2 = fluid.layers.reshape(
            x, shape=shape, actual_shape=actual_shape)

        # Case 3: shape is a list containing a tensor, no actual_shape tensor.
        out_3 = self.reshape(x, shape=[positive_five, 10])

        # Case 4: shape itself is a tensor, no actual_shape tensor.
        out_4 = self.reshape(x, shape=actual_shape)

    exe = paddle.static.Executor(place=paddle.CPUPlace())
    feed = {"x": x_np, "shape": np.array([2, 5, 5]).astype("int32")}
    res_1, res_2, res_3, res_4 = exe.run(
        main_prog, feed=feed, fetch_list=[out_1, out_2, out_3, out_4])

    assert np.array_equal(res_1, x_np.reshape(shape))
    assert np.array_equal(res_2, x_np.reshape(shape))
    assert np.array_equal(res_3, x_np.reshape([5, 10]))
    assert np.array_equal(res_4, x_np.reshape(shape))
def distributed_training(exe,
                         train_model,
                         train_data_path="./data",
                         batch_size=10,
                         epoch_num=1):
    """Run the default main program over a ``WideDeepDataset`` for several epochs.

    Args:
        exe: Executor used to run the program.
        train_model: Object exposing ``loader`` and ``cost``.
        train_data_path (str): Location passed to ``WideDeepDataset``.
        batch_size (int): Batch size for the sample generator.
        epoch_num (int): Number of passes over the data.
    """
    dataset = WideDeepDataset(data_path=train_data_path)
    reader = train_model.loader.set_sample_generator(
        dataset,
        batch_size=batch_size,
        drop_last=True,
        places=paddle.CPUPlace())

    epoch_id = 0
    while epoch_id < epoch_num:
        reader.start()
        try:
            # Keep stepping until the reader signals the end of the pass.
            while True:
                fetched = exe.run(
                    program=paddle.static.default_main_program(),
                    fetch_list=[train_model.cost.name])
                loss_val = np.mean(fetched)
                print("TRAIN ---> pass: {} loss: {}\n".format(epoch_id,
                                                              loss_val))
        except paddle.common_ops_import.core.EOFException:
            reader.reset()
        epoch_id += 1
def which_device(self):
    """Pick the execution device for this runner and create its executor.

    The device name is read from the global env key
    ``runner.<runner name>.device`` (default ``"CPU"``) and upper-cased.
    The resulting name, place and executor are stored on ``self`` and in
    ``self._context``.

    Raises:
        ValueError: If the configured device is neither GPU nor CPU.
    """
    env_key = "runner." + self._runner_name + ".device"
    device = envs.get_global_env(env_key, default_value="CPU").upper()

    if device == 'GPU':
        self.check_gpu()
        self.device = Device.GPU
        gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
        self._place = paddle.CUDAPlace(gpu_id)
        print("PaddleRec run on device GPU: {}".format(gpu_id))
    elif device == "CPU":
        self.device = Device.CPU
        self._place = paddle.CPUPlace()
    else:
        raise ValueError("Not Support device {}".format(device))

    self._exe = paddle.static.Executor(self._place)
    self._context["device"] = device
    self._context["exe"] = self._exe
    self._context["place"] = self._place
def run_static(x_np, y_np, op_str, use_npu=False, binary_op=True):
    """Run ``paddle.<op_str>`` in static-graph mode and return the fetched results.

    Args:
        x_np: First operand; its ``shape`` and ``dtype`` define placeholder ``x``.
        y_np: Second operand; only used when ``binary_op`` is True.
        op_str (str): Name of the attribute of ``paddle`` to call.
        use_npu (bool): Use ``NPUPlace(0)`` if requested and supported by the build.
        binary_op (bool): Call the op with ``(x, y)`` when True, with ``(x)`` otherwise.

    Returns:
        The list returned by the executor for the single fetch target.
    """
    paddle.enable_static()
    startup_program = fluid.Program()
    main_program = fluid.Program()

    if use_npu and fluid.core.is_compiled_with_npu():
        place = paddle.NPUPlace(0)
    else:
        place = paddle.CPUPlace()
    exe = fluid.Executor(place)

    with fluid.program_guard(main_program, startup_program):
        x = paddle.static.data(name='x', shape=x_np.shape, dtype=x_np.dtype)
        op = getattr(paddle, op_str)
        feed_list = {'x': x_np}
        if binary_op:
            y = paddle.static.data(
                name='y', shape=y_np.shape, dtype=y_np.dtype)
            feed_list['y'] = y_np
            res = op(x, y)
        else:
            res = op(x)
        exe.run(startup_program)
        static_result = exe.run(
            main_program, feed=feed_list, fetch_list=[res])
    return static_result
def test_dynamic_graph(self):
    """Check ``adaptive_avg_pool2d`` (and area interpolation) in dynamic-graph mode.

    Runs on CPU and, when Paddle is compiled with CUDA, on GPU 0 as well.
    """
    cuda_choices = [False, True] if core.is_compiled_with_cuda() else [False]
    for use_cuda in cuda_choices:
        if use_cuda:
            place = paddle.CUDAPlace(0)
        else:
            place = paddle.CPUPlace()
        paddle.disable_static(place=place)
        x = paddle.to_variable(self.x_np)

        avg_pool = paddle.nn.functional.adaptive_avg_pool2d
        out_1 = avg_pool(x=x, output_size=[3, 3])
        out_2 = avg_pool(x=x, output_size=5)
        out_3 = avg_pool(x=x, output_size=[2, 5])
        out_4 = avg_pool(x=x, output_size=[3, 3], data_format="NHWC")
        out_5 = avg_pool(x=x, output_size=[None, 3])
        # Area interpolation to [2, 5] is compared with the same reference
        # as out_3.
        out_6 = paddle.nn.functional.interpolate(
            x=x, mode="area", size=[2, 5])

        assert np.allclose(out_1.numpy(), self.res_1_np)
        assert np.allclose(out_2.numpy(), self.res_2_np)
        assert np.allclose(out_3.numpy(), self.res_3_np)
        assert np.allclose(out_4.numpy(), self.res_4_np)
        assert np.allclose(out_5.numpy(), self.res_5_np)
        assert np.allclose(out_6.numpy(), self.res_3_np)
def test_api(self):
    """Check ``paddle.concat`` with the axis given as an int and as int32/int64 tensors."""
    paddle.enable_static()

    # Concatenation of a placeholder with an unknown leading dimension.
    x_1 = paddle.fluid.data(shape=[None, 1, 4, 5], dtype='int32', name='x_1')
    paddle.concat([x_1, x_1], 0)

    input_2 = np.random.random([2, 1, 4, 5]).astype("int32")
    input_3 = np.random.random([2, 2, 4, 5]).astype("int32")
    x_2 = fluid.data(shape=[2, 1, 4, 5], dtype='int32', name='x_2')
    x_3 = fluid.data(shape=[2, 2, 4, 5], dtype='int32', name='x_3')

    fill_constant = paddle.fluid.layers.fill_constant
    positive_1_int32 = fill_constant([1], "int32", 1)
    positive_1_int64 = fill_constant([1], "int64", 1)
    negative_int64 = fill_constant([1], "int64", -3)

    out_1 = paddle.concat(x=[x_2, x_3], axis=1)
    out_2 = paddle.concat(x=[x_2, x_3], axis=positive_1_int32)
    out_3 = paddle.concat(x=[x_2, x_3], axis=positive_1_int64)
    out_4 = paddle.concat(x=[x_2, x_3], axis=negative_int64)

    exe = paddle.static.Executor(place=paddle.CPUPlace())
    feed = {"x_1": input_2, "x_2": input_2, "x_3": input_3}
    [res_1, res_2, res_3, res_4] = exe.run(
        paddle.static.default_main_program(),
        feed=feed,
        fetch_list=[out_1, out_2, out_3, out_4])

    # Every axis spelling above is expected to match concatenation on axis 1.
    for fetched in (res_1, res_2, res_3, res_4):
        assert np.array_equal(fetched,
                              np.concatenate((input_2, input_3), axis=1))
def test_size_static(self):
    """Check that the static ``size`` layer matches ``np.size`` for two inputs."""
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        shape1 = [2, 1, 4, 5]
        shape2 = [1, 4, 5]
        x_1 = paddle.fluid.data(shape=shape1, dtype='int32', name='x_1')
        x_2 = paddle.fluid.data(shape=shape2, dtype='int32', name='x_2')
        input_1 = np.random.random(shape1).astype("int32")
        input_2 = np.random.random(shape2).astype("int32")
        out_1 = paddle.fluid.layers.size(x_1)
        out_2 = paddle.fluid.layers.size(x_2)

        # No program is passed to run(), so it must stay inside the guard
        # where ``main_program`` is the default program.
        exe = paddle.static.Executor(place=paddle.CPUPlace())
        feed = {"x_1": input_1, "x_2": input_2}
        res_1, res_2 = exe.run(feed=feed, fetch_list=[out_1, out_2])

        expected_1 = np.array([np.size(input_1)]).astype("int64")
        assert np.array_equal(res_1, expected_1)
        expected_2 = np.array([np.size(input_2)]).astype("int64")
        assert np.array_equal(res_2, expected_2)
def main():
    """Post-training static quantization of an exported inference model.

    Reads the configuration from the command line, checks that the exported
    ``inference.pdmodel`` / ``inference.pdiparams`` files exist, builds the
    ``Eval`` data loader with batch size 1 and writes the quantized model to
    ``<save_inference_dir>/quant_post_static_model``.
    """
    args = conf.parse_args()
    config = conf.get_config(args.config, overrides=args.override, show=False)

    inference_dir = config["Global"]["save_inference_dir"]
    model_file = os.path.join(inference_dir, 'inference.pdmodel')
    params_file = os.path.join(inference_dir, 'inference.pdiparams')
    assert os.path.exists(model_file) and os.path.exists(params_file)

    eval_cfg = config["DataLoader"]["Eval"]
    eval_cfg["sampler"]["batch_size"] = 1
    eval_cfg["loader"]["num_workers"] = 0

    init_logger()
    device = paddle.set_device("cpu")
    calib_loader = build_dataloader(config["DataLoader"], "Eval", device,
                                    False)

    def sample_generator(loader):
        # Wrap the loader into a reader that yields only the image arrays.
        def __reader__():
            for data in loader:
                yield np.array(data[0])

        return __reader__

    paddle.enable_static()
    exe = paddle.static.Executor(paddle.CPUPlace())
    paddleslim.quant.quant_post_static(
        executor=exe,
        model_dir=config["Global"]["save_inference_dir"],
        model_filename='inference.pdmodel',
        params_filename='inference.pdiparams',
        quantize_model_path=os.path.join(
            config["Global"]["save_inference_dir"], "quant_post_static_model"),
        sample_generator=sample_generator(calib_loader),
        batch_nums=10)
def _test_load(self, run_ipu):
    """Load the saved inference model from ``self.full_name`` and run it once.

    Args:
        run_ipu (bool): Compile and run on IPU when True, otherwise run on CPU.

    Returns:
        numpy.ndarray: The executor output wrapped with ``np.array``.
    """
    place = paddle.IPUPlace() if run_ipu else paddle.CPUPlace()
    exe = paddle.static.Executor(place)

    loaded = paddle.static.load_inference_model(self.full_name, exe)
    inference_program, feed_target_names, fetch_targets = loaded

    program = inference_program
    if run_ipu:
        feed_list = feed_target_names
        fetch_list = [fetch_targets[0].name]
        ipu_strategy = paddle.static.IpuStrategy()
        ipu_strategy.set_graph_config(is_training=False)
        program = paddle.static.IpuCompiledProgram(
            inference_program,
            ipu_strategy=ipu_strategy).compile(feed_list, fetch_list)

    tmp = exe.run(program, feed=self.feed, fetch_list=[fetch_targets])
    return np.array(tmp)
def run_offline_infer(self):
    """Load the initial model, prepare the dataset and run offline inference.

    The time spent preparing the dataset and running inference is logged.
    """
    init_model_path = config.get("runner.init_model_path", "")
    logger.info("Run Offline Infer Begin")

    self.exe = paddle.static.Executor(paddle.CPUPlace())
    self.exe.run(paddle.static.default_startup_program())

    fleet.init_worker()
    # Only the first worker loads the model; the others wait at the barrier.
    if fleet.is_first_worker():
        fleet.load_model(init_model_path, mode=0)
    fleet.barrier_worker()

    logger.info("Prepare Dataset Begin.")
    prepare_began = time.time()
    dataset = self.wait_and_prepare_dataset()
    prepare_ended = time.time()
    prepare_elapsed = prepare_ended - prepare_began
    logger.info("Prepare Dataset Done, using time {} second.".format(
        prepare_elapsed))

    infer_began = time.time()
    self.dataset_offline_infer(dataset)
    infer_ended = time.time()
    infer_elapsed = infer_ended - infer_began
    logger.info("Infer Dataset Done, using time {} second.".format(
        infer_elapsed))
def test_case(self):
    """Check static-graph ``max_unpool1d`` against the naive reference implementation."""
    paddle.enable_static()

    places = [paddle.CPUPlace()]
    if paddle.fluid.core.is_compiled_with_cuda():
        places.append(paddle.CUDAPlace(0))

    for place in places:
        with paddle.static.program_guard(paddle.static.Program(),
                                         paddle.static.Program()):
            input_data = np.array(
                [[[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]]
            ).astype("float32")
            x = paddle.fluid.data(name='x', shape=[1, 3, 4], dtype='float32')

            # Pool with the index mask, then invert the pooling with it.
            pooled, mask = F.max_pool1d(
                x, kernel_size=2, stride=2, return_mask=True)
            output_unpool = F.max_unpool1d(
                pooled, mask, kernel_size=2, stride=None)

            exe = paddle.fluid.Executor(place)
            fetches = exe.run(
                paddle.fluid.default_main_program(),
                feed={"x": input_data},
                fetch_list=[output_unpool],
                return_numpy=True)

            pool1d_out_np = np.array(
                [[[2., 4.], [6., 8.], [10., 12.]]]).astype("float32")
            indices_np = np.array(
                [[[1, 3], [1, 3], [1, 3]]]).astype("int32")
            expected_output_unpool = unpool1dmax_forward_naive(
                pool1d_out_np, indices_np, [2], [2], [0], [4])
            self.assertTrue(np.allclose(fetches[0], expected_output_unpool))
def check_grad_with_place(self,
                          place,
                          inputs_to_check,
                          output_names,
                          no_grad_set=None,
                          numeric_grad_delta=0.005,
                          in_place=False,
                          max_relative_error=0.005,
                          user_defined_grads=None,
                          check_dygraph=True):
    """Compare gradients from two XPU runs with each other and with a CPU run.

    Note:
        The ``place`` argument is overridden: gradients are always computed
        on ``XPUPlace(0)`` and on ``CPUPlace()``. The tolerance-related
        arguments are accepted but not read here; fixed tolerances are used.
    """
    xpu_place = paddle.XPUPlace(0)

    def _grads_on(device):
        return self.get_grad_with_place(
            device, inputs_to_check, output_names, no_grad_set=no_grad_set)

    xpu_first = _grads_on(xpu_place)
    xpu_second = _grads_on(xpu_place)
    cpu_reference = _grads_on(paddle.CPUPlace())

    self._assert_is_close(xpu_first, xpu_second, inputs_to_check, 0.00000001,
                          "Gradient Check On two xpu")
    self._assert_is_close(xpu_first, cpu_reference, inputs_to_check, 0.001,
                          "Gradient Check On cpu & xpu")
def test_api(self):
    """Smoke-test ``paddle.randint`` with the supported ways of passing ``shape``."""
    with program_guard(Program(), Program()):
        # Only ``high`` given: values are drawn from [0, 5).
        out1 = paddle.randint(5)

        # Shape as a list, dtype int32.
        out2 = paddle.randint(
            low=-100, high=100, shape=[64, 64], dtype='int32')

        # Shape as a tuple, dtype int64.
        out3 = paddle.randint(
            low=-100, high=100, shape=(32, 32, 3), dtype='int64')

        # Shape as a list mixing tensors and ints, dtype int32.
        dim_1 = paddle.fluid.layers.fill_constant([1], "int64", 32)
        dim_2 = paddle.fluid.layers.fill_constant([1], "int32", 50)
        out4 = paddle.randint(
            low=-100, high=100, shape=[dim_1, 5, dim_2], dtype='int32')

        # Shape as a fed tensor, dtype int64.
        var_shape = paddle.static.data(
            name='var_shape', shape=[2], dtype="int64")
        out5 = paddle.randint(
            low=1, high=1000, shape=var_shape, dtype='int64')

        if core.is_compiled_with_cuda():
            place = paddle.CUDAPlace(0)
        else:
            place = paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        shape_feed = {'var_shape': np.array([100, 100]).astype('int64')}
        outs = exe.run(feed=shape_feed,
                       fetch_list=[out1, out2, out3, out4, out5])
def lazy_apply(self, model):
    """Multiply masked parameters of ``model`` by their masks, keeping a backup.

    For every parameter whose name is in ``self._masks``, the current value
    is stored once in a buffer named ``<param name with "." -> "_">_backup``
    on the owning sub-layer, and the parameter tensor is overwritten with
    ``value * mask`` for each registered mask.
    """
    for _, sub_layer in model.named_sublayers():
        for param in sub_layer.parameters(include_sublayers=False):
            if param.name not in self._masks:
                continue
            for _mask in self._masks[param.name]:
                dims = _mask.dims
                mask = _mask.mask
                t_value = param.value().get_tensor()
                value = np.array(t_value).astype("float32")

                # Buffer names must not contain ".".
                backup_name = param.name.replace(".", "_") + "_backup"
                if backup_name not in sub_layer._buffers:
                    sub_layer.register_buffer(backup_name,
                                              paddle.to_tensor(value))
                    _logger.debug("Backup values of {} into buffers.".format(
                        param.name))

                # Size 1 everywhere except along the masked dimensions.
                expand_mask_shape = [1] * len(value.shape)
                for axis in dims:
                    expand_mask_shape[axis] = value.shape[axis]
                _logger.debug("Expanded mask shape: {}".format(
                    expand_mask_shape))
                expand_mask = mask.reshape(expand_mask_shape).astype(
                    "float32")

                # Write back to the same kind of place the tensor lives on.
                p = t_value._place()
                if p.is_cpu_place():
                    place = paddle.CPUPlace()
                elif p.is_cuda_pinned_place():
                    place = paddle.CUDAPinnedPlace()
                else:
                    p = core.Place()
                    p.set_place(t_value._place())
                    place = paddle.CUDAPlace(p.gpu_device_id())

                t_value.set(value * expand_mask, place)
def test_quant_embedding(self):
    """Build a small embedding program and run ``quant.quant_embedding`` on it.

    The test runs on GPU 0 when Paddle is compiled with CUDA and falls back
    to CPU otherwise, so it no longer fails on CPU-only builds.
    """
    train_program = paddle.static.Program()
    with paddle.static.program_guard(train_program):
        input_word = paddle.static.data(
            name="input_word", shape=[None, 1], dtype='int64')
        param_attr = paddle.ParamAttr(
            name='emb',
            initializer=paddle.nn.initializer.Uniform(-0.005, 0.005))
        weight = train_program.global_block().create_parameter(
            (100, 128), attr=param_attr, dtype="float32")

        # The call adds the embedding op to the program; the returned
        # variable itself is not needed afterwards.
        paddle.nn.functional.embedding(
            x=input_word, weight=weight, sparse=True)

    infer_program = train_program.clone(for_test=True)

    # Was hard-coded to True, which requests CUDAPlace(0) on every build.
    use_gpu = paddle.is_compiled_with_cuda()
    place = paddle.CUDAPlace(0) if use_gpu else paddle.CPUPlace()
    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())

    quant_program = quant.quant_embedding(infer_program, place)
def check_generate_simplify_inference(self, pass_type):
    """Apply the IR pass ``pass_type`` and check node count and numerical output.

    The pass is expected to remove exactly 6 graph nodes and to leave the
    program output unchanged for a random input.
    """
    paddle.enable_static()
    program = paddle.static.Program()
    startup_program = paddle.static.Program()
    perm = [0, 2, 1]
    with paddle.static.program_guard(program, startup_program):
        x = paddle.static.data("x", [10, 16, 16], "float32")
        # Two back-to-back transposes with the same permutation.
        x1 = paddle.transpose(paddle.transpose(x, [0, 2, 1]), [0, 2, 1])
        tmp = paddle.transpose(x, perm)
        x2 = paddle.transpose(tmp, [0, 2, 1])
        out = paddle.add(x1, paddle.matmul(x2, tmp))

    graph = core.Graph(program.desc)
    node_count_before = len(graph.nodes())
    core.get_pass(pass_type).apply(graph)
    node_count_after = len(graph.nodes())
    self.assertEqual(node_count_after, node_count_before - 6)

    after_program = paddle.fluid.framework.IrGraph(graph).to_program()
    executor = paddle.static.Executor(paddle.CPUPlace())
    executor.run(startup_program)

    feed = {"x": np.random.random([10, 16, 16]).astype("float32")}
    fetch_names = [out.name]
    before_out = executor.run(program, feed=feed, fetch_list=fetch_names)
    after_out = executor.run(after_program, feed=feed, fetch_list=[out.name])
    self.assertTrue(np.allclose(before_out, after_out))
def _run_static_single(use_cuda):
    """
    Run the simple network once with a plain executor on a single CPU/GPU.

    Static mode is enabled for the run and disabled again afterwards.

    Args:
        use_cuda (bool): Whether running with CUDA.
    """
    paddle.enable_static()
    with paddle.static.scope_guard(paddle.static.Scope()):
        train_prog = paddle.static.Program()
        startup_prog = paddle.static.Program()
        startup_prog.random_seed = 1

        with paddle.static.program_guard(train_prog, startup_prog):
            input, out, weight = _simple_network()
            backward_result = paddle.static.append_backward(
                out, parameter_list=[weight.name])
            param_grads = backward_result[0]

        if use_cuda:
            place = paddle.CUDAPlace(0)
        else:
            place = paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(startup_prog)

        # Fetch the network output and the gradient of ``weight``.
        exe.run(train_prog,
                feed={input.name: _prepare_data(1)},
                fetch_list=[out.name, param_grads[1].name])
    paddle.disable_static()
def __init__(self, range_tables, use_gpu=False, **kwargs):
    """Set up the controller's hyper-parameters, programs and executor.

    Args:
        range_tables: Sequence whose maximum plus one becomes ``max_range_table``.
        use_gpu (bool): Run on ``CUDAPlace(0)`` when True, else on CPU.
        **kwargs: Optional hyper-parameters; any missing or falsy value is
            replaced by the fallback listed in the table below.
    """
    self.use_gpu = use_gpu
    self.range_tables = range_tables

    # (attribute name, kwargs key, fallback used when the value is falsy)
    hyper_params = (
        ('lstm_num_layers', 'lstm_num_layers', 1),
        ('hidden_size', 'hidden_size', 100),
        ('temperature', 'temperature', None),
        ('controller_lr', 'controller_lr', 1e-4),
        ('decay_steps', 'controller_decay_steps', None),
        ('decay_rate', 'controller_decay_rate', None),
        ('tanh_constant', 'tanh_constant', None),
        ('decay', 'decay', 0.99),
        ('weight_entropy', 'weight_entropy', None),
        ('controller_batch_size', 'controller_batch_size', 1),
    )
    for attr_name, key, fallback in hyper_params:
        setattr(self, attr_name, kwargs.get(key) or fallback)

    self.max_range_table = max(self.range_tables) + 1

    self._create_parameter()
    self._build_program()

    if self.use_gpu:
        self.place = paddle.CUDAPlace(0)
    else:
        self.place = paddle.CPUPlace()
    self.exe = paddle.static.Executor(self.place)
    self.exe.run(paddle.static.default_startup_program())

    self.param_dict = self.get_params(self.learn_program)
def test_static_graph_functional(self):
    """Check ``F.pixel_shuffle`` in static-graph mode for NCHW and NHWC inputs.

    Runs on CPU and, when Paddle is compiled with CUDA, on GPU 0 as well.
    """
    cuda_choices = [False, True] if core.is_compiled_with_cuda() else [False]
    for use_cuda in cuda_choices:
        if use_cuda:
            place = paddle.CUDAPlace(0)
        else:
            place = paddle.CPUPlace()

        paddle.enable_static()
        x_1 = paddle.data(name="x", shape=[2, 9, 4, 4], dtype="float64")
        x_2 = paddle.data(name="x2", shape=[2, 4, 4, 9], dtype="float64")
        out_1 = F.pixel_shuffle(x_1, 3)
        out_2 = F.pixel_shuffle(x_2, 3, "NHWC")

        exe = paddle.static.Executor(place=place)
        main_program = fluid.default_main_program()
        # use_prune=True is passed so each run only feeds the one input
        # its own fetch target needs.
        res_1 = exe.run(main_program,
                        feed={"x": self.x_1_np},
                        fetch_list=out_1,
                        use_prune=True)
        res_2 = exe.run(fluid.default_main_program(),
                        feed={"x2": self.x_2_np},
                        fetch_list=out_2,
                        use_prune=True)

        assert np.allclose(res_1, self.out_1_np)
        assert np.allclose(res_2, self.out_2_np)