def test_check_output(self):
    """Check the operator's forward outputs in static-graph mode."""
    # The op-test framework requires static mode.
    paddle.enable_static()
    self.check_output()
def test_check_grad_normal(self):
    """Gradient check w.r.t. input X on an XPU device (no-op otherwise)."""
    if not paddle.is_compiled_with_xpu():
        # Nothing to verify when Paddle is built without XPU support.
        return
    paddle.enable_static()
    xpu_place = paddle.XPUPlace(0)
    self.check_grad_with_place(xpu_place, ['X'], 'Out')
# NOTE(review): fragment — the statements below are the tail of a setUp() whose
# `def` line is outside this view.  They build LayerNorm from primitive ops
# (mean / sub / pow / mean / add / sqrt / div / mul / add) so that the
# `layer_norm_fuse_pass` can be exercised against the decomposed graph.
# Per-feature mean over the last axis, kept for broadcasting.
x_mean_out = fluid.layers.reduce_mean(data, dim=-1, keep_dim=True)
x_sub_mean_out = fluid.layers.elementwise_sub(data, x_mean_out)
# (x - mean) ** sqr_pow; presumably sqr_pow holds 2.0 — defined outside this view.
x_sub_mean_sqr_out = fluid.layers.elementwise_pow(
    x_sub_mean_out, sqr_pow)
# Variance = mean of squared deviations.
std_dev_out = fluid.layers.reduce_mean(x_sub_mean_sqr_out, dim=-1, keep_dim=True)
# Add epsilon before the sqrt for numerical stability.
std_dev_eps_out = fluid.layers.elementwise_add(std_dev_out, eps)
std_dev_eps_sqrt_out = fluid.layers.sqrt(std_dev_eps_out)
# Normalized value, then affine transform with gamma (scale) and beta (shift).
division_out = fluid.layers.elementwise_div(
    x_sub_mean_out, std_dev_eps_sqrt_out)
scale_out = fluid.layers.elementwise_mul(division_out, gamma)
shift_out = fluid.layers.elementwise_add(scale_out, beta)
self.feeds = {
    "data": np.random.random((3, 64, 120)).astype("float32"),
}
self.fetch_list = [shift_out]

def test_check_output(self):
    """Run the fused program on CPU and confirm the pass is registered."""
    use_gpu = False
    self.check_output_with_option(use_gpu)
    self.assertTrue(
        PassVersionChecker.IsCompatible("layer_norm_fuse_pass"))

if __name__ == "__main__":
    enable_static()
    unittest.main()
def test_in_static_mode(self):
    """Run both the float and the complex static-graph checks."""
    paddle.enable_static()
    # Same order as before: float first, then complex.
    for check in (self.check_static_float_result,
                  self.check_static_complex_result):
        check()
def fit():
    """Train and evaluate a CNN classifier under fleet parameter-server mode.

    This process is configured as the parameter SERVER (id 0) of a 2-worker
    async setup; workers run the train/test loops and save the final model.
    """
    EPOCH_NUM = 3
    BATCH_SIZE = 128
    type_size = 10  # overwritten below by createDataList's return value
    # Static role configuration: this process acts as a SERVER.
    role = role_maker.UserDefinedRoleMaker(
        current_id=0,
        role=role_maker.Role.SERVER,
        worker_num=2,
        server_endpoints=["127.0.0.1:36011", "127.0.0.1:36012"])
    fleet.init(role)
    strategy = paddle.distributed.fleet.DistributedStrategy()
    strategy.a_sync = True  # asynchronous parameter-server training
    type_size = createDataList('F:/机器学习/CNN/train', 'D:/cnn/cnn.model.data' + "/")
    # Data provider used for training.
    train_reader = dataReader("D:/cnn/cnn.model.data/trainer.list")
    train_reader = paddle.batch(paddle.reader.shuffle(reader=train_reader,
                                                      buf_size=BATCH_SIZE * 100),
                                batch_size=BATCH_SIZE)
    test_reader = dataReader("D:/cnn/cnn.model.data/test.list")
    test_reader = paddle.batch(paddle.reader.shuffle(reader=test_reader,
                                                     buf_size=BATCH_SIZE * 100),
                               batch_size=BATCH_SIZE)
    data_shape = [3, 32, 32]  # CHW image shape (CIFAR-like)
    paddle.enable_static()
    images = fluid.layers.data(name='images', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # Build the classifier.
    predict = networkConfiguration(images, type_size)
    # Define the loss function and accuracy.
    cost = fluid.layers.cross_entropy(input=predict, label=label)  # cross entropy
    avg_cost = fluid.layers.mean(cost)  # mean over all elements of cost
    acc = fluid.layers.accuracy(input=predict, label=label)  # accuracy from predictions and labels
    # Clone the test program BEFORE attaching the optimizer.
    test_program = fluid.default_main_program().clone(for_test=True)
    optimizer = fluid.optimizer.Adam(learning_rate=0.001)  # define the optimizer
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)
    if fleet.is_server():
        fleet.init_server()
        fleet.run_server()  # blocks serving parameters
    elif fleet.is_worker():
        fleet.init_worker()
        ########## model training & evaluation ##########
        # Create the Executor.
        use_cuda = False  # whether to use GPU; use_cuda=False selects CPU
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        print("完成")
        # Define the data feeder.
        feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
        for pass_id in range(EPOCH_NUM):
            # Start training: iterate over train_reader.
            for batch_id, data in enumerate(train_reader()):
                train_cost, train_acc = exe.run(
                    program=fluid.default_main_program(),  # run the main program
                    feed=feeder.feed(data),  # feed one batch of data
                    fetch_list=[avg_cost, acc])  # fetch loss and accuracy
                # Periodically print training stats and run one evaluation pass.
                # NOTE(review): the original comment said "every 100 batches"
                # but the code tests every 20 — confirm the intended cadence.
                if batch_id % 20 == 0:
                    print('Pass:%d, Batch:%d, Cost:%0.5f, Accuracy:%0.5f' %
                          (pass_id, batch_id, train_cost[0], train_acc[0]))
                    # Start evaluation.
                    test_costs = []  # per-batch test losses
                    test_accs = []  # per-batch test accuracies
                    for batch_id, data in enumerate(test_reader()):
                        test_cost, test_acc = exe.run(
                            program=test_program,  # run the test program
                            feed=feeder.feed(data),  # feed data
                            fetch_list=[avg_cost, acc])  # fetch loss, accuracy
                        test_costs.append(test_cost[0])  # record batch loss
                        test_accs.append(test_acc[0])  # record batch accuracy
                    test_cost = (sum(test_costs) / len(test_costs))  # mean loss
                    test_acc = (sum(test_accs) / len(test_accs))  # mean accuracy
                    print('Test:%d, Cost:%0.5f, ACC:%0.5f' %
                          (pass_id, test_cost, test_acc))
        save(predict, "D:/cnn/cnn.model", exe)
def setUp(self):
    """Seed numpy, switch to static mode, and collect available devices."""
    np.random.seed(123)
    paddle.enable_static()
    # CPU is always tested; CUDA only when this build supports it.
    devices = [fluid.CPUPlace()]
    if core.is_compiled_with_cuda():
        devices.append(fluid.CUDAPlace(0))
    self.places = devices
def save_quantized_model(self, layer, path, input_spec=None, **config):
    """
    Save the quantized model for the inference.

    Args:
        layer (Layer): The Layer to be saved.
        path (str): The path prefix to save model. The format is
            ``dirname/file_prefix`` or ``file_prefix``.
        input_spec (list[InputSpec|Tensor], optional): Describes the input
            of the saved model's forward method, which can be described by
            InputSpec or example Tensor. If None, all input variables of
            the original Layer's forward method would be the inputs of
            the saved model. Default None.
        **config (dict, optional): Other save configuration options for
            compatibility. We do not recommend using these configurations,
            they may be removed in the future. If not necessary, DO NOT
            use them. Default None. The following options are currently
            supported: (1) output_spec (list[Tensor]): Selects the output
            targets of the saved model. By default, all return variables
            of original Layer's forward method are kept as the output of
            the saved model. If the provided ``output_spec`` list is not
            all output variables, the saved model will be pruned according
            to the given ``output_spec`` list.

    Returns:
        None
    """
    assert isinstance(
        layer,
        dygraph.Layer), "model must be the instance of dygraph.Layer"
    is_dynamic_mode = False
    with dygraph.guard():
        layer.eval()
        # Hooks were only needed to collect scales during training.
        for handle in self._register_hook_handle_list:
            handle.remove()
        # Convert collected scale tensors to plain Python floats.
        for key in self._out_scale_dict:
            self._out_scale_dict[key] = float(
                self._out_scale_dict[key].numpy())
        paddle.jit.save(layer=layer, path=path, input_spec=input_spec, **config)
    # Remember the caller's mode so it can be restored before returning.
    if paddle.in_dynamic_mode():
        is_dynamic_mode = True
        paddle.enable_static()
    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
    else:
        place = core.CPUPlace()
    exe = Executor(place)
    file_prefix = os.path.basename(path)
    dirname = os.path.dirname(path)
    model_filename = file_prefix + INFER_MODEL_SUFFIX
    params_filename = file_prefix + INFER_PARAMS_SUFFIX
    # Reload the just-saved static program so its ops can be annotated.
    [inference_program, feed_target_names, fetch_targets
     ] = (load_inference_model(dirname=dirname,
                               executor=exe,
                               model_filename=model_filename,
                               params_filename=params_filename))
    # Traverse all ops in the program and find out the op matching
    # the Layer in the dynamic graph.
    layer_var_dict = {}
    for block in inference_program.blocks:
        for op in block.ops:
            if op.type in _op_real_in_out_name:
                output_var_names = quantization_pass._get_op_output_var_names(
                    op)
                for output_var_name in output_var_names:
                    output_var_tensor = block.var(output_var_name)
                    # Scales only apply to float outputs.
                    if output_var_tensor.dtype not in [
                            core.VarDesc.VarType.FP64,
                            core.VarDesc.VarType.FP32
                    ]:
                        continue
                    # Because the Layer in dygraph may correspond to multiple ops
                    # in static program after being saved. To ensure correctness,
                    # the outscale collected for output of dygraph Layer can only
                    # be set to the last op in the corresponding ops in static program.
                    #
                    # We can judge the execution order of the ops which corresponding
                    # to dygraph Layer by the name of output. And use dict to save
                    # the corresponding relationship between the dygraph Layer and the
                    # static graph op that needs to set the outscale attribute.
                    if '.' not in output_var_name:
                        continue
                    # NOTE(review): assumes exactly one '.' in the name;
                    # more would raise on unpack — confirm naming scheme.
                    dynamic_layer_name, var_name_suffix = output_var_name.split(
                        ".")
                    if dynamic_layer_name in layer_var_dict:
                        # Keep the op with the lexicographically largest
                        # suffix, i.e. the last-executed one.
                        if layer_var_dict[dynamic_layer_name][
                                0] < var_name_suffix:
                            layer_var_dict[dynamic_layer_name] = [
                                var_name_suffix, op
                            ]
                    else:
                        layer_var_dict[dynamic_layer_name] = [
                            var_name_suffix, op
                        ]
    # Because the naming styles of static and dynamic graph are different,
    # in order to avoid mistakes, we unify the name here.
    for (layer_name, var_name_op_list) in layer_var_dict.items():
        if 'prelu' in layer_name:
            layer_name = layer_name.replace('prelu', 'p_re_lu')
        if 'relu' in layer_name:
            layer_name = layer_name.replace('relu', 're_lu')
        if layer_name not in self._out_scale_dict:
            continue
        var_name_op_list[1]._set_attr('out_threshold',
                                      self._out_scale_dict[layer_name])
    # Save the processed program.
    save_inference_model(dirname=dirname,
                         feeded_var_names=feed_target_names,
                         target_vars=fetch_targets,
                         executor=exe,
                         main_program=inference_program.clone(),
                         model_filename=model_filename,
                         params_filename=params_filename)
    if is_dynamic_mode:
        paddle.disable_static()
def run_train(args):
    """Train the network on the training set, periodically evaluating on the
    validation set and keeping only the best (lowest dev MSE) checkpoint.
    """
    out(args.logfile, datetime.datetime.now())
    out(args.logfile, "# python3 " + " ".join(sys.argv))
    log = args.logfile
    train_data, val_data = load_train_data()
    out(log, "# Training set contains {} Sequences.".format(len(train_data)))
    out(log, "# Validation set contains {} Sequences.".format(len(val_data)))
    trainer_count = fluid.dygraph.parallel.Env().nranks
    # dev_id is a property (the device of this trainer); single-trainer runs
    # always use GPU 0.
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id
                            ) if trainer_count > 1 else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    paddle.enable_static()
    out(log, "# Paddle: Using device: {}".format(place))
    out(log, "# Initializing model...")
    seq_vocab, bracket_vocab = process_vocabulary(args, train_data)
    network = Network(
        seq_vocab,
        bracket_vocab,
        dmodel=args.dmodel,
        layers=args.layers,
        dropout=args.dropout,
    )
    main_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()
    current_processed, total_processed = 0, 0
    # Evaluate every `check_every` training batches.
    check_every = math.floor((len(train_data) / args.checks_per_epoch))
    best_dev_loss, best_dev_model_path = np.inf, None
    start_time = time.time()
    out(
        log,
        "# Checking validation {} times an epoch (every {} batches)".format(
            args.checks_per_epoch, check_every))
    # NOTE(review): `patience` is computed but never consulted below — early
    # stopping appears unimplemented; confirm whether it is used elsewhere.
    patience = check_every * args.checks_per_epoch * 2
    batches_since_dev_update = 0
    train_reader = fluid.io.batch(fluid.io.shuffle(reader_creator(
        args, train_data, seq_vocab, bracket_vocab), buf_size=500),
                                  batch_size=args.batch_size)
    val_reader = fluid.io.batch(fluid.io.shuffle(reader_creator(
        args, val_data, seq_vocab, bracket_vocab), buf_size=500),
                                batch_size=1)
    # LoD (variable-length) sequence inputs and scalar float labels.
    seq = fluid.data(name="seq", shape=[None], dtype="int64", lod_level=1)
    dot = fluid.data(name="dot", shape=[None], dtype="int64", lod_level=1)
    y = fluid.data(name="label", shape=[None], dtype="float32")
    predictions = network(seq, dot)
    loss = fluid.layers.mse_loss(input=predictions, label=y)
    avg_loss = fluid.layers.mean(loss)
    # Clone for evaluation BEFORE the optimizer mutates the main program.
    test_program = main_program.clone(for_test=True)
    feeder = paddle.fluid.DataFeeder(place=place, feed_list=[seq, dot, y])
    learning_rate = 1e-4
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-08
    optimizer = fluid.optimizer.Adam(
        learning_rate=learning_rate,
        beta1=beta1,
        beta2=beta2,
        epsilon=epsilon,
    )
    optimizer.minimize(avg_loss)
    exe.run(startup_program)
    exe_test = fluid.Executor(place)
    start_epoch_index = 1
    # 1-based epochs; stops after args.epochs epochs.
    for epoch in itertools.count(start=start_epoch_index):
        if epoch >= args.epochs + 1:
            break
        # Re-create the reader each epoch for a fresh shuffle.
        train_reader = fluid.io.batch(fluid.io.shuffle(reader_creator(
            args, train_data, seq_vocab, bracket_vocab), buf_size=500),
                                      batch_size=args.batch_size)
        out(log, "# Epoch {} starting.".format(epoch))
        epoch_start_time = time.time()
        for batch_index, batch in enumerate(train_reader()):
            batch_loss, pred_values = exe.run(
                main_program,
                feed=feeder.feed(batch),
                fetch_list=[avg_loss.name, predictions.name],
                return_numpy=False)
            batch_loss = np.array(batch_loss)
            pred_values = np.array(pred_values)
            total_processed += len(batch)
            current_processed += len(batch)
            batches_since_dev_update += 1
            out(
                log, "epoch {:,} "
                "batch {:,} "
                "processed {:,} "
                "batch-loss {:.4f} "
                "epoch-elapsed {} "
                "total-elapsed {} "
                "".format(
                    epoch,
                    batch_index + 1,
                    total_processed,
                    float(batch_loss),
                    format_elapsed(epoch_start_time),
                    format_elapsed(start_time),
                ))
            if math.isnan(float(batch_loss[0])):
                sys.exit("got NaN loss, training failed.")
            # Time for a validation pass?
            if current_processed >= check_every:
                current_processed -= (check_every)
                val_results = []
                for data in val_reader():
                    loss, pred = exe.run(
                        test_program,
                        feed=feeder.feed(data),
                        fetch_list=[avg_loss.name, predictions.name],
                        return_numpy=False)
                    loss = np.array(loss)
                    val_results.append(loss[0])
                val_loss = sum(val_results) / len(val_results)
                out(
                    log,
                    "# Dev Average Loss: {:5.3f} (MSE) -> {:5.3f} (RMSD)".
                    format(float(val_loss), math.sqrt(float(val_loss))))
                if val_loss < best_dev_loss:
                    batches_since_dev_update = 0
                    # Drop the previously-best checkpoint before saving.
                    if best_dev_model_path is not None:
                        path = "{}/{}_dev={:.4f}".format(
                            args.model_path_base, args.model_path_base,
                            best_dev_loss)
                        print("\t\t", best_dev_model_path,
                              os.path.exists(path))
                        if os.path.exists(path):
                            out(
                                log,
                                "* Removing previous model file {}...".format(
                                    path))
                            shutil.rmtree(path)
                    best_dev_loss = val_loss
                    best_dev_model_path = "{}_dev={:.4f}".format(
                        args.model_path_base, val_loss)
                    out(
                        log, "* Saving new best model to {}...".format(
                            best_dev_model_path))
                    if not os.path.exists(args.model_path_base):
                        os.mkdir(args.model_path_base)
                    fluid.io.save_inference_model(
                        args.model_path_base + "/" + best_dev_model_path,
                        ['seq', 'dot'], [predictions], exe)
def setUp(self):
    """Each test in this case runs under static-graph mode."""
    paddle.enable_static()
def test_dygraph(self):
    """Poisson samples drawn in dygraph mode must be non-negative."""
    paddle.disable_static()
    rates = paddle.randn([10, 10], dtype='float32')
    samples = paddle.poisson(rates)
    # Poisson counts can never be negative.
    self.assertTrue(np.min(samples.numpy()) >= 0)
    # Restore static mode for the remaining tests.
    paddle.enable_static()
def run_test_withlabel(args):
    """Evaluate a saved model on the validation set and the labeled test set.

    Loads the inference model from ``args.model_path_base``, rebuilds the
    network graph, and logs MSE / RMSD for both datasets via ``out``.

    Args:
        args: parsed CLI namespace (logfile, model_path_base, dmodel,
            layers, batch_size, ...).
    """
    out(args.logfile, datetime.datetime.now())
    out(args.logfile, "# python3 " + " ".join(sys.argv))
    log = args.logfile
    trainer_count = fluid.dygraph.parallel.Env().nranks
    # FIX: `dev_id` is a property, not a method — calling it (`dev_id()`)
    # raised "'int' object is not callable" on multi-trainer runs.
    # run_train already reads it as an attribute; made consistent here.
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id
                            ) if trainer_count > 1 else fluid.CUDAPlace(0)
    out(log, "Loading data...")
    train_data, val_data = load_train_data()
    test_data = load_test_label_data()
    out(log, "Loading model...")
    # Vocabularies must come from the TRAINING data so ids match the model.
    seq_vocab, bracket_vocab = process_vocabulary(args, train_data)
    network = Network(
        seq_vocab,
        bracket_vocab,
        dmodel=args.dmodel,
        layers=args.layers,
        dropout=0,  # evaluation: no dropout
    )
    exe = fluid.Executor(place)
    paddle.enable_static()
    fluid.io.load_inference_model(args.model_path_base, exe)
    val_reader = fluid.io.batch(fluid.io.shuffle(reader_creator(
        args, val_data, seq_vocab, bracket_vocab), buf_size=500),
                                batch_size=args.batch_size)
    test_reader = fluid.io.batch(reader_creator(args, test_data, seq_vocab,
                                                bracket_vocab),
                                 batch_size=args.batch_size)
    # LoD (variable-length) sequence inputs and scalar float labels.
    seq = fluid.data(name="seq", shape=[None], dtype="int64", lod_level=1)
    dot = fluid.data(name="dot", shape=[None], dtype="int64", lod_level=1)
    y = fluid.data(name="label", shape=[None], dtype="float32")
    predictions = network(seq, dot)
    loss = fluid.layers.mse_loss(input=predictions, label=y)
    avg_loss = fluid.layers.mean(loss)
    main_program = fluid.default_main_program()
    test_program = main_program.clone(for_test=True)
    feeder = fluid.DataFeeder(place=place, feed_list=[seq, dot, y])
    # --- Validation set ---
    val_results = []
    for data in val_reader():
        loss, pred = exe.run(test_program,
                             feed=feeder.feed(data),
                             fetch_list=[avg_loss.name, predictions.name],
                             return_numpy=False)
        loss = np.array(loss)
        val_results.append(loss[0])
    val_loss = sum(val_results) / len(val_results)
    out(
        log, "# Dev Average Loss: {:6.4f} (MSE) -> {:6.4f} (RMSD)".format(
            float(val_loss), math.sqrt(float(val_loss))))
    # --- Labeled test set ---
    test_results = []
    for data in test_reader():
        # `pred`/`gold` are fetched for parity with the training fetches;
        # only the loss feeds the reported metric.  (A dead triple-quoted
        # debug block that consumed them was removed.)
        loss, pred, gold = exe.run(
            test_program,
            feed=feeder.feed(data),
            fetch_list=[avg_loss.name, predictions.name, y.name],
            return_numpy=False)
        loss = np.array(loss)
        test_results.append(loss[0])
    test_loss = sum(test_results) / len(test_results)
    out(
        log, "# Test Average Loss: {:6.4f} (MSE) -> {:6.4f} (RMSD)".format(
            float(test_loss), math.sqrt(float(test_loss))))
def test_fixed_random_number(self):
    """With a fixed seed on GPU, paddle.poisson must reproduce known samples.

    The hard-coded `expect` tables are golden values recorded for seed 2021;
    any change to the sampler's RNG stream breaks these equalities.
    """
    # Golden values were recorded on CUDA; skip on CPU-only builds.
    if not paddle.is_compiled_with_cuda():
        return
    paddle.disable_static()
    paddle.set_device('gpu')
    paddle.seed(2021)
    # Large tensor, rate lambda = 10.
    x = paddle.full([32, 3, 1024, 768], 10., dtype="float32")
    y = paddle.poisson(x)
    y_np = y.numpy()
    # Spot-check 20-element slices at several positions in the tensor.
    expect = [
        13., 13., 11., 8., 12., 6., 9., 15., 16., 6., 13., 12., 9., 15.,
        17., 8., 11., 16., 11., 10.
    ]
    self.assertTrue(np.array_equal(y_np[0, 0, 0, 0:20], expect))
    expect = [
        15., 7., 12., 8., 14., 10., 10., 11., 11., 11., 21., 6., 9., 13.,
        13., 11., 6., 9., 12., 12.
    ]
    self.assertTrue(np.array_equal(y_np[8, 1, 300, 200:220], expect))
    expect = [
        10., 15., 9., 6., 4., 13., 10., 10., 13., 12., 9., 7., 10., 14.,
        7., 10., 8., 5., 10., 14.
    ]
    self.assertTrue(np.array_equal(y_np[16, 1, 600, 400:420], expect))
    expect = [
        10., 9., 14., 12., 8., 9., 7., 8., 11., 10., 13., 8., 12., 9., 7.,
        8., 11., 11., 12., 5.
    ]
    self.assertTrue(np.array_equal(y_np[24, 2, 900, 600:620], expect))
    expect = [
        15., 5., 11., 13., 12., 12., 13., 16., 9., 9., 7., 9., 13., 11.,
        15., 6., 11., 9., 10., 10.
    ]
    self.assertTrue(np.array_equal(y_np[31, 2, 1023, 748:768], expect))
    # Second tensor: rate lambda = 5, continuing the same RNG stream.
    x = paddle.full([16, 1024, 1024], 5., dtype="float32")
    y = paddle.poisson(x)
    y_np = y.numpy()
    expect = [
        4., 5., 2., 9., 8., 7., 4., 7., 4., 7., 6., 3., 10., 7., 5., 7.,
        2., 5., 5., 6.
    ]
    self.assertTrue(np.array_equal(y_np[0, 0, 100:120], expect))
    expect = [
        1., 4., 8., 11., 6., 5., 4., 4., 7., 4., 4., 7., 11., 6., 5., 3.,
        4., 6., 3., 3.
    ]
    self.assertTrue(np.array_equal(y_np[4, 300, 300:320], expect))
    expect = [
        7., 5., 4., 6., 8., 5., 6., 7., 7., 7., 3., 10., 5., 10., 4., 5.,
        8., 7., 5., 7.
    ]
    self.assertTrue(np.array_equal(y_np[8, 600, 600:620], expect))
    expect = [
        8., 6., 7., 4., 3., 0., 4., 6., 6., 4., 3., 10., 5., 1., 3., 8.,
        8., 2., 1., 4.
    ]
    self.assertTrue(np.array_equal(y_np[12, 900, 900:920], expect))
    expect = [
        2., 1., 14., 3., 6., 5., 2., 2., 6., 5., 7., 4., 8., 4., 8., 4.,
        5., 7., 1., 7.
    ]
    self.assertTrue(np.array_equal(y_np[15, 1023, 1000:1020], expect))
    # Restore static mode for the rest of the suite.
    paddle.enable_static()
def save_quantized_model(self, model, path, input_spec=None,
                         onnx_format=False, **config):
    """
    Save the quantized model for the inference.

    Args:
        model (Layer): The model to be saved.
        path (str): The path prefix to save model. The format is
            ``dirname/file_prefix`` or ``file_prefix``.
        input_spec (list[InputSpec|Tensor], optional): Describes the input
            of the saved model's forward method, which can be described by
            InputSpec or example Tensor. If None, all input variables of
            the original Layer's forward method would be the inputs of
            the saved model. Default None.
        onnx_format (bool, optional): Whether to export the quantized model
            with format of ONNX. Default is False.
        **config (dict, optional): Other save configuration options for
            compatibility. We do not recommend using these configurations,
            they may be removed in the future. If not necessary, DO NOT
            use them. Default None. The following options are currently
            supported: (1) output_spec (list[Tensor]): Selects the output
            targets of the saved model. By default, all return variables
            of original Layer's forward method are kept as the output of
            the saved model. If the provided ``output_spec`` list is not
            all output variables, the saved model will be pruned according
            to the given ``output_spec`` list.

    Returns:
        None
    """
    assert isinstance(model, dygraph.Layer), \
        "The model must be the instance of dygraph.Layer."
    # First export the dygraph model as a static inference program.
    paddle.jit.save(layer=model, path=path, input_spec=input_spec, **config)
    # Remember the caller's mode so it can be restored before returning.
    is_dynamic_mode = False
    if paddle.in_dynamic_mode():
        is_dynamic_mode = True
        paddle.enable_static()
    place = core.CPUPlace()
    scope = global_scope()
    exe = Executor(place)
    dirname = os.path.dirname(path)
    basename = os.path.basename(path)
    model_filename = basename + INFER_MODEL_SUFFIX
    params_filename = basename + INFER_PARAMS_SUFFIX
    # Reload the just-saved program so it can be post-processed.
    [infer_program, feed_target_names, fetch_targets
     ] = (load_inference_model(dirname=dirname,
                               executor=exe,
                               model_filename=model_filename,
                               params_filename=params_filename))
    # Propagate the collected quantization scales into the program.
    self._gather_scales(infer_program, scope, fetch_targets)
    # Remove `moving_average_abs_max_scale` node in sub graphs.
    graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
    for sub_graph in graph.all_sub_graphs():
        for _op in sub_graph.all_op_nodes():
            if _op.name() == "moving_average_abs_max_scale":
                sub_graph.safe_remove_nodes(_op)
        sub_graph.resolve_hazard()
    infer_program = graph.to_program()
    self._set_skip_quant_attr(infer_program)
    clip_extra = False
    if onnx_format:
        # Rewrite fake quant/dequant ops into the ONNX-compatible form and
        # quantize the weights in place.
        graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
        transform_pass = ReplaceFakeQuantDequantPass(scope, place)
        transform_pass.apply(graph)
        quant_weight_pass = QuantWeightPass(scope, place)
        quant_weight_pass.apply(graph)
        infer_program = graph.to_program()
        clip_extra = True
    # Persist the processed program over the original export.
    save_inference_model(dirname=dirname,
                         feeded_var_names=feed_target_names,
                         target_vars=fetch_targets,
                         executor=exe,
                         main_program=infer_program.clone(),
                         model_filename=model_filename,
                         params_filename=params_filename,
                         clip_extra=clip_extra)
    if is_dynamic_mode:
        paddle.disable_static()
def test_check_grad(self):
    """Gradient check for input X against output Out in static mode."""
    paddle.enable_static()
    self.check_grad({'X'}, 'Out')
def test_assign_List(self):
    """paddle.assign should accept a plain Python list in dygraph mode."""
    paddle.disable_static()
    source = [1, 2, 3]
    assigned = paddle.assign(source)
    self.assertTrue(np.allclose(assigned.numpy(), np.array(source)))
    # Restore static mode for the rest of the suite.
    paddle.enable_static()
def setUp(self):
    """Static-mode fixture targeting NPU device 0 with fixed shapes/seed."""
    paddle.enable_static()
    self.seed = 10
    self.shapes = [[3, 4], [2, 7], [5, 6], [7, 8]]
    self.place = paddle.fluid.NPUPlace(0)
    # Mark the whole test class as NPU-backed for the op-test framework.
    self.__class__.use_npu = True
def test_type_error(self):
    """Assigning a list of Tensors must raise TypeError in static mode."""
    paddle.enable_static()
    with program_guard(Program(), Program()):
        tensors = [paddle.randn([3, 3]), paddle.randn([3, 3])]
        # assign() does not support a list of Variables.
        self.assertRaises(TypeError, paddle.assign, tensors)
# NOTE(review): fragment — the statements below are the interior of a data
# loading function whose `def` line (and the enclosing `for i` loop over
# classes) is outside this view.
images_path = os.listdir(os.path.join(data_root, class_file_list[i]))
for image_path in images_path:
    # Load each image with the cv2 backend; label is the class index i.
    images_data.append(
        vision.image_load(data_root + '/' + class_file_list[i] + '/' +
                          image_path, backend='cv2'))
    images_label.append(i)
    count_value += 1
    # NOTE(review): prints a running count per image — presumably debug
    # output; confirm whether it belongs inside or after the loop.
    print(count_value)
assert len(images_data) == len(images_label)

def reader():
    """Yield normalized NCHW float batches of `batch_size` images."""
    # Drops the final (possibly partial) batch AND one full batch due to
    # the `-1`; presumably intentional head-room — confirm with the caller.
    for i in range(int(len(images_data) / batch_size) - 1):
        data = images_data[i * batch_size:(i + 1) * batch_size]
        data = np.array(data)
        # HWC -> CHW for each image in the batch.
        data = np.transpose(data, (0, 3, 1, 2))
        # Scale uint8 pixels to [-1, 1].
        data = (data - 127.5) / 127.5
        yield data

return reader

if __name__ == '__main__':
    paddle.enable_static()  # static-graph mode
    USE_GPU = True  # use GPU
    simple_generator = batch_generator(data_reader())
    place = paddle.CUDAPlace(0) if USE_GPU else paddle.CPUPlace()
    exe = paddle.static.Executor(place)
    # Post-training static quantization of the exported backbone model.
    paddleslim.quant.quant_post_static(
        executor=exe,
        model_dir='./output',
        model_filename='/output/Backbone_epoch99.pdmodel',
        params_filename='/output/Backbone_epoch99.pdiparams',
        quantize_model_path='quant_post_static_model',
        sample_generator=simple_generator,
        batch_size=128,
        batch_nums=10)
def test_out_scale_acc(self):
    """Train a quantize-aware LeNet in dygraph, save it as a static
    inference model, and verify that every conv2d/mul op except the first
    of each kind is preceded by a fake_quantize_dequantize op.
    """
    seed = 1000
    lr = 0.1
    imperative_out_scale = ImperativeQuantAware()
    np.random.seed(seed)
    reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
    lenet = ImperativeLenet()
    # Replace all parameters with deterministic values so the run is
    # reproducible: zero biases, small-normal weights.
    fixed_state = {}
    for name, param in lenet.named_parameters():
        p_shape = param.numpy().shape
        p_value = param.numpy()
        if name.endswith("bias"):
            value = np.zeros_like(p_value).astype('float32')
        else:
            value = np.random.normal(
                loc=0.0, scale=0.01,
                size=np.product(p_shape)).reshape(p_shape).astype('float32')
        fixed_state[name] = value
    lenet.set_dict(fixed_state)
    # Insert fake-quant ops into the dygraph model.
    imperative_out_scale.quantize(lenet)
    adam = AdamOptimizer(
        learning_rate=lr, parameter_list=lenet.parameters())
    dynamic_loss_rec = []
    lenet.train()
    for batch_id, data in enumerate(reader()):
        x_data = np.array([x[0].reshape(1, 28, 28)
                           for x in data]).astype('float32')
        y_data = np.array(
            [x[1] for x in data]).astype('int64').reshape(-1, 1)
        img = fluid.dygraph.to_variable(x_data)
        label = fluid.dygraph.to_variable(y_data)
        out = lenet(img)
        loss = fluid.layers.cross_entropy(out, label)
        avg_loss = fluid.layers.mean(loss)
        avg_loss.backward()
        adam.minimize(avg_loss)
        lenet.clear_gradients()
        dynamic_loss_rec.append(avg_loss.numpy()[0])
        if batch_id % 100 == 0:
            _logger.info('{}: {}'.format('loss', avg_loss.numpy()))
    lenet.eval()
    # Export the quantized model to disk as a static inference program.
    path = "./save_dynamic_quant_infer_model/lenet"
    save_dir = "./save_dynamic_quant_infer_model"
    imperative_out_scale.save_quantized_model(
        layer=lenet,
        path=path,
        input_spec=[
            paddle.static.InputSpec(
                shape=[None, 1, 28, 28], dtype='float32')
        ])
    # Reload the exported program and inspect its op sequence.
    paddle.enable_static()
    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
    else:
        place = core.CPUPlace()
    exe = fluid.Executor(place)
    [inference_program, feed_target_names, fetch_targets] = (
        fluid.io.load_inference_model(
            dirname=save_dir,
            executor=exe,
            model_filename="lenet" + INFER_MODEL_SUFFIX,
            params_filename="lenet" + INFER_PARAMS_SUFFIX))
    model_ops = inference_program.global_block().ops
    # The first conv2d/mul consumes the raw input (no quant op before it);
    # every later one must directly follow a fake_quantize_dequantize op.
    conv2d_count, mul_count = 0, 0
    for i, op in enumerate(model_ops):
        if op.type == 'conv2d':
            if conv2d_count > 0:
                self.assertTrue(
                    'fake_quantize_dequantize' in model_ops[i - 1].type)
            else:
                self.assertTrue(
                    'fake_quantize_dequantize' not in model_ops[i - 1].type)
            conv2d_count += 1
        if op.type == 'mul':
            if mul_count > 0:
                self.assertTrue(
                    'fake_quantize_dequantize' in model_ops[i - 1].type)
            else:
                self.assertTrue(
                    'fake_quantize_dequantize' not in model_ops[i - 1].type)
            mul_count += 1
def setUp(self):
    """Build the static fixture: feed dict, expected output, program, executor."""
    paddle.enable_static()
    # 4x1 input column with a single 1; expected is the 3x1 filtered result.
    self.feed = {"x": np.array([[0], [0], [1], [0]], dtype='float32')}
    self.expected = np.array([[0], [1], [0]], dtype='float32')
    self.build_program()
    self.exe = paddle.static.Executor(paddle.CPUPlace())
# NOTE(review): fragment — the lines below start inside a
# `fluid.nets.sequence_conv_pool(...)` call whose opening (building conv_3)
# is outside this view; the enclosing classifier function ends at `return`.
filter_size=3,
act="tanh",
pool_type="sqrt")
    conv_4 = fluid.nets.sequence_conv_pool(input=emb,
                                           num_filters=hid_dim2,
                                           filter_size=4,
                                           act="tanh",
                                           pool_type="sqrt")
    # Concatenate both conv-pool branches into a softmax classifier head.
    output = fluid.layers.fc(input=[conv_3, conv_4],
                             size=class_dim,
                             act='softmax')
    return output

paddle.enable_static()  ##### add by mart 21-3-19
# Define input data; lod_level != 0 marks the input as sequence data.
words = fluid.layers.data(name='words', shape=[1], dtype='int64', lod_level=1)
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
# Get the length of the data dictionary.
dict_dim = get_dict_len('./data/dict_txt.txt')
# Build the convolutional network.
# model = CNN_net(words, dict_dim, 15)
# Get the classifier.
model = CNN_net(words, dict_dim)
# Get the loss function and accuracy.
cost = fluid.layers.cross_entropy(input=model, label=label)
avg_cost = fluid.layers.mean(cost)
acc = fluid.layers.accuracy(input=model, label=label)
# Get the prediction program.
def test_static(self):
    """Static-graph QR decomposition must match numpy.linalg.qr across
    shapes (2- to 4-dim), modes (reduced/complete/r) and float dtypes.
    """
    paddle.enable_static()

    def run_qr_static(shape, mode, dtype):
        # One combination: build the numpy reference, then run Paddle's
        # static qr and compare.
        if dtype == "float32":
            np_dtype = np.float32
        elif dtype == "float64":
            np_dtype = np.float64
        a = np.random.rand(*shape).astype(np_dtype)
        m = a.shape[-2]
        n = a.shape[-1]
        min_mn = min(m, n)
        # k = number of columns in Q: min(m, n) for reduced/r, m for complete.
        if mode == "reduced" or mode == "r":
            k = min_mn
        else:
            k = m
        np_q_shape = list(a.shape[:-2])
        np_q_shape.extend([m, k])
        np_r_shape = list(a.shape[:-2])
        np_r_shape.extend([k, n])
        np_q = np.zeros(np_q_shape).astype(np_dtype)
        np_r = np.zeros(np_r_shape).astype(np_dtype)
        places = []
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for place in places:
            with fluid.program_guard(fluid.Program(), fluid.Program()):
                # numpy.linalg.qr has no batch support here: decompose each
                # leading-index matrix individually into np_q / np_r.
                batch_size = a.size // (a.shape[-1] * a.shape[-2])
                for i in range(batch_size):
                    coord = np.unravel_index(i, a.shape[:-2])
                    if mode == "r":
                        tmp_r = np.linalg.qr(a[coord], mode=mode)
                        np_r[coord] = tmp_r
                    else:
                        tmp_q, tmp_r = np.linalg.qr(a[coord], mode=mode)
                        np_q[coord] = tmp_q
                        np_r[coord] = tmp_r
                x = paddle.fluid.data(name="input", shape=shape, dtype=dtype)
                if mode == "r":
                    # "r" mode returns only the R factor.
                    r = paddle.linalg.qr(x, mode=mode)
                    exe = fluid.Executor(place)
                    fetches = exe.run(fluid.default_main_program(),
                                      feed={"input": a},
                                      fetch_list=[r])
                    self.assertTrue(
                        np.allclose(fetches[0], np_r, atol=1e-5))
                else:
                    q, r = paddle.linalg.qr(x, mode=mode)
                    exe = fluid.Executor(place)
                    fetches = exe.run(fluid.default_main_program(),
                                      feed={"input": a},
                                      fetch_list=[q, r])
                    self.assertTrue(
                        np.allclose(fetches[0], np_q, atol=1e-5))
                    self.assertTrue(
                        np.allclose(fetches[1], np_r, atol=1e-5))

    tensor_shapes = [
        (3, 5),
        (5, 5),
        (5, 3),  # 2-dim Tensors
        (2, 3, 5),
        (3, 5, 5),
        (4, 5, 3),  # 3-dim Tensors
        (2, 5, 3, 5),
        (3, 5, 5, 5),
        (4, 5, 5, 3)  # 4-dim Tensors
    ]
    modes = ["reduced", "complete", "r"]
    dtypes = ["float32", "float64"]
    # Exercise the full cartesian product of shape x mode x dtype.
    for tensor_shape, mode, dtype in itertools.product(
            tensor_shapes, modes, dtypes):
        run_qr_static(tensor_shape, mode, dtype)
def main():
    """CLI entry point: parse args, validate the CUDA flag, export the model."""
    paddle.enable_static()
    args = parser.parse_args()
    print_arguments(args)
    # Fails early if --use_gpu is set on a CPU-only build.
    check_cuda(args.use_gpu)
    save_inference_model(args)
def main(args):
    """Fine-tune / evaluate a BERT classifier on a GLUE-style task.

    Builds train/dev/test programs according to the ``do_train`` /
    ``do_val`` / ``do_test`` flags, initializes from a checkpoint or
    pretrained params, runs the training loop with periodic checkpointing
    and evaluation, and finally evaluates on dev/test.

    Args:
        args: parsed CLI namespace (task/config paths, device flags,
            batch/epoch settings, fp16 options, ...).

    Raises:
        ValueError: if none of do_train/do_val/do_test is set, or when
            evaluating without ``init_checkpoint``.
    """
    bert_config = BertConfig(args.bert_config_path)
    bert_config.print_config()

    if args.use_xpu:
        paddle.enable_static()

    # --- Device selection ---
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = get_device_num()
    elif args.use_xpu:
        xpu_id = int(os.getenv('FLAGS_selected_xpus', '0'))
        place = fluid.XPUPlace(xpu_id)
        dev_count = len([place])
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    # --- Task data processor ---
    task_name = args.task_name.lower()
    processors = {
        'xnli': reader.XnliProcessor,
        'cola': reader.ColaProcessor,
        'mrpc': reader.MrpcProcessor,
        'mnli': reader.MnliProcessor,
    }
    processor = processors[task_name](data_dir=args.data_dir,
                                      vocab_path=args.vocab_path,
                                      max_seq_len=args.max_seq_len,
                                      do_lower_case=args.do_lower_case,
                                      in_tokens=args.in_tokens,
                                      random_seed=args.random_seed)
    num_labels = len(processor.get_labels())

    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    train_program = fluid.Program()
    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed
        train_program.random_seed = args.random_seed

    if args.do_train:
        # NOTE: If num_trainers > 1, the shuffle_seed must be set, because
        # the order of batch data generated by reader
        # must be the same in the respective processes.
        shuffle_seed = 1 if num_trainers > 1 else None
        train_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='train',
            epoch=args.epoch,
            dev_count=dev_count,
            shuffle=args.shuffle,
            shuffle_seed=shuffle_seed)

        num_train_examples = processor.get_num_examples(phase='train')
        # in_tokens: batch_size counts tokens, so divide by max_seq_len.
        if args.in_tokens:
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
        warmup_steps = int(max_train_steps * args.warmup_proportion)
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_data_loader, loss, probs, accuracy, num_seqs = create_model(
                    args, bert_config=bert_config, num_labels=num_labels)
                scheduled_lr, loss_scaling = optimization(
                    loss=loss,
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                    init_loss_scaling=args.init_loss_scaling,
                    incr_every_n_steps=args.incr_every_n_steps,
                    decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
                    incr_ratio=args.incr_ratio,
                    decr_ratio=args.decr_ratio)

    if args.do_val:
        dev_prog = fluid.Program()
        with fluid.program_guard(dev_prog, startup_prog):
            with fluid.unique_name.guard():
                dev_data_loader, loss, probs, accuracy, num_seqs = create_model(
                    args, bert_config=bert_config, num_labels=num_labels)
        dev_prog = dev_prog.clone(for_test=True)
        dev_data_loader.set_batch_generator(
            processor.data_generator(batch_size=args.batch_size,
                                     phase='dev',
                                     epoch=1,
                                     dev_count=1,
                                     shuffle=False), place)

    if args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_data_loader, loss, probs, accuracy, num_seqs = create_model(
                    args, bert_config=bert_config, num_labels=num_labels)
        test_prog = test_prog.clone(for_test=True)
        test_data_loader.set_batch_generator(
            processor.data_generator(batch_size=args.batch_size,
                                     phase='test',
                                     epoch=1,
                                     dev_count=1,
                                     shuffle=False), place)

    exe.run(startup_prog)

    # --- Parameter initialization ---
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=startup_prog,
                            use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(exe,
                                    args.init_pretraining_params,
                                    main_program=startup_prog,
                                    use_fp16=args.use_fp16)
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            # FIX: the two adjacent literals previously concatenated to
            # "...should be set ifonly doing..." (missing space).
            raise ValueError("args 'init_checkpoint' should be set if "
                             "only doing validation or testing!")
        init_checkpoint(exe,
                        args.init_checkpoint,
                        main_program=startup_prog,
                        use_fp16=args.use_fp16)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.use_experimental_executor = args.use_fast_executor
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope
        build_strategy = fluid.BuildStrategy()

        if args.use_cuda and num_trainers > 1:
            assert shuffle_seed is not None
            dist_utils.prepare_for_multi_process(exe, build_strategy,
                                                 train_program)
            train_data_generator = fluid.contrib.reader.distributed_batch_reader(
                train_data_generator)

        if args.use_xpu:
            train_compiled_program = train_program
        else:
            train_compiled_program = fluid.CompiledProgram(
                train_program).with_data_parallel(
                    loss_name=loss.name, build_strategy=build_strategy)
        train_data_loader.set_batch_generator(train_data_generator, place)

    # --- Training loop ---
    if args.do_train:
        train_data_loader.start()
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        time_begin = time.time()
        throughput = []
        ce_info = []
        total_batch_num = 0  # used for benchmark
        interval_seq_num = 0
        while True:
            try:
                steps += 1
                total_batch_num += 1  # used for benchmark
                if args.max_iter and total_batch_num == args.max_iter:  # used for benchmark
                    return
                if args.use_fp16:
                    fetch_list = [
                        loss.name, accuracy.name, scheduled_lr.name,
                        num_seqs.name, loss_scaling.name
                    ]
                else:
                    fetch_list = [
                        loss.name, accuracy.name, scheduled_lr.name,
                        num_seqs.name
                    ]
                outputs = exe.run(train_compiled_program,
                                  fetch_list=fetch_list)
                interval_seq_num += np.sum(
                    outputs[3])  # get the sequence number

                if steps % args.skip_steps == 0:
                    if args.use_fp16:
                        np_loss, np_acc, np_lr, np_num_seqs, np_scaling = outputs
                    else:
                        np_loss, np_acc, np_lr, np_num_seqs = outputs
                    # Accumulate sequence-weighted metrics for averaging.
                    total_cost.extend(np_loss * np_num_seqs)
                    total_acc.extend(np_acc * np_num_seqs)
                    total_num_seqs.extend(np_num_seqs)

                    if args.verbose:
                        verbose = "train data_loader queue size: %d, " % train_data_loader.queue.size(
                        )
                        verbose += "learning rate: %f" % np_lr[0]
                        if args.use_fp16:
                            verbose += ", loss scaling: %f" % np_scaling[0]
                        print(verbose)

                    current_example, current_epoch = processor.get_train_progress(
                    )
                    time_end = time.time()
                    used_time = time_end - time_begin

                    # profiler tools
                    if args.is_profiler and current_epoch == 0 and steps == args.skip_steps:
                        profiler.start_profiler("All")
                    elif args.is_profiler and current_epoch == 0 and steps == args.skip_steps * 2:
                        profiler.stop_profiler("total", args.profiler_path)
                        return

                    log_record = "epoch: {}, progress: {}/{}, step: {}, ave loss: {}, ave acc: {}".format(
                        current_epoch, current_example, num_train_examples,
                        steps,
                        np.sum(total_cost) / np.sum(total_num_seqs),
                        np.sum(total_acc) / np.sum(total_num_seqs))
                    ce_info.append([
                        np.sum(total_cost) / np.sum(total_num_seqs),
                        np.sum(total_acc) / np.sum(total_num_seqs), used_time
                    ])
                    if steps > 0:
                        throughput.append(args.skip_steps / used_time)
                        log_record = log_record + ", speed: %f steps/s" % (
                            args.skip_steps / used_time
                        ) + ", ips: %f sequence/s" % (interval_seq_num /
                                                      used_time)
                        print(log_record)
                    else:
                        print(log_record)
                    total_cost, total_acc, total_num_seqs = [], [], []
                    interval_seq_num = 0
                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    fluid.save(program=train_program, model_path=save_path)

                if steps % args.validation_steps == 0:
                    print("Average throughtput: %s" %
                          (np.average(throughput)))
                    throughput = []
                    # evaluate dev set
                    if args.do_val:
                        evaluate(exe, dev_prog, dev_data_loader,
                                 [loss.name, accuracy.name, num_seqs.name],
                                 "dev")
                    # evaluate test set
                    if args.do_test:
                        evaluate(exe, test_prog, test_data_loader,
                                 [loss.name, accuracy.name, num_seqs.name],
                                 "test")
            except fluid.core.EOFException:
                # Reader exhausted: save a final checkpoint and stop.
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.save(program=train_program, model_path=save_path)
                train_data_loader.reset()
                break

        if args.enable_ce:
            card_num = get_cards()
            ce_cost = 0
            ce_acc = 0
            ce_time = 0
            try:
                ce_cost = ce_info[-2][0]
                ce_acc = ce_info[-2][1]
                ce_time = ce_info[-2][2]
            except IndexError:
                # Fewer than two recorded intervals: keep the zero defaults.
                print("ce info error")
            print("kpis\ttrain_duration_%s_card%s\t%s" %
                  (args.task_name, card_num, ce_time))
            print("kpis\ttrain_cost_%s_card%s\t%f" %
                  (args.task_name, card_num, ce_cost))
            print("kpis\ttrain_acc_%s_card%s\t%f" %
                  (args.task_name, card_num, ce_acc))

    # final eval on dev set
    if args.do_val:
        print("Final validation result:")
        evaluate(exe, dev_prog, dev_data_loader,
                 [loss.name, accuracy.name, num_seqs.name], "dev")

    # final eval on test set
    if args.do_test:
        print("Final test result:")
        evaluate(exe, test_prog, test_data_loader,
                 [loss.name, accuracy.name, num_seqs.name], "test")
def test_check_output(self):
    """Check the operator's forward output on an XPU device.

    The whole check is a no-op on builds without XPU support.
    """
    if not paddle.is_compiled_with_xpu():
        return
    # XPU kernels are exercised under static-graph mode.
    paddle.enable_static()
    self.check_output_with_place(paddle.XPUPlace(0))
def test_errors(self):
    """Verify that HSigmoidLoss and its functional/fluid variants reject
    invalid argument types and shapes, in both static and dynamic mode."""
    with paddle.static.program_guard(paddle.static.Program(),
                                     paddle.static.Program()):
        # test paddle.nn.HSigmoidLoss: invalid constructor arguments
        self.assertRaises(ValueError, paddle.nn.HSigmoidLoss, 6, 1)
        # test paddle.nn.functional.hsigmoid_loss: build valid inputs first
        x = paddle.static.data('x', [4, 6])
        label = paddle.static.data('label', [4, 1], 'int64')
        weight = paddle.static.data('weight', [7, 6])
        bias = paddle.static.data('bias', [7])
        # input dtype must be a float type, not int32
        x_int32 = paddle.static.data('x_int32', [4, 6], 'int32')
        self.assertRaises(TypeError, F.hsigmoid_loss, x_int32, label, 8,
                          weight)
        # label dtype must be int64, not float32
        label_float32 = paddle.static.data('label_float32', [4, 1],
                                           'float32')
        self.assertRaises(TypeError, F.hsigmoid_loss, x, label_float32, 8,
                          weight)
        # weight dtype must be a float type, not int32
        weight_int32 = paddle.static.data('weight_int32', [7, 6], 'int32')
        self.assertRaises(TypeError, F.hsigmoid_loss, x, label, 8,
                          weight_int32)
        # bias dtype must be a float type, not int32
        bias_int32 = paddle.static.data('bias_int32', [7], 'int32')
        self.assertRaises(TypeError, F.hsigmoid_loss, x, label, 8, weight,
                          bias=bias_int32)
        # path_table dtype must be int64, not int32
        path_table_int32 = paddle.static.data('path_table_int32', [7],
                                              'int32')
        self.assertRaises(TypeError, F.hsigmoid_loss, x, label, 8, weight,
                          path_table=path_table_int32)
        # path_code dtype must be int64, not int32
        path_code_int32 = paddle.static.data('path_code_int32', [7],
                                             'int32')
        self.assertRaises(TypeError, F.hsigmoid_loss, x, label, 8, weight,
                          path_code=path_code_int32)
    # test paddle.nn.HSigmoidLoss in dynamic mode with empty tensors
    paddle.disable_static(self.place)
    x_arr = np.array([], dtype=np.float32)
    x = paddle.to_tensor(np.reshape(x_arr, (100000, 0)))
    label = paddle.to_tensor(0, dtype='int64')
    self.assertRaises(ValueError, paddle.nn.HSigmoidLoss, x, label)
    # test paddle.nn.functional.hsigmoid_loss with empty inputs
    x = paddle.to_tensor(np.reshape(x_arr, (10, 0)), dtype='float32')
    label = paddle.to_tensor([], dtype='int64')
    weight = paddle.to_tensor([], dtype='float32')
    self.assertRaises(ValueError, F.hsigmoid_loss, x, label, 0, weight)
    # back to static mode for the legacy fluid API checks below
    paddle.enable_static()
    # test paddle.fluid.layers.hsigmoid
    with program_guard(Program()):
        label = fluid.data('label', [4, 1], 'int64')
        # The input type must be Variable.
        self.assertRaises(TypeError, fluid.layers.hsigmoid, 1, label, 2)
        # The input dtype must be float16, float32, float64.
        x_int32 = fluid.data(name='x_int32', shape=[4, 3], dtype='int32')
        self.assertRaises(TypeError, fluid.layers.hsigmoid, x_int32, label,
                          2)
        # support the input dtype is float32
        x_fp32 = fluid.data(name='x_fp32', shape=[4, 3], dtype='float32')
        fluid.layers.hsigmoid(x_fp32, label, 2)
        # The label type must be Variable.
        self.assertRaises(TypeError, fluid.layers.hsigmoid, x_fp32, 1, 2)
        # The label dtype must be int64.
        label_int32 = fluid.data('label_int32', [4, 1], 'int32')
        self.assertRaises(TypeError, fluid.layers.hsigmoid, x_fp32,
                          label_int32, 2)
def create_fake_model(program_config):
    """Create a Paddle model (in memory) according to the given config.

    Builds a raw ProgramDesc by hand — feed vars, weights, ops, fetch vars —
    and serializes it, while a small auxiliary program materializes the
    weights and saves them to memory via ``save_combine``.

    Args:
        program_config: describes inputs, weights, ops and outputs of the
            model to fabricate.

    Returns:
        tuple(bytes, bytes): (serialized program, serialized parameters).
    """
    paddle.enable_static()
    main_program_desc = core.ProgramDesc()
    # util_program only exists to initialize and serialize the weights.
    util_program = fluid.Program()
    main_block_desc = main_program_desc.block(0)

    # The shared "feed" variable that all feed ops read from.
    var_desc = main_block_desc.var(cpt.to_bytes("feed"))
    var_desc.set_type(core.VarDesc.VarType.FEED_MINIBATCH)
    var_desc.set_persistable(True)

    # One variable + one prepended feed op per configured input.
    index = 0
    for name, tensor_config in program_config.inputs.items():
        var_desc = main_block_desc.var(cpt.to_bytes(name))
        var_desc.set_type(core.VarDesc.VarType.LOD_TENSOR)
        var_desc.set_dtype(convert_np_dtype_to_dtype_(tensor_config.dtype))
        var_desc.set_shape(tensor_config.shape)
        var_desc.set_need_check_feed(True)
        if tensor_config.lod is not None:
            var_desc.set_lod_level(len(tensor_config.lod))
        # Feed ops must come before every compute op, hence _prepend_op.
        op_desc = main_block_desc._prepend_op()
        op_desc.set_type("feed")
        op_desc.set_input('X', ["feed"])
        op_desc.set_output('Out', [name])
        op_desc._set_attr("col", index)
        index = index + 1

    # Declare each weight in the main program and create a matching,
    # data-initialized parameter in util_program.
    save_var_map = {}
    for name, tensor_config in program_config.weights.items():
        var_desc = main_block_desc.var(cpt.to_bytes(name))
        var_desc.set_type(core.VarDesc.VarType.LOD_TENSOR)
        var_desc.set_dtype(convert_np_dtype_to_dtype_(tensor_config.dtype))
        var_desc.set_shape(tensor_config.shape)
        var_desc.set_persistable(True)
        save_var_map[name] = util_program.global_block().create_parameter(
            dtype=tensor_config.dtype,
            shape=tensor_config.shape,
            type=core.VarDesc.VarType.LOD_TENSOR,
            name=name,
            initializer=NumpyArrayInitializer(tensor_config.data))

    # save_combine serializes all weights (sorted by name for determinism)
    # into memory rather than to a file ('file_path' empty).
    in_vars = []
    for name in sorted(save_var_map.keys()):
        in_vars.append(save_var_map[name])
    out_var = util_program.global_block().create_var(
        type=core.VarDesc.VarType.RAW, name="out_var_0")
    out_var.desc.set_persistable(True)
    util_program.global_block().append_op(type='save_combine',
                                          inputs={'X': in_vars},
                                          outputs={'Y': out_var},
                                          attrs={
                                              'file_path': '',
                                              'save_to_memory': True
                                          })

    # Append the configured compute ops; output vars default to float32
    # unless program_config.outputs_dtype overrides them.
    for op_config in program_config.ops:
        op_desc = main_block_desc.append_op()
        op_desc.set_type(op_config.type)
        for name, values in op_config.inputs.items():
            op_desc.set_input(name, values)
        for name, values in op_config.attrs.items():
            op_desc._set_attr(name, values)
        for name, values in op_config.outputs.items():
            op_desc.set_output(name, values)
            for v in values:
                var_desc = main_block_desc.var(cpt.to_bytes(v))
                var_desc.set_type(core.VarDesc.VarType.LOD_TENSOR)
                var_desc.set_dtype(convert_np_dtype_to_dtype_(np.float32))
                if op_config.outputs_dtype is not None and v in op_config.outputs_dtype.keys(
                ):
                    var_desc.set_dtype(
                        convert_np_dtype_to_dtype_(op_config.outputs_dtype[v]))
        op_desc.infer_var_type(main_block_desc)
        op_desc.infer_shape(main_block_desc)
        op_desc.check_attrs()

    # One fetch op per configured output, all writing into "fetch".
    for index, name in enumerate(program_config.outputs):
        var_desc = main_block_desc.var(cpt.to_bytes("fetch"))
        var_desc.set_type(core.VarDesc.VarType.FETCH_LIST)
        var_desc.set_need_check_feed(True)
        op_desc = main_block_desc.append_op()
        op_desc.set_type("fetch")
        op_desc.set_input('X', [name])
        op_desc.set_output('Out', ["fetch"])
        op_desc._set_attr("col", index)

    main_program_desc._set_version()
    paddle.fluid.core.save_op_version_info(main_program_desc)
    model = main_program_desc.serialize_to_string()

    # Run util_program once in an isolated scope to produce the weight bytes.
    util_program._sync_with_cpp()
    place = fluid.CPUPlace()
    executor = fluid.Executor(place)
    scope = fluid.Scope()
    with fluid.scope_guard(scope):
        executor.run(util_program)
        params = scope.find_var("out_var_0").get_bytes()
    return model, params
def static_mode_guard():
    """Generator-based guard: run the enclosed block in static-graph mode.

    Enables static mode, yields control to the ``with``-body, and restores
    dynamic mode afterwards.

    Yields:
        None.
    """
    paddle.enable_static()
    try:
        yield
    finally:
        # Restore dynamic mode even when the with-body raises; without the
        # try/finally an exception would leave the process stuck in static
        # mode and poison every later test.
        paddle.disable_static()
sorted_idxs = np.argsort(-rpn_scores, axis=-1) if cfg.TEST.RPN_DISTANCE_BASED_PROPOSE: ret = distance_based_proposal(rpn_scores, proposals, sorted_idxs) else: ret = score_based_proposal(rpn_scores, proposals, sorted_idxs) return ret return generate_proposal if __name__ == "__main__": import paddle paddle.enable_static() np.random.seed(3333) x_np = np.random.random((4, 256, 84)).astype('float32') from config import cfg cfg.RPN.LOC_XZ_FINE = True # cfg.TEST.RPN_DISTANCE_BASED_PROPOSE = False # cfg.RPN.NMS_TYPE = 'rotate' proposal_func = get_proposal_func(cfg) x = fluid.data(name="x", shape=[None, 256, 84], dtype='float32') proposal = fluid.default_main_program().current_block().create_var( name="proposal", dtype='float32', shape=[256, 7]) fluid.layers.py_func(proposal_func, x, proposal) loss = fluid.layers.reduce_mean(proposal)
def run_static(self, place):
    """Run every paddle.topk variant in static-graph mode on ``place`` and
    compare the fetched results against the NumPy reference ``numpy_topk``.

    Covers: default axis, explicit axis, tensor-valued k, ``largest=False``,
    a large last-dim input, and ``sorted=False``.

    Args:
        place: the Paddle place (e.g. CPUPlace/CUDAPlace) to execute on.
    """
    paddle.enable_static()
    with paddle.static.program_guard(paddle.static.Program(),
                                     paddle.static.Program()):
        input_tensor = paddle.static.data(name="x",
                                          shape=[6, 7, 8],
                                          dtype="float64")
        large_input_tensor = paddle.static.data(name="large_x",
                                                shape=[2, 1030],
                                                dtype="float64")
        k_tensor = paddle.static.data(name="k", shape=[1], dtype="int32")
        result1 = paddle.topk(input_tensor, k=2)
        result2 = paddle.topk(input_tensor, k=2, axis=-1)
        result3 = paddle.topk(input_tensor, k=k_tensor, axis=1)
        # With a tensor-valued k the static shape along axis 1 is unknown (-1).
        self.assertEqual(result3[0].shape, (6, -1, 8))
        self.assertEqual(result3[1].shape, (6, -1, 8))
        result4 = paddle.topk(input_tensor, k=2, axis=1, largest=False)
        result5 = paddle.topk(input_tensor, k=2, axis=-1, largest=False)
        result6 = paddle.topk(large_input_tensor, k=1, axis=-1)
        result7 = paddle.topk(input_tensor, k=2, axis=1, sorted=False)
        exe = paddle.static.Executor(place)
        # NOTE(review): removed two dead locals (np.random.rand arrays named
        # input_data/large_input_data) — the feed below uses the fixtures
        # self.input_data / self.large_input_data, presumably set in setUp.
        paddle_result = exe.run(feed={
            "x": self.input_data,
            "large_x": self.large_input_data,
            "k": np.array([2]).astype("int32")
        },
                                fetch_list=[
                                    result1[0], result1[1], result2[0],
                                    result2[1], result3[0], result3[1],
                                    result4[0], result4[1], result5[0],
                                    result5[1], result6[0], result6[1],
                                    result7[0], result7[1]
                                ])
        # result1: default axis (-1)
        numpy_result = numpy_topk(self.input_data, k=2)
        self.assertTrue(np.allclose(paddle_result[0], numpy_result[0]))
        self.assertTrue(np.allclose(paddle_result[1], numpy_result[1]))
        # result2: explicit axis=-1
        numpy_result = numpy_topk(self.input_data, k=2, axis=-1)
        self.assertTrue(np.allclose(paddle_result[2], numpy_result[0]))
        self.assertTrue(np.allclose(paddle_result[3], numpy_result[1]))
        # result3: tensor k (fed as 2) on axis=1
        numpy_result = numpy_topk(self.input_data, k=2, axis=1)
        self.assertTrue(np.allclose(paddle_result[4], numpy_result[0]))
        self.assertTrue(np.allclose(paddle_result[5], numpy_result[1]))
        # result4: smallest-k on axis=1
        numpy_result = numpy_topk(self.input_data, k=2, axis=1,
                                  largest=False)
        self.assertTrue(np.allclose(paddle_result[6], numpy_result[0]))
        self.assertTrue(np.allclose(paddle_result[7], numpy_result[1]))
        # result5: smallest-k on axis=-1
        numpy_result = numpy_topk(self.input_data, k=2, axis=-1,
                                  largest=False)
        self.assertTrue(np.allclose(paddle_result[8], numpy_result[0]))
        self.assertTrue(np.allclose(paddle_result[9], numpy_result[1]))
        # result6: large last-dim input
        numpy_result = numpy_topk(self.large_input_data, k=1, axis=-1)
        self.assertTrue(np.allclose(paddle_result[10], numpy_result[0]))
        self.assertTrue(np.allclose(paddle_result[11], numpy_result[1]))
        # result7: sorted=False — re-sort the fetched values before comparing.
        sort_paddle = numpy_topk(paddle_result[12], axis=1, k=2)
        numpy_result = numpy_topk(self.input_data, k=2, axis=1)
        self.assertTrue(np.allclose(sort_paddle[0], numpy_result[0]))