def __init__(self, func):
    self.seed = 33
    np.random.seed(self.seed)
    # debug mode
    self.debug = False
    # if debug mode is True, choose whether to test dygraph or static
    self.static = True
    self.enable_backward = True
    self.dtype = None
    # function for paddle api
    self.func = func
    self.types = []
    self.places = []
    self.backward_dtype = [np.float16, np.float32, np.float64]
    # no grad var
    self.no_grad_var = []
    # gradient-check deltas; you can override these values
    self.delta = 1e-6
    self.gap = 0.001
    self.rtol = 1e-7
    # choose layer type [functional or class]
    self._layertypes(func)
    # run hook, use user-defined vars and initials
    self.hook()
    # check self.types
    if not isinstance(self.types, list):
        raise TypeError("Types must be a list.")
    if len(self.types) == 0:
        raise TypeError("You must define types in hook function.")
    # set the execution devices (only if the hook did not set them)
    if len(self.places) == 0:
        if fluid.is_compiled_with_cuda():
            self.places = [fluid.CPUPlace(), fluid.CUDAPlace(7)]
        elif paddle.is_compiled_with_npu():
            self.places = [fluid.CPUPlace(), paddle.NPUPlace(7)]
        else:
            self.places = [fluid.CPUPlace()]
    # self.places = [paddle.NPUPlace(7)]
    if self.debug:
        logging.basicConfig(
            level=logging.DEBUG,
            format='%(asctime)s - %(levelname)s - %(message)s')
    else:
        logging.basicConfig(
            level=logging.ERROR,
            format='%(asctime)s - %(levelname)s - %(message)s')
def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) self.op_type = "adam" param = np.random.uniform(-1, 1, (102, 105)).astype("float32") grad = np.random.uniform(-1, 1, (102, 105)).astype("float32") moment1 = np.random.uniform(-1, 1, (102, 105)).astype("float32") # The second moment is positive moment2 = np.random.random((102, 105)).astype("float32") learning_rate = 0.004 beta1 = 0.78 beta2 = 0.836 epsilon = 1e-4 beta1_pow = beta1**10 beta2_pow = beta2**10 self.inputs = { 'Param': param, 'Grad': grad, 'Moment1': moment1, 'Moment2': moment2, 'LearningRate': np.array([learning_rate]).astype("float32"), 'Beta1Pow': np.array([beta1_pow]).astype("float32"), 'Beta2Pow': np.array([beta2_pow]).astype("float32"), 'Beta1Tensor': np.array([beta1]).astype("float32"), 'Beta2Tensor': np.array([beta2]).astype("float32"), 'EpsilonTensor': np.array([epsilon]).astype("float32"), } attributes = {'epsilon': epsilon} param_out, moment1_out, \ moment2_out = adam_step(self.inputs, attributes) self.attrs = {'use_global_beta_pow': True} # use_global_beta_pow=True, Beta1PowOut and Beta2PowOut are empty. self.outputs = { 'Moment1Out': moment1_out, 'Moment2Out': moment2_out, 'ParamOut': param_out, 'Beta1PowOut': np.array([]), 'Beta2PowOut': np.array([]) }
def setUp(self):
    self.initTestCase()
    self.set_npu()
    self.place = paddle.NPUPlace(0)
    self.op_type = 'strided_slice'
    self.output = strided_slice_native_forward(
        self.input, self.axes, self.starts, self.ends, self.strides)

    self.inputs = {'Input': self.input}
    self.outputs = {'Out': self.output}
    self.attrs = {
        'axes': self.axes,
        'starts': self.starts,
        'ends': self.ends,
        'strides': self.strides,
        'infer_flags': self.infer_flags
    }
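# `strided_slice_native_forward` is defined elsewhere in the test file; a
# minimal NumPy sketch is included here for readability. It assumes
# non-negative axes and standard Python slice semantics.
def strided_slice_native_forward(input, axes, starts, ends, strides):
    result = input.copy()
    for axis, start, end, stride in zip(axes, starts, ends, strides):
        # slice only the given axis, leaving the others untouched
        slices = [slice(None)] * result.ndim
        slices[axis] = slice(start, end, stride)
        result = result[tuple(slices)]
    return result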
def test_api(self):
    with fluid.dygraph.guard(paddle.NPUPlace(0)):
        np_x = np.random.random([12, 14]).astype("float32")
        x = paddle.to_tensor(np_x)

        positive_2 = np.array([2]).astype("int32")
        positive_2 = paddle.to_tensor(positive_2)

        repeat_times = np.array([2, 3]).astype("int32")
        repeat_times = paddle.to_tensor(repeat_times)

        out_1 = paddle.tile(x, repeat_times=[2, 3])
        out_2 = paddle.tile(x, repeat_times=[positive_2, 3])
        out_3 = paddle.tile(x, repeat_times=repeat_times)

        assert np.array_equal(out_1.numpy(), np.tile(np_x, (2, 3)))
        assert np.array_equal(out_2.numpy(), np.tile(np_x, (2, 3)))
        assert np.array_equal(out_3.numpy(), np.tile(np_x, (2, 3)))
def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) self.op_type = "expand_v2" self.init_data() self.dtype = np.float32 expand_shapes_tensor = [] for index, ele in enumerate(self.expand_shape): expand_shapes_tensor.append(("x" + str(index), np.ones( (1)).astype('int32') * ele)) self.inputs = { 'X': np.random.random(self.ori_shape).astype(self.dtype), 'expand_shapes_tensor': expand_shapes_tensor, } self.attrs = {"shape": self.infer_expand_shape} output = np.tile(self.inputs['X'], self.expand_times) self.outputs = {'Out': output}
def setUp(self): self.place = paddle.NPUPlace(0) self.op_type = "strided_slice" self.set_npu() self.config() self.inputs = { 'Input': self.input, "StridesTensor": np.array(self.strides, dtype="int32") } self.outputs = {'Out': self.output} self.attrs = { 'axes': self.axes, 'starts': self.starts, 'ends': self.ends, #'strides': self.strides, 'infer_flags': self.infer_flags, }
def setUp(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) self.out_size = None self.actual_shape = None self.init_test_case() self.op_type = "nearest_interp" self.shape_by_1Dtensor = False self.scale_by_1Dtensor = False self.attrs = { 'interp_method': self.interp_method, 'align_corners': self.align_corners, } input_np = np.random.random(self.input_shape).astype("float32") self.inputs = {'X': input_np} if self.scale_by_1Dtensor: self.inputs['Scale'] = np.array([self.scale]).astype("float64") elif self.scale > 0: out_h = int(self.input_shape[2] * self.scale) out_w = int(self.input_shape[3] * self.scale) self.attrs['scale'] = self.scale else: out_h = self.out_h out_w = self.out_w if self.shape_by_1Dtensor: self.inputs['OutSize'] = self.out_size elif self.out_size is not None: size_tensor = [] for index, ele in enumerate(self.out_size): size_tensor.append(("x" + str(index), np.ones( (1)).astype('int32') * ele)) self.inputs['SizeTensor'] = size_tensor self.attrs['out_h'] = self.out_h self.attrs['out_w'] = self.out_w output_np = nearest_neighbor_interp_np(input_np, out_h, out_w, self.out_size, self.actual_shape, self.align_corners) self.outputs = {'Out': output_np}
def test_static(self):
    # NPU is not supported in ParallelExecutor
    prog = paddle.static.Program()
    with paddle.static.program_guard(prog):
        x_np = np.array([2, 3, 4]).astype('float32')
        y_np = np.array([1, 5, 2]).astype('float32')

        x = paddle.static.data(name="x", shape=[3], dtype='float32')
        y = paddle.static.data(name="y", shape=[3], dtype='float32')
        z = paddle.add(x, y)

        compiled_prog = paddle.static.CompiledProgram(prog)

        place = paddle.NPUPlace(0)
        exe = paddle.static.Executor(place)

        with self.assertRaisesRegex(
                RuntimeError, "NPU is not supported in ParallelExecutor"):
            exe.run(compiled_prog, feed={"x": x_np, "y": y_np})
def setUp(self):
    self.__class__.use_npu = True
    self.place = paddle.NPUPlace(0)
    self.out_size = None
    self.actual_shape = None
    self.data_layout = 'NCHW'
    self.init_test_case()
    self.op_type = "nearest_interp"

    input_np = np.random.random(self.input_shape).astype("float32")
    if self.data_layout == "NCHW":
        in_h = self.input_shape[2]
        in_w = self.input_shape[3]
    else:
        in_h = self.input_shape[1]
        in_w = self.input_shape[2]

    if self.scale > 0:
        out_h = int(in_h * self.scale)
        out_w = int(in_w * self.scale)
    else:
        out_h = self.out_h
        out_w = self.out_w

    output_np = nearest_neighbor_interp_np(input_np, out_h, out_w,
                                           self.out_size, self.actual_shape,
                                           self.align_corners,
                                           self.data_layout)
    self.inputs = {'X': input_np}
    if self.out_size is not None:
        self.inputs['OutSize'] = self.out_size
    if self.actual_shape is not None:
        self.inputs['OutSize'] = self.actual_shape

    self.attrs = {
        'out_h': self.out_h,
        'out_w': self.out_w,
        'scale': self.scale,
        'interp_method': self.interp_method,
        'align_corners': self.align_corners,
        'data_layout': self.data_layout
    }
    self.outputs = {'Out': output_np}
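# Both nearest_interp setups above rely on a `nearest_neighbor_interp_np`
# reference that is not part of this section. The sketch below captures the
# assumed behavior (nearest lookup with optional corner alignment); the
# signature is inferred from the call sites and the rounding rules are
# assumptions, not the definitive helper.
def nearest_neighbor_interp_np(X, out_h, out_w, out_size=None,
                               actual_shape=None, align_corners=True,
                               data_layout='NCHW'):
    if data_layout == 'NHWC':
        X = np.transpose(X, (0, 3, 1, 2))  # compute in NCHW
    if out_size is not None:
        out_h, out_w = int(out_size[0]), int(out_size[1])
    if actual_shape is not None:
        out_h, out_w = int(actual_shape[0]), int(actual_shape[1])
    n, c, in_h, in_w = X.shape
    ratio_h = ((in_h - 1.0) / (out_h - 1.0)
               if align_corners and out_h > 1 else in_h / out_h)
    ratio_w = ((in_w - 1.0) / (out_w - 1.0)
               if align_corners and out_w > 1 else in_w / out_w)
    out = np.zeros((n, c, out_h, out_w), dtype=X.dtype)
    for i in range(out_h):
        in_i = int(ratio_h * i + 0.5) if align_corners else int(ratio_h * i)
        for j in range(out_w):
            in_j = int(ratio_w * j + 0.5) if align_corners else int(ratio_w * j)
            out[:, :, i, j] = X[:, :, in_i, in_j]
    if data_layout == 'NHWC':
        out = np.transpose(out, (0, 2, 3, 1))  # back to NHWC
    return out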
def setUp(self):
    self.set_npu()
    self.set_example()
    self.op_type = "split"
    self.place = paddle.NPUPlace(0)

    ipt = self.x.astype(self.dtype)
    axis = self.axis if isinstance(self.axis, int) else int(self.axis[0])
    tmp_outs = np.split(
        ipt, axis=axis, indices_or_sections=self.num_or_sections)
    tmp_outs = [o.astype(self.dtype) for o in tmp_outs]

    self.outputs = {'Out': []}
    self.outs = []
    for i, o in enumerate(tmp_outs):
        self.outputs["Out"].append((str(i), o))
        self.outs.append(str(i))

    self.attrs = {"axis": self.axis, "num": self.num_or_sections}
    self.inputs = {}
    self.inputs.update({'X': ipt.astype(self.dtype)})
def test_out(self):
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        data1 = fluid.layers.data('data1', shape=[1, 2], dtype='float32')
        data2 = fluid.layers.data('data2', shape=[1, 2], dtype='float32')
        data3 = fluid.layers.data('data3', shape=[1, 2], dtype='float32')
        result_stack = paddle.stack([data1, data2, data3], axis=0)

        place = paddle.NPUPlace(0)
        exe = fluid.Executor(place)
        input1 = np.random.random([1, 2]).astype('float32')
        input2 = np.random.random([1, 2]).astype('float32')
        input3 = np.random.random([1, 2]).astype('float32')
        result, = exe.run(feed={
            "data1": input1,
            "data2": input2,
            "data3": input3
        },
                          fetch_list=[result_stack])

        expected_result = np.stack([input1, input2, input3], axis=0)
        self.assertTrue(np.allclose(expected_result, result))
def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) self.op_type = "smooth_l1_loss" dims = (5, 20) self.inputs = { 'X': np.random.random(dims).astype("float32"), 'Y': np.random.random(dims).astype("float32") } sigma = 3.0 self.attrs = {'sigma': sigma} sigma2 = sigma * sigma diff = self.inputs['X'] - self.inputs['Y'] loss = np.vectorize(smooth_l1_loss_forward)(diff, sigma2).sum(1) loss = loss.reshape((dims[0], 1)) self.outputs = { 'Diff': diff.astype('float32'), 'Out': loss.astype('float32') }
def test_static_mode(self):
    shape = [8, 9, 6]
    x = paddle.fluid.data(shape=shape, dtype='float32', name='x')
    mask = paddle.fluid.data(shape=shape, dtype='bool', name='mask')
    np_x = np.random.random(shape).astype('float32')
    np_mask = np.array(np.random.randint(2, size=shape, dtype=bool))

    out = paddle.masked_select(x, mask)
    np_out = np_masked_select(np_x, np_mask)

    exe = paddle.static.Executor(place=paddle.NPUPlace(0))
    res = exe.run(paddle.static.default_main_program(),
                  feed={"x": np_x,
                        "mask": np_mask},
                  fetch_list=[out])
    self.assertEqual(np.allclose(res, np_out), True)
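# `np_masked_select` is assumed to be a one-line NumPy reference for
# paddle.masked_select: boolean indexing, flattened to 1-D.
def np_masked_select(x, mask):
    return x[mask].flatten()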
def test_type_error(unit_test, use_npu, type_str_map):
    def check_type(op_str, x, y, binary_op):
        op = getattr(paddle, op_str)
        error_type = ValueError
        if isinstance(x, np.ndarray):
            x = paddle.to_tensor(x)
            y = paddle.to_tensor(y)
            error_type = BaseException
        if binary_op:
            if type_str_map['x'] != type_str_map['y']:
                unit_test.assertRaises(error_type, op, x=x, y=y)
            if not fluid.in_dygraph_mode():
                error_type = TypeError
                unit_test.assertRaises(error_type, op, x=x, y=y, out=1)
        else:
            if not fluid.in_dygraph_mode():
                error_type = TypeError
                unit_test.assertRaises(error_type, op, x=x, out=1)

    place = paddle.CPUPlace()
    if use_npu and fluid.core.is_compiled_with_npu():
        place = paddle.NPUPlace(0)

    for op_data in TEST_META_OP_DATA:
        meta_data = dict(op_data)
        binary_op = meta_data['binary_op']

        paddle.disable_static(place)
        x = np.random.choice(a=[0, 1], size=[10]).astype(type_str_map['x'])
        y = np.random.choice(a=[0, 1], size=[10]).astype(type_str_map['y'])
        check_type(meta_data['op_str'], x, y, binary_op)

        paddle.enable_static()
        startup_program = paddle.static.Program()
        main_program = paddle.static.Program()
        with paddle.static.program_guard(main_program, startup_program):
            x = paddle.static.data(
                name='x', shape=[10], dtype=type_str_map['x'])
            y = paddle.static.data(
                name='y', shape=[10], dtype=type_str_map['y'])
            check_type(meta_data['op_str'], x, y, binary_op)
def setUp(self): self.place = paddle.NPUPlace(0) self.op_type = "strided_slice" self.config() self.set_npu() ends_tensor = [] for index, ele in enumerate(self.ends): ends_tensor.append(("x" + str(index), np.ones( (1)).astype('int32') * ele)) self.inputs = {'Input': self.input, 'EndsTensorList': ends_tensor} self.outputs = {'Out': self.output} self.attrs = { 'axes': self.axes, 'starts': self.starts, 'ends': self.ends_infer, 'strides': self.strides, 'infer_flags': self.infer_flags }
def _run_static_parallel(use_cuda, use_xpu, use_npu, device_list):
    """
    Test the simple network in data-parallel mode on multiple devices.

    Args:
        use_cuda (bool): Whether running with CUDA.
        use_xpu (bool): Whether running with XPU.
        use_npu (bool): Whether running with NPU.
        device_list (list): The specified devices.
    """
    paddle.enable_static()
    with paddle.static.scope_guard(paddle.static.Scope()):
        train_prog = paddle.static.Program()
        startup_prog = paddle.static.Program()
        with paddle.static.program_guard(train_prog, startup_prog):
            input, out, _ = _simple_network()
            loss = paddle.tensor.mean(out)
            loss.persistable = True
            paddle.optimizer.SGD(learning_rate=0.01).minimize(loss)

        compiled_prog = paddle.static.CompiledProgram(
            train_prog).with_data_parallel(
                loss_name=loss.name, places=device_list)

        if use_cuda:
            place = paddle.CUDAPlace(0)
        elif use_xpu:
            place = paddle.XPUPlace(0)
            compiled_prog = train_prog
        elif use_npu:
            place = paddle.NPUPlace(0)
            compiled_prog = train_prog
        else:
            place = paddle.CPUPlace()

        exe = paddle.static.Executor(place)
        exe.run(startup_prog)
        exe.run(compiled_prog,
                feed={input.name: _prepare_data(len(device_list))},
                fetch_list=[loss.name])
    paddle.disable_static()
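# `_simple_network` and `_prepare_data` come from the surrounding module. The
# hypothetical sketches below only illustrate the contract the function above
# expects (an input var, an output var, a weight, and a feed array sized for
# the device count); the names, shapes, and layers are assumptions.
def _simple_network():
    input = paddle.static.data(
        name="input", shape=[None, 2, 2], dtype="float32")
    weight = paddle.create_parameter(shape=[2, 3], dtype="float32")
    # a single linear transform followed by a reduction
    out = paddle.tensor.sum(paddle.matmul(input, weight))
    return input, out, weight


def _prepare_data(device_count=1):
    # one (2, 2) sample per device so data-parallel splitting works
    return np.random.random(
        size=(max(device_count, 1), 2, 2)).astype("float32")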
def _test(self, run_npu=True):
    main_prog = paddle.static.Program()
    startup_prog = paddle.static.Program()
    main_prog.random_seed = SEED
    startup_prog.random_seed = SEED
    np.random.seed(SEED)

    a_np = np.random.random(size=(32, 1)).astype('float32')
    label_np = np.random.randint(2, size=(32, 1)).astype('int64')

    with paddle.static.program_guard(main_prog, startup_prog):
        a = paddle.static.data(name="a", shape=[32, 1], dtype='float32')
        label = paddle.static.data(
            name="label", shape=[32, 1], dtype='int64')

        res = paddle.fluid.layers.expand(a, [1, 32])
        loss = res.sum()
        sgd = fluid.optimizer.SGD(learning_rate=0.01)
        sgd.minimize(loss)

    if run_npu:
        place = paddle.NPUPlace(0)
    else:
        place = paddle.CPUPlace()

    exe = paddle.static.Executor(place)
    exe.run(startup_prog)

    for epoch in range(100):
        loss_res = exe.run(main_prog,
                           feed={"a": a_np,
                                 "label": label_np},
                           fetch_list=[loss])
        if epoch % 10 == 0:
            # print the fetched value, not the static Variable `loss`
            print("Epoch {} | Loss: {}".format(epoch, loss_res))

    return loss_res
def setUp(self): self.set_npu() self.op_type = "concat" self.place = paddle.NPUPlace(0) self.init_dtype() self.init_test_data() self.inputs = { 'X': [('x0', self.x0), ('x1', self.x1), ('x2', self.x2)] } self.attrs = {'axis': self.axis} if self.axis < 0: self.actual_axis = self.axis + len(self.x0.shape) self.actual_axis = self.actual_axis if self.actual_axis > 0 else 0 else: self.actual_axis = self.axis self.outputs = { 'Out': np.concatenate((self.x0, self.x1, self.x2), axis=self.actual_axis) }
def run_static(x_np, y_np, op_str, use_npu=False, binary_op=True):
    paddle.enable_static()
    startup_program = fluid.Program()
    main_program = fluid.Program()
    place = paddle.CPUPlace()
    if use_npu and fluid.core.is_compiled_with_npu():
        place = paddle.NPUPlace(0)
    exe = fluid.Executor(place)
    with fluid.program_guard(main_program, startup_program):
        x = paddle.static.data(name='x', shape=x_np.shape, dtype=x_np.dtype)
        op = getattr(paddle, op_str)
        feed_list = {'x': x_np}
        if not binary_op:
            res = op(x)
        else:
            y = paddle.static.data(
                name='y', shape=y_np.shape, dtype=y_np.dtype)
            feed_list['y'] = y_np
            res = op(x, y)
        exe.run(startup_program)
        static_result = exe.run(main_program,
                                feed=feed_list,
                                fetch_list=[res])
    return static_result
def test_simple_net(self):
    paddle.enable_static()
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        loss, sum_result = self.simple_net()

        append_backward(loss)

        npu_place = paddle.NPUPlace(0)
        exe = Executor(npu_place)
        d = []

        for i in range(3):
            d.append(numpy.random.random(size=[10]).astype('float32'))

        outs = exe.run(feed={'d0': d[0],
                             'd1': d[1],
                             'd2': d[2]},
                       fetch_list=[sum_result])

        self.assertAlmostEqual(numpy.sum(d), numpy.sum(outs[0]), delta=0.01)
def test_api_with_dygraph_tuple_input(self):
    paddle.disable_static(paddle.NPUPlace(0))

    input_3 = np.random.randint(0, 100, [100]).astype('int32')
    input_4 = np.random.randint(0, 100, [200]).astype('int32')

    out_3 = np.reshape(input_3, [100, 1])
    out_3 = np.broadcast_to(out_3, [100, 200])
    out_4 = np.reshape(input_4, [1, 200])
    out_4 = np.broadcast_to(out_4, [100, 200])

    tensor_3 = paddle.to_tensor(input_3)
    tensor_4 = paddle.to_tensor(input_4)
    res_3, res_4 = paddle.tensor.meshgrid((tensor_3, tensor_4))

    self.assertTrue(np.allclose(res_3.numpy(), out_3))
    self.assertTrue(np.allclose(res_4.numpy(), out_4))

    paddle.enable_static()
def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) self.op_type = "adamw" param = np.random.uniform(-1, 1, (102, 105)).astype("float32") grad = np.random.uniform(-1, 1, (102, 105)).astype("float32") moment1 = np.random.uniform(-1, 1, (102, 105)).astype("float32") # The second moment is positive moment2 = np.random.random((102, 105)).astype("float32") learning_rate = 0.004 beta1 = 0.78 beta2 = 0.836 epsilon = 1e-4 beta1_pow = beta1**10 beta2_pow = beta2**10 self.inputs = { 'Param': param, 'Grad': grad, 'Moment1': moment1, 'Moment2': moment2, 'LearningRate': np.array([learning_rate]).astype("float32"), 'Beta1Pow': np.array([beta1_pow]).astype("float32"), 'Beta2Pow': np.array([beta2_pow]).astype("float32"), 'Beta1Tensor': np.array([beta1]).astype("float32"), 'Beta2Tensor': np.array([beta2]).astype("float32"), 'EpsilonTensor': np.array([epsilon]).astype("float32"), "SkipUpdate": np.array([True]).astype("bool"), } self.attrs = {'epsilon': epsilon, "coeff": 0.02, "with_decay": True} self.outputs = { 'Moment1Out': moment1, 'Moment2Out': moment2, 'ParamOut': param, 'Beta1PowOut': self.inputs['Beta1Pow'], 'Beta2PowOut': self.inputs['Beta2Pow'], }
def _test(self, run_npu=True):
    main_prog = paddle.static.Program()
    startup_prog = paddle.static.Program()
    main_prog.random_seed = SEED
    startup_prog.random_seed = SEED
    np.random.seed(SEED)

    a_np = np.random.random(size=(2, 3, 4)).astype('float32')
    b_np = np.random.random(size=(2, 3, 4)).astype('float32')

    with paddle.static.program_guard(main_prog, startup_prog):
        a = paddle.static.data(name="a", shape=[2, 3, 4], dtype='float32')
        b = paddle.static.data(name="b", shape=[2, 3, 4], dtype='float32')
        z = paddle.add(a, b)
        loss = fluid.layers.reduce_sum(z)
        sgd = fluid.optimizer.SGD(learning_rate=0.01)
        sgd.minimize(loss)

    if run_npu:
        place = paddle.NPUPlace(0)
    else:
        place = paddle.CPUPlace()

    exe = paddle.static.Executor(place)
    exe.run(startup_prog)

    print("Start run on {}".format(place))
    for epoch in range(100):
        loss_res = exe.run(main_prog,
                           feed={"a": a_np,
                                 "b": b_np},
                           fetch_list=[loss])
        if epoch % 10 == 0:
            print("Epoch {} | Loss: {}".format(epoch, loss_res))

    return loss_res, loss_res
def setUp(self):
    paddle.enable_static()
    self.set_npu()
    self.op_type = "hard_swish"
    self.place = paddle.NPUPlace(0)

    self.init_dtype()
    x = np.random.uniform(-6, 6, [10, 12]).astype(self.dtype)
    threshold = 6.0
    scale = 6.0
    offset = 3.0
    # Nudge values away from the non-differentiable points,
    # the same as in TestAbs.
    x[np.abs(x + offset) < 0.005] = 0.02
    x[np.abs(x - threshold + offset) < 0.005] = threshold - offset + 0.02
    out = (x * (np.minimum(np.maximum(x + offset, 0.), threshold) /
                scale)).astype(self.dtype)
    self.x_grad = ref_hard_swish_grad(x, threshold, scale, offset)

    self.inputs = {'X': x}
    self.attrs = {'threshold': threshold, 'scale': scale, 'offset': offset}
    self.outputs = {'Out': out}
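# `ref_hard_swish_grad` is the reference gradient helper used above. A hedged
# sketch, derived from f(x) = x * clip(x + offset, 0, threshold) / scale and
# assuming an upstream gradient of ones:
def ref_hard_swish_grad(x, threshold, scale, offset):
    below = ((x + offset) < threshold).astype(x.dtype)  # inside the ramp
    active = ((x + offset) > 0).astype(x.dtype)         # past the zero cut
    # ramp region: d/dx = (2x + offset)/scale; saturated: threshold/scale
    return active * (below * (2.0 * x + offset) / scale +
                     (1.0 - below) * threshold / scale)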
def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) self.op_type = "adam" param = np.random.uniform(-1, 1, (102, 105)).astype("float32") grad = np.random.uniform(-1, 1, (102, 105)).astype("float32") moment1 = np.random.uniform(-1, 1, (102, 105)).astype("float32") # The second moment is positive moment2 = np.random.random((102, 105)).astype("float32") learning_rate = 0.004 beta1 = 0.78 beta2 = 0.836 epsilon = 1e-4 beta1_pow = beta1**10 beta2_pow = beta2**10 self.inputs = { 'Param': param, 'Grad': grad, 'Moment1': moment1, 'Moment2': moment2, 'LearningRate': np.array([learning_rate]).astype("float32"), 'Beta1Pow': np.array([beta1_pow]).astype("float32"), 'Beta2Pow': np.array([beta2_pow]).astype("float32") } self.attrs = {'epsilon': epsilon, 'beta1': beta1, 'beta2': beta2} param_out, moment1_out, \ moment2_out = adam_step(self.inputs, self.attrs) self.outputs = { 'Moment1Out': moment1_out, 'Moment2Out': moment2_out, 'ParamOut': param_out, 'Beta1PowOut': np.array([beta1_pow]).astype("float32") * beta1, 'Beta2PowOut': np.array([beta2_pow]).astype("float32") * beta2 }
def setUp(self): self.set_npu() self.init_dtype() self.op_type = "sum" self.place = paddle.NPUPlace(0) x0 = np.random.random((3, 3)).astype(self.dtype) x1 = np.random.random((3, 3)).astype(self.dtype) x2 = np.random.random((3, 3)).astype(self.dtype) x3 = np.random.random((3, 3)).astype(self.dtype) self.inputs = {'X': [("x0", x0), ("x1", x1), ("x2", x2), ("x3", x3)]} # There will be a problem if just using `y=x0+x1+x2+x3` to calculate the # summation result as the reference standard result. The reason is that # numpy's fp16 data has precision loss when doing `add` operation. # For example, the results of `x0+x1+x2+x3` is different from that of # `x3+x2+x1+x0` if the dtype is fp16. # Therefore, converting the input to fp32 for calculation. y = (x0.astype(np.float32) + x1.astype(np.float32) + x2.astype(np.float32) + x3.astype(np.float32)).astype(self.dtype) self.outputs = {'Out': y} self.attrs = {'use_mkldnn': False}
def setUp(self):
    np.random.seed(SEED)
    self.set_npu()
    self.init_dtype()
    self.place = paddle.NPUPlace(0)
    self.init_op_type()
    self.initTestCase()

    self.use_mkldnn = False
    self.attrs = {
        'dim': self.axis,
        'keep_dim': self.keep_dim,
        'reduce_all': self.reduce_all
    }
    self.inputs = {'X': np.random.random(self.shape).astype(self.dtype)}
    if self.attrs['reduce_all']:
        self.outputs = {'Out': self.inputs['X'].sum()}
    else:
        self.outputs = {
            'Out': self.inputs['X'].sum(
                axis=self.axis, keepdims=self.attrs['keep_dim'])
        }
def setUp(self): self.set_npu() self.place = paddle.NPUPlace(0) self.op_type = "beam_search" self.init_data() self.inputs = { 'pre_ids': (self.pre_ids, self.lod), 'pre_scores': (self.pre_score, self.lod), 'ids': (self.ids, self.lod), 'scores': (self.score, self.lod) } # The `target_lod` attribute is still based on offset self.attrs = { 'level': 0, 'beam_size': self.beam_size, 'end_id': 0, 'is_accumulated': self.is_accumulated } self.outputs = { 'selected_ids': (self.selected_ids, self.out_lod), 'selected_scores': (self.selected_scores, self.out_lod), 'parent_idx': self.parent_idx }
def setUp(self): self.set_npu() self.op_type = "update_loss_scaling" self.place = paddle.NPUPlace(0) self.init() found_inf = np.array([False], dtype=np.bool) x = np.random.random((1024, 1024)).astype(self.dtype) self.inputs = { 'X': [('x0', x)], 'FoundInfinite': found_inf, 'PrevLossScaling': self.prev_loss_scaling, 'InGoodSteps': self.num_good_steps, 'InBadSteps': self.num_bad_steps } self.outputs = { 'Out': [('out0', x)], 'LossScaling': self.prev_loss_scaling * self.incr_ratio, 'OutGoodSteps': self.zero_steps, 'OutBadSteps': self.zero_steps }
def set_npu(self):
    self.__class__.use_npu = True
    self.place = paddle.NPUPlace(0)