def check_output_customized(self, checker):
    """Compute the op's outputs on every supported device and hand the
    numpy-converted results to *checker* for custom validation."""
    devices = [core.CPUPlace()]
    if core.is_compiled_with_cuda() and core.op_support_gpu(self.op_type):
        devices.append(core.CUDAPlace(0))
    for device in devices:
        raw_outputs = self.calc_output(device)
        checker([np.array(item) for item in raw_outputs])
def net_profiler(self, state, profile_path='/tmp/profile'):
    """Build a small MLP with a While loop, train it for 10 mini-batches
    and exercise the profiler in the given *state* ('CPU', 'GPU' or 'All').

    Silently skips when a GPU state is requested but this build has no CUDA.
    """
    enable_if_gpu = state == 'GPU' or state == "All"
    if enable_if_gpu and not core.is_compiled_with_cuda():
        return
    startup_program = fluid.Program()
    main_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        image = fluid.layers.data(name='x', shape=[784], dtype='float32')
        hidden1 = fluid.layers.fc(input=image, size=64, act='relu')
        i = layers.zeros(shape=[1], dtype='int64')
        # force_cpu keeps the loop counter on the host so increment works
        # uniformly regardless of the execution place.
        counter = fluid.layers.zeros(
            shape=[1], dtype='int64', force_cpu=True)
        until = layers.fill_constant([1], dtype='int64', value=10)
        data_arr = layers.array_write(hidden1, i)
        cond = fluid.layers.less_than(x=counter, y=until)
        while_op = fluid.layers.While(cond=cond)
        with while_op.block():
            hidden_n = fluid.layers.fc(input=hidden1, size=64, act='relu')
            layers.array_write(hidden_n, i, data_arr)
            fluid.layers.increment(x=counter, value=1, in_place=True)
            # refresh the loop condition in place
            layers.less_than(x=counter, y=until, cond=cond)
        hidden_n = layers.array_read(data_arr, i)
        hidden2 = fluid.layers.fc(input=hidden_n, size=64, act='relu')
        predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
        label = fluid.layers.data(name='y', shape=[1], dtype='int64')
        cost = fluid.layers.cross_entropy(input=predict, label=label)
        avg_cost = fluid.layers.mean(cost)
        batch_size = fluid.layers.create_tensor(dtype='int64')
        batch_acc = fluid.layers.accuracy(
            input=predict, label=label, total=batch_size)
        optimizer = fluid.optimizer.Momentum(learning_rate=0.001,
                                             momentum=0.9)
        opts = optimizer.minimize(avg_cost,
                                  startup_program=startup_program)
    place = fluid.CPUPlace() if state == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_program)
    pass_acc_calculator = fluid.average.WeightedAverage()
    with profiler.profiler(state, 'total', profile_path) as prof:
        for iter in range(10):
            if iter == 2:
                # drop warm-up iterations from the profiled statistics
                profiler.reset_profiler()
            x = np.random.random((32, 784)).astype("float32")
            y = np.random.randint(0, 10, (32, 1)).astype("int64")
            outs = exe.run(main_program,
                           feed={'x': x,
                                 'y': y},
                           fetch_list=[avg_cost, batch_acc, batch_size])
            acc = np.array(outs[1])
            b_size = np.array(outs[2])
            pass_acc_calculator.add(value=acc, weight=b_size)
            pass_acc = pass_acc_calculator.eval()
def test_check_output(self):
    """Run batch_norm checks for every place, layout and input shape."""
    candidate_places = [core.CPUPlace()]
    if core.is_compiled_with_cuda() and core.op_support_gpu("batch_norm"):
        candidate_places.append(core.CUDAPlace(0))
    for candidate in candidate_places:
        for layout in ("NCHW", "NHWC"):
            # 4-D activation first, then the degenerate 2-D case.
            for shape in ([2, 3, 4, 5], [2, 3]):
                self.check_with_place(candidate, layout, self.dtype, shape)
def run_program(self):
    """Execute the default programs on each available place and collect
    the raw (non-numpy) fetch results into ``self.actual_outputs``."""
    collected = []
    candidate_places = [core.CPUPlace()]
    if core.is_compiled_with_cuda():
        candidate_places.append(core.CUDAPlace(0))
    for candidate in candidate_places:
        self.set_inputs(candidate)
        executor = fluid.Executor(candidate)
        executor.run(fluid.default_startup_program())
        result = executor.run(fluid.default_main_program(),
                              feed=self.inputs,
                              fetch_list=self.fetch_list,
                              return_numpy=False)
        collected.append(result)
    self.actual_outputs = collected
def check_grad(self,
               inputs_to_check,
               output_names,
               no_grad_set=None,
               numeric_grad_delta=0.005,
               in_place=False,
               max_relative_error=0.005,
               user_defined_grads=None):
    """Run the gradient check on every place this op supports.

    Delegates all real work to ``check_grad_with_place``; the GPU place is
    added only when the build has CUDA and the op has a GPU kernel.
    """
    candidate_places = [core.CPUPlace()]
    if core.is_compiled_with_cuda() and core.op_support_gpu(self.op_type):
        candidate_places.append(core.CUDAPlace(0))
    for candidate in candidate_places:
        self.check_grad_with_place(candidate, inputs_to_check, output_names,
                                   no_grad_set, numeric_grad_delta,
                                   in_place, max_relative_error,
                                   user_defined_grads)
def run_program(self):
    """Run the test program.

    Executes the default main program on each available place; the fetch
    result of the most recent place ends up in ``self.op_output``.
    """
    candidate_places = [core.CPUPlace()]
    if core.is_compiled_with_cuda():
        candidate_places.append(core.CUDAPlace(0))
    for candidate in candidate_places:
        self.set_inputs(candidate)
        executor = fluid.Executor(candidate)
        result = executor.run(fluid.default_main_program(),
                              feed=self.inputs,
                              fetch_list=self.fetch_list,
                              return_numpy=True)
        self.op_output = result
def test_check_output(self):
    """Validate fp16 output on GPU; silently skipped on CPU-only builds
    or devices without float16 support."""
    if not core.is_compiled_with_cuda():
        return
    gpu = core.CUDAPlace(0)
    if core.is_float16_supported(gpu):
        self.check_output_with_place(gpu, atol=1e-3)
def test_scale_selected_rows_inplace(self):
    """Scale a SelectedRows tensor in place ('in' -> 'in') on each place."""
    candidate_places = [core.CPUPlace()]
    if core.is_compiled_with_cuda():
        candidate_places.append(core.CUDAPlace(0))
    for candidate in candidate_places:
        self.check_with_place(candidate, 'in', 'in')
def test_case1(self):
    """Smoke-test conv2d_transpose across data formats, paddings and
    output_size; only asserts that every output is produced (not values).

    NOTE(review): out3/out4/out6/out7 feed the NCHW-shaped `data1` while
    requesting mixed formats/paddings, and out5 feeds the NHWC-shaped
    `data2` with data_format='NCHW' — presumably intentional coverage of
    layout handling; confirm against the op's format semantics.
    """
    data1 = fluid.layers.data(name='data1',
                              shape=[3, 5, 5],
                              dtype='float32')
    data2 = fluid.layers.data(name='data2',
                              shape=[5, 5, 3],
                              dtype='float32')
    out1 = fluid.layers.conv2d_transpose(input=data1,
                                         groups=1,
                                         num_filters=6,
                                         filter_size=3,
                                         data_format='NCHW')
    out2 = fluid.layers.conv2d_transpose(input=data2,
                                         groups=1,
                                         num_filters=6,
                                         filter_size=3,
                                         data_format='NHWC')
    # explicit per-dimension padding, NHWC order [N, H, W, C]
    out3 = fluid.layers.conv2d_transpose(
        input=data1,
        groups=1,
        num_filters=6,
        filter_size=3,
        padding=[[0, 0], [1, 1], [1, 1], [0, 0]],
        data_format='NHWC')
    # asymmetric padding with grouped deconvolution
    out4 = fluid.layers.conv2d_transpose(
        input=data1,
        groups=3,
        num_filters=6,
        filter_size=3,
        padding=[[0, 0], [0, 0], [2, 1], [0, 0]],
        data_format='NCHW')
    out5 = fluid.layers.conv2d_transpose(input=data2,
                                         groups=1,
                                         num_filters=6,
                                         filter_size=3,
                                         padding='SAME',
                                         data_format='NCHW')
    out6 = fluid.layers.conv2d_transpose(input=data1,
                                         groups=1,
                                         num_filters=6,
                                         filter_size=3,
                                         padding='VALID',
                                         data_format='NHWC')
    # explicit output_size instead of filter_size
    out7 = fluid.layers.conv2d_transpose(input=data1,
                                         groups=1,
                                         num_filters=6,
                                         output_size=[7, 7],
                                         padding=[0, 0],
                                         data_format='NHWC')
    data1_np = np.random.random((2, 3, 5, 5)).astype("float32")
    data2_np = np.random.random((2, 5, 5, 3)).astype("float32")
    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
    else:
        place = core.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    results = exe.run(
        fluid.default_main_program(),
        feed={
            "data1": data1_np,
            "data2": data2_np
        },
        fetch_list=[out1, out2, out3, out4, out5, out6, out7],
        return_numpy=True)
    self.assertIsNotNone(results[0])
    self.assertIsNotNone(results[1])
    self.assertIsNotNone(results[2])
    self.assertIsNotNone(results[3])
    self.assertIsNotNone(results[4])
    self.assertIsNotNone(results[5])
    self.assertIsNotNone(results[6])
def has_cuda(self):
    """Return whether this Paddle binary was built with CUDA support."""
    return core.is_compiled_with_cuda()
def testSetNumpyBeforeTrain(self):
    """Restore optimizer/model state from numpy dicts *before* training,
    run one step with lr=0, and check the Adam accumulators and parameters
    evolve exactly as predicted from the saved baseline.

    Relies on ``self.opti_dict`` / ``self.state_dict`` / ``self.base_opti``
    / ``self.model_base`` prepared by an earlier test phase.
    """
    seed = 90
    hidden_size = 10
    vocab_size = 1000
    num_layers = 1
    num_steps = 3
    init_scale = 0.1
    batch_size = 4
    batch_num = 200
    with fluid.dygraph.guard():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed
        # TODO: marsyang1993 Change seed to
        ptb_model = PtbModel(hidden_size=hidden_size,
                             vocab_size=vocab_size,
                             num_layers=num_layers,
                             num_steps=num_steps,
                             init_scale=init_scale)
        bd = []
        lr_arr = [0.0]
        # this a fake lr decay strategy
        for i in range(1, 10):
            bd.append(100 * i)
            # set lr to 0.0, not update parameter
            new_lr = 0.0
            lr_arr.append(new_lr)
        place = fluid.CPUPlace(
        ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)
        adam = Adam(learning_rate=fluid.layers.piecewise_decay(
            boundaries=bd, values=lr_arr),
                    beta1=0.8,
                    beta2=0.6,
                    parameter_list=ptb_model.parameters())
        dy_param_updated = dict()
        dy_param_init = dict()
        dy_loss = None
        last_hidden = None
        last_cell = None
        np_opti_dict = {}
        np_state_dict = {}
        # convert the saved state to plain numpy before restoring
        for k, v in self.opti_dict.items():
            np_opti_dict[v.name] = v.numpy()
        for k, v in self.state_dict.items():
            np_state_dict[k] = v.numpy()
        adam.set_dict(np_opti_dict)
        ptb_model.set_dict(np_state_dict)
        for i in range(1):
            x_data = np.arange(12).reshape(4, 3).astype('int64')
            y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
            y_data = y_data.reshape((-1, 1))
            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            x = to_variable(x_data)
            y = to_variable(y_data)
            init_hidden = to_variable(init_hidden_data)
            init_cell = to_variable(init_cell_data)
            dy_loss, last_hidden, last_cell = ptb_model(
                x, y, init_hidden, init_cell)
            dy_loss.backward()
            adam.minimize(dy_loss)
            ptb_model.clear_gradients()
            opti_dict = adam.state_dict()
            for k, v in opti_dict.items():
                if k == "global_step":
                    # one minimize() advances the step counter by exactly 1
                    self.assertTrue(
                        np.array_equal(v.numpy(),
                                       self.base_opti[v.name] + 1))
                # beta power accumulators are multiplied by beta each step
                if k.find("beta1_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(), self.base_opti[v.name] *
                                       adam._beta1))
                if k.find("beta2_pow_acc_0") > 0:
                    self.assertTrue(
                        np.array_equal(v.numpy(), self.base_opti[v.name] *
                                       adam._beta2))
        # check parameter
        # lr was 0, so parameters must still equal the restored baseline
        state_dict = ptb_model.state_dict()
        for k, v in state_dict.items():
            new_t = v.numpy()
            base_t = self.model_base[k]
            self.assertTrue(np.array_equal(new_t, base_t))
def has_cudnn(self):
    """cuDNN is usable only when the build has CUDA and the test asks
    for it via ``self.use_cudnn``."""
    if not core.is_compiled_with_cuda():
        return False
    return self.use_cudnn
def get_places(self):
    """Return the list of places to test on: CPU always, GPU when built."""
    if core.is_compiled_with_cuda():
        return [core.CPUPlace(), core.CUDAPlace(0)]
    return [core.CPUPlace()]
def test_check_output_with_place(self):
    """Check the op's output on GPU; no-op on CPU-only builds."""
    if not core.is_compiled_with_cuda():
        return
    self.check_output_with_place(core.CUDAPlace(0), atol=1e-5)
def test_case(self):
    """GPU support reported for `sum` must match whether the build has CUDA."""
    cuda_build = core.is_compiled_with_cuda()
    sum_supports_gpu = core.op_support_gpu("sum")
    self.assertEqual(cuda_build, sum_supports_gpu)
def testcudnn(self):
    """Report whether the cuDNN code path applies: CUDA build plus the
    test's ``use_cudnn`` flag."""
    if not core.is_compiled_with_cuda():
        return False
    return self.use_cudnn
def test_ptb_rnn_cpu_float32(self):
    """Train a small PTB RNN twice — once in dygraph, once as a static
    graph — with identical seeds/data, and assert losses, final states and
    parameters (initial and trained) match exactly.
    """
    seed = 90
    hidden_size = 10
    vocab_size = 1000
    num_layers = 1
    num_steps = 3
    init_scale = 0.1
    batch_size = 4
    batch_num = 200
    with fluid.dygraph.guard():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed
        # TODO: marsyang1993 Change seed to
        ptb_model = PtbModel("ptb_model",
                             hidden_size=hidden_size,
                             vocab_size=vocab_size,
                             num_layers=num_layers,
                             num_steps=num_steps,
                             init_scale=init_scale)
        sgd = SGDOptimizer(learning_rate=1e-3)
        dy_param_updated = dict()
        dy_param_init = dict()
        dy_loss = None
        last_hidden = None
        last_cell = None
        for i in range(batch_num):
            # fixed synthetic batch so both runs see identical data
            x_data = np.arange(12).reshape(4, 3).astype('int64')
            y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
            x_data = x_data.reshape((-1, num_steps, 1))
            y_data = y_data.reshape((-1, 1))
            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            x = to_variable(x_data)
            y = to_variable(y_data)
            init_hidden = to_variable(init_hidden_data)
            init_cell = to_variable(init_cell_data)
            dy_loss, last_hidden, last_cell = ptb_model(
                x, y, init_hidden, init_cell)
            if i == 0:
                # snapshot the freshly initialized parameters
                for param in ptb_model.parameters():
                    dy_param_init[param.name] = param.numpy()
            dy_loss.backward()
            sgd.minimize(dy_loss)
            ptb_model.clear_gradients()
            if i == batch_num - 1:
                for param in ptb_model.parameters():
                    dy_param_updated[param.name] = param.numpy()
        dy_loss_value = dy_loss.numpy()
        dy_last_cell_value = last_cell.numpy()
        dy_last_hidden_value = last_hidden.numpy()
    with new_program_scope():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed
        ptb_model = PtbModel("ptb_model",
                             hidden_size=hidden_size,
                             vocab_size=vocab_size,
                             num_layers=num_layers,
                             num_steps=num_steps,
                             init_scale=init_scale)
        exe = fluid.Executor(fluid.CPUPlace(
        ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
        sgd = SGDOptimizer(learning_rate=1e-3)
        x = fluid.layers.data(name="x",
                              shape=[-1, num_steps, 1],
                              dtype='int64')
        # NOTE(review): y is declared float32 here while the feed below is
        # int64 — presumably the model casts internally; confirm.
        y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
        init_hidden = fluid.layers.data(name="init_hidden",
                                        shape=[1],
                                        dtype='float32')
        init_cell = fluid.layers.data(name="init_cell",
                                      shape=[1],
                                      dtype='float32')
        static_loss, static_last_hidden, static_last_cell = ptb_model(
            x, y, init_hidden, init_cell)
        sgd.minimize(static_loss)
        static_param_updated = dict()
        static_param_init = dict()
        static_param_name_list = list()
        for param in ptb_model.parameters():
            static_param_name_list.append(param.name)
        out = exe.run(framework.default_startup_program(),
                      fetch_list=static_param_name_list)
        for i in range(len(static_param_name_list)):
            static_param_init[static_param_name_list[i]] = out[i]
        static_loss_value = None
        static_last_cell_value = None
        static_last_hidden_value = None
        for i in range(batch_num):
            x_data = np.arange(12).reshape(4, 3).astype('int64')
            y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
            x_data = x_data.reshape((-1, num_steps, 1))
            y_data = y_data.reshape((-1, 1))
            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            fetch_list = [
                static_loss, static_last_hidden, static_last_cell
            ]
            # fetch the parameters alongside the loss/state outputs
            fetch_list.extend(static_param_name_list)
            out = exe.run(fluid.default_main_program(),
                          feed={
                              "x": x_data,
                              "y": y_data,
                              "init_hidden": init_hidden_data,
                              "init_cell": init_cell_data
                          },
                          fetch_list=fetch_list)
            static_loss_value = out[0]
            static_last_hidden_value = out[1]
            static_last_cell_value = out[2]
            if i == batch_num - 1:
                # outputs 3.. are the parameter values, in name-list order
                for k in range(3, len(out)):
                    static_param_updated[static_param_name_list[
                        k - 3]] = out[k]
    self.assertTrue(np.array_equal(static_loss_value, dy_loss_value))
    self.assertTrue(
        np.array_equal(static_last_cell_value, dy_last_cell_value))
    self.assertTrue(
        np.array_equal(static_last_hidden_value, dy_last_hidden_value))
    for key, value in six.iteritems(static_param_init):
        self.assertTrue(np.array_equal(value, dy_param_init[key]))
    for key, value in six.iteritems(static_param_updated):
        self.assertTrue(np.array_equal(value, dy_param_updated[key]))
def test_check_grad_gpu(self):
    """Gradient check for Bias/W/Input on GPU; skipped without CUDA."""
    if not core.is_compiled_with_cuda():
        return
    self.check_grad_with_place(core.CUDAPlace(0),
                               ["Bias", "W", "Input"], "Out")
def test_check_output_gpu(self):
    """Forward output check on GPU; skipped without CUDA."""
    if not core.is_compiled_with_cuda():
        return
    self.check_output_with_place(core.CUDAPlace(0))
def test_check_output(self):
    """fp16 output check, only where CUDA and float16 are both available."""
    if not core.is_compiled_with_cuda():
        return
    gpu_place = core.CUDAPlace(0)
    if not core.is_float16_supported(gpu_place):
        return
    self.check_output_with_place(gpu_place, atol=1e-3)
def check_forward_backward(self,
                           shape,
                           begin_norm_axis,
                           has_scale=True,
                           has_bias=True,
                           y_grad_scale=1.0,
                           use_mkldnn=False):
    """Compare the layer_norm op's forward and backward outputs against a
    numpy reference, with scale/bias optionally absent.

    Builds the op and its generated grad op directly via block descs so
    the test controls exactly which inputs/outputs exist.
    """

    def test_with_place(place,
                        shape,
                        begin_norm_axis,
                        use_mkldnn=use_mkldnn):
        # attr
        epsilon = 0.00001
        x_shape = shape
        # D = product of the normalized trailing dimensions
        D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1)
        scale_shape = [D]
        np.random.seed(123)
        x = np.random.random_sample(x_shape).astype(np.float32)
        scale = np.random.random_sample(scale_shape).astype(
            np.float32) if has_scale else None
        bias = np.random.random_sample(scale_shape).astype(
            np.float32) if has_bias else None
        y_grad = (np.random.random_sample(x_shape) *
                  y_grad_scale).astype(np.float32)
        # reference forward & backward
        y, mean, variance = _reference_layer_norm_naive(
            x, scale, bias, epsilon, begin_norm_axis)
        x_grad, scale_grad, bias_grad = _reference_layer_norm_grad(
            x, y_grad, scale, bias, mean, variance, begin_norm_axis)
        # locals() snapshot gives name->array access for feeding below
        var_dict = locals()
        var_dict['y@GRAD'] = y_grad
        var_names = ['x', 'mean', 'variance', 'y', 'y@GRAD']
        if has_scale:
            var_names += ['scale']
        if has_bias:
            var_names += ['bias']
        ground_truth = {name: var_dict[name] for name in var_names}
        program = fluid.Program()
        with fluid.program_guard(program):
            block = program.global_block()
            for name in ground_truth:
                block.create_var(name=name,
                                 dtype='float32',
                                 shape=ground_truth[name].shape)
            inputs = {"X": block.var('x')}
            fetch_list = [
                'y',
                'mean',
                'variance',
                'x@GRAD',
            ]
            if has_scale:
                inputs["Scale"] = block.var('scale')
                fetch_list += ['scale@GRAD']
            if has_bias:
                inputs["Bias"] = block.var('bias')
                fetch_list += ['bias@GRAD']
            layer_norm_op = block.append_op(
                type="layer_norm",
                inputs=inputs,
                outputs={
                    "Y": block.var('y'),
                    "Mean": block.var('mean'),  # share the same memory
                    "Variance":
                    block.var('variance'),  # share the same memory
                },
                attrs={
                    "epsilon": epsilon,
                    "begin_norm_axis": begin_norm_axis,
                    "use_mkldnn": use_mkldnn
                })
            # generate backward op_desc
            grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
                layer_norm_op.desc, set(), [])
            grad_op_desc = grad_op_desc_list[0]
            new_op_desc = block.desc.append_op()
            new_op_desc.copy_from(grad_op_desc)
            for var_name in grad_op_desc.output_arg_names():
                block.desc.var(var_name.encode("ascii"))
            grad_op_desc.infer_var_type(block.desc)
            grad_op_desc.infer_shape(block.desc)
            for arg in grad_op_desc.output_arg_names():
                grad_var = block.desc.find_var(arg.encode("ascii"))
                grad_var.set_dtype(core.VarDesc.VarType.FP32)
            # sync the hand-built desc back into the Python program
            program._sync_with_cpp()
            exe = fluid.Executor(place)
            out = exe.run(program,
                          feed={
                              name: var_dict[name]
                              for name in ['x', 'scale', 'bias', 'y@GRAD']
                          },
                          fetch_list=fetch_list)
            # print(y)
            # print(out[0])
            self.__assert_close(y, out[0], "y")
            self.__assert_close(mean, out[1], "mean")
            self.__assert_close(variance, out[2], "variance", 1e-3)
            self.__assert_close(x_grad, out[3], "x_grad")
            if has_scale:
                self.__assert_close(scale_grad,
                                    out[fetch_list.index('scale@GRAD')],
                                    "scale_grad", 1e-3)
            if has_bias:
                self.__assert_close(bias_grad,
                                    out[fetch_list.index('bias@GRAD')],
                                    "bias_grad")

    places = [core.CPUPlace()]
    if core.is_compiled_with_cuda() and core.op_support_gpu(
            "layer_norm") and self.use_cudnn:
        places.append(core.CUDAPlace(0))
    for place in places:
        test_with_place(place, shape, begin_norm_axis)
def test_gpu(self):
    """Exercise gaussian_random on the GPU when this build has CUDA."""
    if not core.is_compiled_with_cuda():
        return
    self.gaussian_random_test(place=fluid.CUDAPlace(0))
def run_boxps_preload(self, is_cpu=True):
    """End-to-end BoxPS smoke test: build a tiny sparse-embedding net,
    write two synthetic data files, and train from two BoxPS datasets —
    one loaded eagerly, one preloaded asynchronously.

    Temp files are created in the working directory and removed at the end.
    """
    x = fluid.layers.data(name='x', shape=[1], dtype='int64', lod_level=0)
    y = fluid.layers.data(name='y', shape=[1], dtype='int64', lod_level=0)
    emb_x, emb_y = _pull_box_sparse([x, y], size=2)
    emb_xp = _pull_box_sparse(x, size=2)
    concat = layers.concat([emb_x, emb_y], axis=1)
    fc = layers.fc(input=concat,
                   name="fc",
                   size=1,
                   num_flatten_dims=1,
                   bias_attr=False)
    loss = layers.reduce_mean(fc)
    layers.Print(loss)
    place = fluid.CPUPlace(
    ) if is_cpu or not core.is_compiled_with_cuda() else fluid.CUDAPlace(
        0)
    exe = fluid.Executor(place)
    batch_size = 2

    def binary_print(slot, fout):
        # one slot serialized as: <count> <v0> <v1> ...
        fout.write(str(len(slot)) + " ")
        for e in slot:
            fout.write(str(e) + " ")

    batch1 = np.ones(
        (batch_size, 2, 1)).astype("int64").reshape(batch_size, 2, 1)
    filelist = []
    place_str = "cpu" if is_cpu else "gpu"
    for i in range(2):
        filelist.append("test_hdfs_" + place_str + "_" + str(i))
    for f in filelist:
        with open(f, "w") as fout:
            for ins in batch1:
                for slot in ins:
                    binary_print(slot, fout)
                fout.write("\n")

    def create_dataset():
        # fresh BoxPSDataset bound to the same vars/filelist
        dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset")
        dataset.set_date("20190930")
        dataset.set_use_var([x, y])
        dataset.set_batch_size(2)
        dataset.set_thread(1)
        dataset.set_filelist(filelist)
        return dataset

    datasets = []
    datasets.append(create_dataset())
    datasets.append(create_dataset())
    optimizer = fluid.optimizer.SGD(learning_rate=0.5)
    optimizer = fluid.optimizer.PipelineOptimizer(optimizer,
                                                  cut_list=[],
                                                  place_list=[place],
                                                  concurrency_list=[1],
                                                  queue_size=1,
                                                  sync_steps=-1)
    optimizer.minimize(loss)
    exe.run(fluid.default_startup_program())
    # dataset 0: synchronous load; dataset 1: async preload overlapping
    # with the first training pass
    datasets[0].load_into_memory()
    datasets[0].begin_pass()
    datasets[1].preload_into_memory()
    exe.train_from_dataset(program=fluid.default_main_program(),
                           dataset=datasets[0],
                           print_period=1)
    datasets[0].end_pass(True)
    datasets[1].wait_preload_done()
    datasets[1].begin_pass()
    exe.train_from_dataset(program=fluid.default_main_program(),
                           dataset=datasets[1],
                           print_period=1,
                           debug=True)
    datasets[1].end_pass(False)
    for f in filelist:
        os.remove(f)
def init_datatype(self):
    """On CUDA builds switch the test dtype to float16; otherwise leave
    whatever the base class configured."""
    if not core.is_compiled_with_cuda():
        return
    self.dtype = 'float16'
def test_slice(self):
    """Run the slice checks on CPU, and additionally on GPU when built."""
    self._test_slice(fluid.CPUPlace())
    if core.is_compiled_with_cuda():
        self._test_slice(core.CUDAPlace(0))
def testSetNumpy(self):
    """Train a PTB model, zero out the optimizer and parameter tensors,
    then restore both from numpy dicts and verify the restored state
    matches the saved baselines exactly.

    Relies on ``self.base_opti`` / ``self.model_base`` prepared by an
    earlier test phase.
    """
    seed = 90
    hidden_size = 10
    vocab_size = 1000
    num_layers = 1
    num_steps = 3
    init_scale = 0.1
    batch_size = 4
    batch_num = 200
    with fluid.dygraph.guard():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed
        # TODO: marsyang1993 Change seed to
        ptb_model = PtbModel(hidden_size=hidden_size,
                             vocab_size=vocab_size,
                             num_layers=num_layers,
                             num_steps=num_steps,
                             init_scale=init_scale)
        bd = []
        lr_arr = [1.0]
        # this a fake lr decay strategy
        for i in range(1, 10):
            bd.append(100 * i)
            new_lr = 1.0
            lr_arr.append(new_lr)
        place = fluid.CPUPlace(
        ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)
        adam = Adam(learning_rate=fluid.layers.piecewise_decay(
            boundaries=bd, values=lr_arr),
                    parameter_list=ptb_model.parameters())
        dy_param_updated = dict()
        dy_param_init = dict()
        dy_loss = None
        last_hidden = None
        last_cell = None
        for i in range(batch_num):
            x_data = np.arange(12).reshape(4, 3).astype('int64')
            y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
            y_data = y_data.reshape((-1, 1))
            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            x = to_variable(x_data)
            y = to_variable(y_data)
            init_hidden = to_variable(init_hidden_data)
            init_cell = to_variable(init_cell_data)
            dy_loss, last_hidden, last_cell = ptb_model(
                x, y, init_hidden, init_cell)
            if i == 0:
                for param in ptb_model.parameters():
                    dy_param_init[param.name] = param.numpy()
            dy_loss.backward()
            adam.minimize(dy_loss)
            ptb_model.clear_gradients()
            if i == batch_num - 1:
                for param in ptb_model.parameters():
                    dy_param_updated[param.name] = param.numpy()
        # check optimizer
        opti_dict = adam.state_dict()
        np_opti_dict = {}
        # set to zero
        for k, v in opti_dict.items():
            np_t = v.numpy()
            np_opti_dict[v.name] = np_t
            var = v.value().get_tensor()
            var.set(np.zeros_like(np_t), place)
            # confirm the zeroing actually took effect
            self.assertTrue(np.sum(np.abs(v.numpy())) == 0)
        if isinstance(adam._learning_rate, LearningRateDecay):
            adam._learning_rate.step_num = 0
        adam.set_dict(np_opti_dict)
        opti_dict = adam.state_dict()
        for k, v in opti_dict.items():
            self.assertTrue(
                np.array_equal(v.numpy(), self.base_opti[v.name]))
        # check parameter
        state_dict = ptb_model.state_dict()
        np_state_dict = {}
        for k, v in state_dict.items():
            np_t = v.numpy()
            np_state_dict[k] = np_t
            var = v.value().get_tensor()
            var.set(np.zeros_like(np_t), place)
        ptb_model.set_dict(np_state_dict)
        state_dict = ptb_model.state_dict()
        for k, v in state_dict.items():
            new_t = v.numpy()
            base_t = self.model_base[k]
            self.assertTrue(np.array_equal(new_t, base_t))
def test_mnist_float32(self):
    """Train MNIST for up to 50 batches in dygraph and in static graph
    with the same seed/data, then assert losses and parameters agree
    (initial exactly, trained within 1e-5).
    """
    seed = 90
    epoch_num = 1
    batch_size = 128
    batch_num = 50
    with fluid.dygraph.guard():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed
        mnist = MNIST("mnist")
        sgd = SGDOptimizer(learning_rate=1e-3)
        batch_py_reader = fluid.io.PyReader(capacity=1)
        batch_py_reader.decorate_sample_list_generator(
            paddle.batch(self.reader_decorator(
                paddle.dataset.mnist.train()),
                         batch_size=batch_size,
                         drop_last=True),
            places=fluid.CPUPlace())
        mnist.train()
        dy_param_init_value = {}
        for epoch in range(epoch_num):
            for batch_id, data in enumerate(batch_py_reader()):
                if batch_id >= batch_num:
                    break
                img = data[0]
                dy_x_data = img.numpy()
                label = data[1]
                label.stop_gradient = True
                cost = mnist(img)
                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)
                dy_out = avg_loss.numpy()
                if epoch == 0 and batch_id == 0:
                    # snapshot initial parameters for comparison
                    for param in mnist.parameters():
                        dy_param_init_value[param.name] = param.numpy()
                avg_loss.backward()
                sgd.minimize(avg_loss)
                mnist.clear_gradients()
        dy_param_value = {}
        for param in mnist.parameters():
            dy_param_value[param.name] = param.numpy()
    with new_program_scope():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed
        exe = fluid.Executor(fluid.CPUPlace(
        ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
        mnist = MNIST("mnist")
        sgd = SGDOptimizer(learning_rate=1e-3)
        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=batch_size,
                                    drop_last=True)
        img = fluid.layers.data(name='pixel',
                                shape=[1, 28, 28],
                                dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        cost = mnist(img)
        loss = fluid.layers.cross_entropy(cost, label)
        avg_loss = fluid.layers.mean(loss)
        sgd.minimize(avg_loss)
        # initialize params and fetch them
        static_param_init_value = {}
        static_param_name_list = []
        for param in mnist.parameters():
            static_param_name_list.append(param.name)
        out = exe.run(fluid.default_startup_program(),
                      fetch_list=static_param_name_list)
        for i in range(len(static_param_name_list)):
            static_param_init_value[static_param_name_list[i]] = out[i]
        for epoch in range(epoch_num):
            for batch_id, data in enumerate(train_reader()):
                if batch_id >= batch_num:
                    break
                static_x_data = np.array([
                    x[0].reshape(1, 28, 28) for x in data
                ]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(
                                       [batch_size, 1])
                fetch_list = [avg_loss.name]
                # also fetch all parameters each step
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "pixel": static_x_data,
                                  "label": y_data
                              },
                              fetch_list=fetch_list)
                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[
                        i - 1]] = out[i]
    self.assertTrue(np.allclose(dy_x_data.all(), static_x_data.all()))
    for key, value in six.iteritems(static_param_init_value):
        self.assertTrue(np.allclose(value, dy_param_init_value[key]))
    self.assertTrue(np.allclose(static_out, dy_out))
    for key, value in six.iteritems(static_param_value):
        self.assertTrue(
            np.allclose(value, dy_param_value[key], atol=1e-5))
def test_check_output(self):
    """Compare GPU output with and without the TensorRT subgraph pass and
    verify pass-version compatibility; skipped on CPU-only builds."""
    if not core.is_compiled_with_cuda():
        return
    self.check_output_with_option(True)
    self.assertTrue(
        PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
def setUp(self):
    """Pick the execution place: GPU when available, else CPU."""
    cuda_available = core.is_compiled_with_cuda()
    self.place = core.CUDAPlace(0) if cuda_available else core.CPUPlace()
def test_copy_and_copy_to(self):
    """Exercise eager-tensor `copy_` and `_copy_to`/`cuda`/`cpu`
    semantics: value copy, persistable/stop_gradient propagation, and
    place transitions (CPU<->GPU when CUDA is available).
    """
    print("Test_copy_and_copy_to")
    with _test_eager_guard():
        paddle.set_device("cpu")
        arr = np.ones([4, 16, 16, 32]).astype('float32')
        arr1 = np.zeros([4, 16]).astype('float32')
        arr2 = np.ones([4, 16, 16, 32]).astype('float32') + np.ones(
            [4, 16, 16, 32]).astype('float32')
        tensor = paddle.to_tensor(arr, core.VarDesc.VarType.FP32,
                                  core.CPUPlace())
        self.assertEqual(tensor.stop_gradient, True)
        tensor.stop_gradient = False
        print("Set persistable")
        tensor.persistable = False
        tensor1 = paddle.to_tensor(arr1, core.VarDesc.VarType.FP32,
                                   core.CPUPlace())
        tensor1.persistable = True
        self.assertEqual(tensor1.stop_gradient, True)
        self.assertTrue(np.array_equal(tensor.numpy(), arr))
        print("Test copy_")
        # copy_ takes values/shape from tensor1 but keeps tensor's own
        # persistable flag
        tensor.copy_(tensor1, True)
        self.assertEqual(tensor.persistable, False)
        self.assertEqual(tensor.shape, [4, 16])
        self.assertEqual(tensor.dtype, core.VarDesc.VarType.FP32)
        self.assertTrue(np.array_equal(tensor.numpy(), arr1))
        print("Test _copy_to")
        tensor2 = paddle.to_tensor(arr2, core.VarDesc.VarType.FP32,
                                   core.CPUPlace())
        self.assertTrue(np.array_equal(tensor2.numpy(), arr2))
        self.assertTrue(tensor2.place.is_cpu_place())
        tensor2.persistable = True
        tensor2.stop_gradient = False
        if core.is_compiled_with_cuda():
            # _copy_to(blocking=True) resets stop_gradient to True,
            # while .cuda()/.cpu() preserve it — asserted below
            tensor3 = tensor2._copy_to(core.CUDAPlace(0), True)
            self.assertTrue(np.array_equal(tensor3.numpy(), arr2))
            self.assertEqual(tensor3.persistable, True)
            self.assertEqual(tensor3.stop_gradient, True)
            self.assertTrue(tensor3.place.is_gpu_place())
            tensor4 = tensor2.cuda(0, True)
            self.assertTrue(np.array_equal(tensor4.numpy(), arr2))
            self.assertEqual(tensor4.persistable, True)
            self.assertEqual(tensor4.stop_gradient, False)
            self.assertTrue(tensor4.place.is_gpu_place())
            tensor5 = tensor4.cpu()
            self.assertTrue(np.array_equal(tensor5.numpy(), arr2))
            self.assertEqual(tensor5.persistable, True)
            self.assertEqual(tensor5.stop_gradient, False)
            self.assertTrue(tensor5.place.is_cpu_place())
            # pinned-memory -> GPU copy round-trip
            tensor10 = paddle.to_tensor([1, 2, 3], place='gpu_pinned')
            tensor11 = tensor10._copy_to(core.CUDAPlace(0), True)
            self.assertTrue(
                np.array_equal(tensor10.numpy(), tensor11.numpy()))
        else:
            tensor3 = tensor2._copy_to(core.CPUPlace(), True)
            self.assertTrue(np.array_equal(tensor3.numpy(), arr2))
            self.assertEqual(tensor3.persistable, True)
            self.assertEqual(tensor3.stop_gradient, True)
            self.assertTrue(tensor3.place.is_cpu_place())
            tensor4 = tensor2.cpu()
            self.assertTrue(np.array_equal(tensor4.numpy(), arr2))
            self.assertEqual(tensor4.persistable, True)
            self.assertEqual(tensor4.stop_gradient, False)
            self.assertTrue(tensor4.place.is_cpu_place())
def setUp(self):
    """Choose the execution place and build a random (2, 3, 4) float32
    input for the test."""
    if core.is_compiled_with_cuda():
        self.place = core.CUDAPlace(0)
    else:
        self.place = core.CPUPlace()
    self.data = np.random.rand(2, 3, 4).astype("float32")
def setUp(self):
    """Select device (GPU when available) and build an all-ones input."""
    if core.is_compiled_with_cuda():
        self.place = paddle.CUDAPlace(0)
    else:
        self.place = paddle.CPUPlace()
    self.x_np = np.ones([1, 2, 3, 4]).astype('float32')
def setUp(self):
    """Create a random 10x10 input and choose the execution place."""
    self.input_data = np.random.rand(10, 10)
    self.place = (core.CUDAPlace(0)
                  if core.is_compiled_with_cuda() else core.CPUPlace())
def test_check_grad(self):
    """Gradient check for softmax: the cuDNN/fp16 configurations must run
    on GPU (and only when fp16 is supported there); everything else uses
    the default multi-place check."""
    if self.use_cudnn or self.dtype == np.float16:
        place = core.CUDAPlace(0)
        if core.is_float16_supported(place):
            self.check_grad_with_place(place, ["X"],
                                       "Out",
                                       max_relative_error=0.01)
    else:
        self.check_grad(["X"], "Out", max_relative_error=0.01)


class TestSoftmaxOp2(TestSoftmaxOp):
    # Same test with a 4-D input shape.
    def get_x_shape(self):
        return [2, 3, 4, 5]


@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestSoftmaxCUDNNOp(TestSoftmaxOp):
    # Route the base test through the cuDNN kernel.
    def init_kernel_type(self):
        self.use_cudnn = True


@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestSoftmaxCUDNNOp2(TestSoftmaxCUDNNOp):
    # cuDNN variant with a 4-D input shape.
    def get_x_shape(self):
        return [2, 3, 4, 5]


# Decorator for the next CUDA-only test class (defined past this chunk).
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
def test_gan_float32(self):
    """Run one discriminator/generator update of a toy GAN in static
    graph mode and in imperative mode with identical seeds/inputs, and
    assert losses and all parameters agree.
    """
    seed = 90
    startup = fluid.Program()
    startup.random_seed = seed
    discriminate_p = fluid.Program()
    generate_p = fluid.Program()
    discriminate_p.random_seed = seed
    generate_p.random_seed = seed
    scope = fluid.core.Scope()
    with new_program_scope(main=discriminate_p,
                           startup=startup,
                           scope=scope):
        discriminator = Discriminator("d")
        generator = Generator("g")
        img = fluid.layers.data(name="img",
                                shape=[2, 1],
                                append_batch_size=False)
        noise = fluid.layers.data(name="noise",
                                  shape=[2, 2],
                                  append_batch_size=False)
        d_real = discriminator(img)
        # real samples labelled 1, fake samples labelled 0
        d_loss_real = fluid.layers.reduce_mean(
            fluid.layers.sigmoid_cross_entropy_with_logits(
                x=d_real,
                label=fluid.layers.fill_constant(shape=[2, 1],
                                                 dtype='float32',
                                                 value=1.0)))
        d_fake = discriminator(generator(noise))
        d_loss_fake = fluid.layers.reduce_mean(
            fluid.layers.sigmoid_cross_entropy_with_logits(
                x=d_fake,
                label=fluid.layers.fill_constant(shape=[2, 1],
                                                 dtype='float32',
                                                 value=0.0)))
        d_loss = d_loss_real + d_loss_fake
        sgd = SGDOptimizer(learning_rate=1e-3)
        sgd.minimize(d_loss)
    with new_program_scope(main=generate_p, startup=startup,
                           scope=scope):
        discriminator = Discriminator("d")
        generator = Generator("g")
        noise = fluid.layers.data(name="noise",
                                  shape=[2, 2],
                                  append_batch_size=False)
        d_fake = discriminator(generator(noise))
        # generator wants fakes classified as real (label 1)
        g_loss = fluid.layers.reduce_mean(
            fluid.layers.sigmoid_cross_entropy_with_logits(
                x=d_fake,
                label=fluid.layers.fill_constant(shape=[2, 1],
                                                 dtype='float32',
                                                 value=1.0)))
        sgd = SGDOptimizer(learning_rate=1e-3)
        sgd.minimize(g_loss)
    exe = fluid.Executor(fluid.CPUPlace(
    ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
    static_params = dict()
    with fluid.scope_guard(scope):
        img = np.ones([2, 1], np.float32)
        noise = np.ones([2, 2], np.float32)
        exe.run(startup)
        static_d_loss = exe.run(discriminate_p,
                                feed={
                                    'img': img,
                                    'noise': noise
                                },
                                fetch_list=[d_loss])[0]
        static_g_loss = exe.run(generate_p,
                                feed={'noise': noise},
                                fetch_list=[g_loss])[0]
        # generate_p contains all parameters needed.
        for param in generate_p.global_block().all_parameters():
            static_params[param.name] = np.array(
                scope.find_var(param.name).get_tensor())
    dy_params = dict()
    with fluid.imperative.guard():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed
        discriminator = Discriminator("d")
        generator = Generator("g")
        sgd = SGDOptimizer(learning_rate=1e-3)
        d_real = discriminator(to_variable(np.ones([2, 1], np.float32)))
        d_loss_real = fluid.layers.reduce_mean(
            fluid.layers.sigmoid_cross_entropy_with_logits(
                x=d_real, label=to_variable(np.ones([2, 1], np.float32))))
        d_fake = discriminator(
            generator(to_variable(np.ones([2, 2], np.float32))))
        d_loss_fake = fluid.layers.reduce_mean(
            fluid.layers.sigmoid_cross_entropy_with_logits(
                x=d_fake, label=to_variable(np.zeros([2, 1],
                                                     np.float32))))
        d_loss = d_loss_real + d_loss_fake
        d_loss._backward()
        sgd.minimize(d_loss)
        discriminator.clear_gradients()
        generator.clear_gradients()
        d_fake = discriminator(
            generator(to_variable(np.ones([2, 2], np.float32))))
        g_loss = fluid.layers.reduce_mean(
            fluid.layers.sigmoid_cross_entropy_with_logits(
                x=d_fake, label=to_variable(np.ones([2, 1], np.float32))))
        g_loss._backward()
        sgd.minimize(g_loss)
        for p in discriminator.parameters():
            dy_params[p.name] = p._numpy()
        for p in generator.parameters():
            dy_params[p.name] = p._numpy()
        dy_g_loss = g_loss._numpy()
        dy_d_loss = d_loss._numpy()
    self.assertEqual(dy_g_loss, static_g_loss)
    self.assertEqual(dy_d_loss, static_d_loss)
    for k, v in six.iteritems(dy_params):
        self.assertTrue(np.allclose(v, static_params[k]))
def check_forward_backward(self, shape, begin_norm_axis):
    """Compare layer_norm forward/backward against the numpy reference.

    Builds a single layer_norm op plus its auto-generated grad op inside a
    fresh program, runs it on every supported place, and checks
    y/mean/variance and the three gradients against
    _reference_layer_norm_naive / _reference_layer_norm_grad.
    """

    def test_with_place(place, shape, begin_norm_axis):
        # attr
        epsilon = 0.00001
        x_shape = shape
        # D = product of the normalized trailing dims (from begin_norm_axis
        # to the end); scale/bias are 1-D vectors of length D.
        D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1)
        scale_shape = [D]

        np.random.seed(123)
        x = np.random.random_sample(x_shape).astype(np.float32)
        scale = np.random.random_sample(scale_shape).astype(np.float32)
        bias = np.random.random_sample(scale_shape).astype(np.float32)
        y_grad = np.random.random_sample(x_shape).astype(np.float32)

        # reference forward & backward
        y, mean, variance = _reference_layer_norm_naive(
            x, scale, bias, epsilon, begin_norm_axis)
        x_grad, scale_grad, bias_grad = _reference_layer_norm_grad(
            x, y_grad, scale, mean, variance, begin_norm_axis)

        # Collect the reference arrays by name (via locals()) so they can be
        # created/fed/fetched uniformly below.
        var_dict = locals()
        var_dict['y@GRAD'] = y_grad
        var_names = [
            'x', 'scale', 'bias', 'mean', 'variance', 'y', 'y@GRAD'
        ]
        ground_truth = {name: var_dict[name] for name in var_names}

        program = fluid.Program()
        with fluid.program_guard(program):
            block = program.global_block()
            for name in ground_truth:
                block.create_var(
                    name=name,
                    dtype='float32',
                    shape=ground_truth[name].shape)
            layer_norm_op = block.append_op(
                type="layer_norm",
                inputs={
                    "X": block.var('x'),
                    "Scale": block.var('scale'),
                    "Bias": block.var('bias'),
                },
                outputs={
                    "Y": block.var('y'),
                    "Mean": block.var('mean'),  # share the same memory
                    "Variance":
                    block.var('variance'),  # share the same memory
                },
                attrs={
                    "epsilon": epsilon,
                    "begin_norm_axis": begin_norm_axis
                })

            # generate backward op_desc
            grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
                layer_norm_op.desc, set(), [])
            grad_op_desc = grad_op_desc_list[0]
            new_op_desc = block.desc.append_op()
            new_op_desc.copy_from(grad_op_desc)
            # Declare every grad output var in the block desc, then infer
            # its type/shape and force float32 so it can be fetched.
            for var_name in grad_op_desc.output_arg_names():
                block.desc.var(var_name.encode("ascii"))
            grad_op_desc.infer_var_type(block.desc)
            grad_op_desc.infer_shape(block.desc)
            for arg in grad_op_desc.output_arg_names():
                grad_var = block.desc.find_var(arg.encode("ascii"))
                grad_var.set_dtype(core.VarDesc.VarType.FP32)

            exe = fluid.Executor(place)
            out = exe.run(program,
                          feed={
                              name: var_dict[name]
                              for name in ['x', 'scale', 'bias', 'y@GRAD']
                          },
                          fetch_list=[
                              'y', 'mean', 'variance', 'x@GRAD',
                              'scale@GRAD', 'bias@GRAD'
                          ])
            # Looser tolerance (1e-3) for variance and scale_grad.
            self.__assert_close(y, out[0], "y")
            self.__assert_close(mean, out[1], "mean")
            self.__assert_close(variance, out[2], "variance", 1e-3)
            self.__assert_close(x_grad, out[3], "x_grad")
            self.__assert_close(scale_grad, out[4], "scale_grad", 1e-3)
            self.__assert_close(bias_grad, out[5], "bias_grad")

    places = [core.CPUPlace()]
    if core.is_compiled_with_cuda() and core.op_support_gpu("layer_norm"):
        places.append(core.CUDAPlace(0))

    for place in places:
        test_with_place(place, shape, begin_norm_axis)
def test_nested_net_with_backward_and_lodtensor(self):
    """Build a nested while_loop (outer loop over i, inner loop over j) that
    accumulates LoDTensorArray entries, run backward through it, and check
    both the forward sum and the gradient w.r.t. x.
    """

    def external_cond(i, j, x, mem_array):
        # Outer loop runs while i < array_len.
        return layers.less_than(i, array_len)

    def external_body(i, j, x, mem_array):
        def internal_cond(j, x, mem_array):
            # Inner loop runs while j < array_len2.
            return layers.less_than(j, array_len2)

        def internal_body(j, x, mem_array):
            # mem_array[j + 1] = x + data_array[j] + mem_array[j]
            inner_data = layers.array_read(array=data_array, i=j)
            inner_prev = layers.array_read(array=mem_array, i=j)
            inner_sum_0 = layers.elementwise_add(x=inner_data, y=inner_prev)
            inner_sum_1 = layers.elementwise_add(x=x, y=inner_sum_0)
            j = layers.increment(x=j, in_place=True)
            layers.array_write(inner_sum_1, i=j, array=mem_array)
            return [j, x, mem_array]

        # mem_array[i + 1] = x + data_array[i] + mem_array[i]
        outer_data = layers.array_read(array=data_array, i=i)
        outer_prev = layers.array_read(array=mem_array, i=i)
        outer_sum_0 = layers.elementwise_add(x=outer_data, y=outer_prev)
        outer_sum_1 = layers.elementwise_add(x=x, y=outer_sum_0)
        i = layers.increment(x=i, in_place=True)
        layers.array_write(outer_sum_1, i=i, array=mem_array)
        j, x, mem_array = layers.while_loop(internal_cond, internal_body,
                                            [j, x, mem_array])
        return [i, j, x, mem_array]

    main_program = Program()
    startup_program = Program()
    with fluid.program_guard(main_program, startup_program):
        d0 = fluid.data(name='d0', shape=[10], dtype='float32')
        d1 = fluid.data(name='d1', shape=[10], dtype='float32')
        d2 = fluid.data(name='d2', shape=[10], dtype='float32')
        x = fluid.data(name='x', shape=[10], dtype='float32')
        x.stop_gradient = False
        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = True
        init = layers.zeros(shape=[10], dtype='float32')
        mem_array = layers.array_write(x=init, i=i)
        # data_array = [d0, d1, d2]
        data_array = layers.array_write(x=d0, i=i)
        i = layers.increment(i)
        layers.array_write(d1, i, array=data_array)
        i = layers.increment(i)
        layers.array_write(d2, i, array=data_array)
        # Reset the outer counter to 0.  With array_len=1 the outer loop
        # runs once; with j starting at 1 and array_len2=3 the inner loop
        # runs for j=1,2.
        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = True
        array_len = layers.fill_constant(shape=[1], dtype='int64', value=1)
        j = layers.fill_constant(shape=[1], dtype='int64', value=1)
        j.stop_gradient = True
        array_len2 = layers.fill_constant(shape=[1], dtype='int64', value=3)

        out = layers.while_loop(external_cond, external_body,
                                [i, j, x, mem_array])

        sum_result = layers.array_read(array=mem_array, i=j)
        mean = layers.mean(sum_result)
        append_backward(mean)

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
    ) else fluid.CPUPlace()
    exe = fluid.Executor(place)

    d = []
    for i in range(3):
        d.append(np.random.random(size=[10]).astype('float32'))
    feed_x = np.ones(10).astype('float32')
    # Expected forward value: every d[k] is added once and x three times.
    data_sum = d[0] + d[1] + d[2] + 3 * feed_x
    # Expected grad: d(mean)/dx = 3/10 per element (x added 3 times, mean
    # over 10 elements).
    x_grad = [0.3] * 10
    res = exe.run(main_program,
                  feed={'d0': d[0],
                        'd1': d[1],
                        'd2': d[2],
                        'x': feed_x},
                  fetch_list=[sum_result.name, x.grad_name])
    self.assertTrue(np.allclose(res[0], data_sum))
    self.assertTrue(np.allclose(res[1], x_grad))
def check_forward_backward(self):
    """Check elementwise_add backward via a hand-built program.

    The expected gradients are computed in numpy: out@GRAD flows straight
    through to x@GRAD, while y@GRAD is out@GRAD summed over every axis y was
    broadcast along (all axes except ``self.axis``).
    """

    def test_with_place(place):
        out_grad = np.random.random_sample(self.x.shape).astype(np.float32)
        x_grad = out_grad
        # FIX: wrap range() in list().  Python 3's range object is immutable
        # and does not support item deletion; on Python 2 range() returned a
        # list, which is why the bare `del sum_axis[...]` used to work.
        sum_axis = list(range(0, len(self.x.shape)))
        del sum_axis[self.axis]
        y_grad = np.sum(out_grad, axis=tuple(sum_axis))

        # Collect all arrays by name (via locals()) so they can be
        # created/fed/fetched uniformly below.
        var_dict = locals()
        var_dict['y'] = self.y
        var_dict['x'] = self.x
        var_dict['out'] = self.out
        var_dict['y@GRAD'] = y_grad
        var_dict['x@GRAD'] = x_grad
        var_dict['out@GRAD'] = out_grad

        var_names = ['x', 'y', 'out', 'y@GRAD', 'x@GRAD', 'out@GRAD']
        ground_truth = {name: var_dict[name] for name in var_names}

        program = fluid.Program()
        with fluid.program_guard(program):
            block = program.global_block()
            for name in ground_truth:
                block.create_var(
                    name=name,
                    dtype='float32',
                    shape=ground_truth[name].shape)
            elementwise_add_op = block.append_op(
                type="elementwise_add",
                inputs={
                    "X": block.var('x'),
                    "Y": block.var('y'),
                },
                outputs={"Out": block.var('out'), },
                attrs={"axis": self.axis, })

            # generate backward op_desc
            grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
                elementwise_add_op.desc, set(), [])
            grad_op_desc = grad_op_desc_list[0]
            new_op_desc = block.desc.append_op()
            new_op_desc.copy_from(grad_op_desc)
            # Declare each grad output var, infer type/shape, force float32.
            for var_name in grad_op_desc.output_arg_names():
                block.desc.var(var_name.encode("ascii"))
            grad_op_desc.infer_var_type(block.desc)
            grad_op_desc.infer_shape(block.desc)
            for arg in grad_op_desc.output_arg_names():
                grad_var = block.desc.find_var(arg.encode("ascii"))
                grad_var.set_dtype(core.VarDesc.VarType.FP32)

            exe = fluid.Executor(place)
            out = exe.run(program,
                          feed={
                              name: var_dict[name]
                              for name in ['x', 'y', 'out@GRAD']
                          },
                          fetch_list=['x@GRAD', 'y@GRAD'])
            self.__assert_close(x_grad, out[0], "x@GRAD")
            # NOTE: very loose tolerance on y@GRAD, kept from the original.
            self.__assert_close(y_grad, out[1], "y@GRAD", atol=1.4)

    places = [core.CPUPlace()]
    if core.is_compiled_with_cuda() and core.op_support_gpu(
            "elementwise_add"):
        places.append(core.CUDAPlace(0))

    for place in places:
        test_with_place(place)
def setUp(self):
    """Prepare the execution place, a random input, and its softmax reference."""
    if core.is_compiled_with_cuda():
        self.place = paddle.CUDAPlace(0)
    else:
        self.place = paddle.CPUPlace()
    # Random 4-D input and the expected softmax along the last axis.
    self.x_np = np.random.uniform(-1., 1., [2, 3, 4, 5]).astype('float32')
    self.out_ref = np.apply_along_axis(stable_softmax, -1, self.x_np)
    self.executed_api()
def test_sparse_adagrad(self):
    """Run the sparse adagrad check on CPU, and additionally on GPU when
    PaddlePaddle was compiled with CUDA."""
    devices = [core.CPUPlace()]
    if core.is_compiled_with_cuda():
        devices.append(core.CUDAPlace(0))
    for device in devices:
        self.check_with_place(device)
def _convert_to_place(device):
    """Translate a device string into the matching ``core.*Place`` object.

    Accepts 'cpu', 'gpu', 'xpu', 'npu', 'ipu', 'mlu' as well as the indexed
    forms 'gpu:x', 'xpu:x', 'npu:x' and 'mlu:x' (case-insensitive).  Raises
    ValueError when the string is malformed or PaddlePaddle was built
    without support for the requested device.
    """
    dev = device.lower()
    if dev == 'cpu':
        return core.CPUPlace()
    if dev == 'gpu':
        if not core.is_compiled_with_cuda():
            raise ValueError("The device should not be 'gpu', "
                             "since PaddlePaddle is not compiled with CUDA")
        return core.CUDAPlace(ParallelEnv().dev_id)
    if dev == 'xpu':
        if not core.is_compiled_with_xpu():
            raise ValueError("The device should not be 'xpu', "
                             "since PaddlePaddle is not compiled with XPU")
        # Default device index comes from the FLAGS_selected_xpus env var.
        first_xpu = os.getenv("FLAGS_selected_xpus", "0").split(",")[0]
        return core.XPUPlace(int(first_xpu))
    if dev == 'npu':
        if not core.is_compiled_with_npu():
            raise ValueError("The device should not be 'npu', "
                             "since PaddlePaddle is not compiled with NPU")
        first_npu = os.getenv("FLAGS_selected_npus", "0").split(",")[0]
        return core.NPUPlace(int(first_npu))
    if dev == 'ipu':
        if not core.is_compiled_with_ipu():
            raise ValueError(
                "The device should not be 'ipu', " \
                "since PaddlePaddle is not compiled with IPU")
        return core.IPUPlace()
    if dev == 'mlu':
        if not core.is_compiled_with_mlu():
            raise ValueError("The device should not be 'mlu', "
                             "since PaddlePaddle is not compiled with MLU")
        first_mlu = os.getenv("FLAGS_selected_mlus", "0").split(",")[0]
        return core.MLUPlace(int(first_mlu))

    # Indexed forms like 'gpu:3'; the prefixes are mutually exclusive, so at
    # most one pattern matches.
    gpu_match = re.match(r'gpu:\d+', dev)
    xpu_match = re.match(r'xpu:\d+', dev)
    npu_match = re.match(r'npu:\d+', dev)
    mlu_match = re.match(r'mlu:\d+', dev)
    if not (gpu_match or xpu_match or npu_match or mlu_match):
        raise ValueError(
            "The device must be a string which is like 'cpu', 'gpu', 'gpu:x', 'xpu', 'xpu:x', 'mlu', 'mlu:x', 'npu', 'npu:x' or ipu"
        )

    if gpu_match:
        if not core.is_compiled_with_cuda():
            raise ValueError(
                "The device should not be {}, since PaddlePaddle is "
                "not compiled with CUDA".format(gpu_match))
        place = core.CUDAPlace(int(device.split(':', 1)[1]))
    if xpu_match:
        if not core.is_compiled_with_xpu():
            raise ValueError(
                "The device should not be {}, since PaddlePaddle is "
                "not compiled with XPU".format(xpu_match))
        place = core.XPUPlace(int(device.split(':', 1)[1]))
    if npu_match:
        if not core.is_compiled_with_npu():
            raise ValueError(
                "The device should not be {}, since PaddlePaddle is "
                "not compiled with NPU".format(npu_match))
        place = core.NPUPlace(int(device.split(':', 1)[1]))
    if mlu_match:
        if not core.is_compiled_with_mlu():
            raise ValueError(
                "The device should not be {}, since PaddlePaddle is "
                "not compiled with mlu".format(mlu_match))
        place = core.MLUPlace(int(device.split(':', 1)[1]))
    return place
def test_concat_rows(self):
    """Exercise the concat-rows check on every available place (CPU always,
    GPU when compiled with CUDA)."""
    candidate_places = [core.CPUPlace()]
    if core.is_compiled_with_cuda():
        candidate_places.append(core.CUDAPlace(0))
    for candidate in candidate_places:
        self.check_with_place(candidate)
def test_mnist_forward_float32(self):
    """Run MNIST forward-only (eval mode, no backward/optimizer step) in
    dygraph and in static graph with identical seeds, then assert the
    initial parameters and the last-batch loss agree.
    """
    epoch_num = 1
    with fluid.dygraph.guard():
        paddle.seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)
        mnist = MNIST()
        sgd = SGDOptimizer(
            learning_rate=1e-3, parameter_list=mnist.parameters())
        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
        dy_param_init_value = {}
        # Forward only: eval mode and no backward() / minimize() below.
        mnist.eval()
        for epoch in range(epoch_num):
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array(
                    [x[0].reshape(1, 28, 28)
                     for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(128, 1)

                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True

                cost = mnist(img)
                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)
                # dy_out keeps the loss of the last processed batch.
                dy_out = avg_loss.numpy()

                # Snapshot initial parameters on the very first batch.
                if epoch == 0 and batch_id == 0:
                    for param in mnist.parameters():
                        dy_param_init_value[param.name] = param.numpy()

    with new_program_scope():
        paddle.seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)
        exe = fluid.Executor(fluid.CPUPlace(
        ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))

        mnist = MNIST()
        sgd = SGDOptimizer(learning_rate=1e-3)
        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=128, drop_last=True)

        img = fluid.layers.data(
            name='pixel', shape=[1, 28, 28], dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        cost = mnist(img)
        loss = fluid.layers.cross_entropy(cost, label)
        avg_loss = fluid.layers.mean(loss)

        # initialize params and fetch them
        static_param_init_value = {}
        static_param_name_list = []
        for param in mnist.parameters():
            static_param_name_list.append(param.name)

        out = exe.run(fluid.default_startup_program(),
                      fetch_list=static_param_name_list)

        for i in range(len(static_param_name_list)):
            static_param_init_value[static_param_name_list[i]] = out[i]

        for epoch in range(epoch_num):
            for batch_id, data in enumerate(train_reader()):
                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28)
                     for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape([128, 1])

                fetch_list = [avg_loss.name]
                out = exe.run(
                    fluid.default_main_program(),
                    feed={"pixel": static_x_data,
                          "label": y_data},
                    fetch_list=fetch_list)

                static_out = out[0]

    # NOTE(review): `.all()` reduces each array to a scalar before the
    # comparison, so this does not compare the data elementwise — likely
    # intended np.allclose(dy_x_data, static_x_data); left unchanged here.
    self.assertTrue(np.allclose(dy_x_data.all(), static_x_data.all()))

    for key, value in six.iteritems(static_param_init_value):
        self.assertTrue(np.allclose(value, dy_param_init_value[key]))

    self.assertTrue(np.allclose(static_out, dy_out))
def test_forward_backward(self):
    """Compare batch_norm forward/backward against the numpy reference.

    Builds a single batch_norm op plus its auto-generated grad op in a fresh
    program, runs it for every supported place and data layout, and checks
    all outputs and gradients against self.ref_forward_backward.
    """

    def test_with_place(place, data_layout, shape):
        # attr
        epsilon = 0.00001
        momentum = 0.9
        if data_layout == "NCHW":
            n, c, h, w = shape[0], shape[1], shape[2], shape[3]
        else:
            n, h, w, c = shape[0], shape[1], shape[2], shape[3]
        # scale/bias/mean/variance are per-channel vectors.
        scale_shape = [c]

        np.random.seed(123)
        x = np.random.random_sample(shape).astype(np.float32)
        scale = np.random.random_sample(scale_shape).astype(np.float32)
        bias = np.random.random_sample(scale_shape).astype(np.float32)
        mean = np.zeros(scale_shape).astype(np.float32)
        variance = np.ones(scale_shape).astype(np.float32)
        y_grad = np.random.random_sample(shape).astype(np.float32)

        y, mean_out, variance_out, saved_mean, saved_variance, x_grad, scale_grad, bias_grad = self.ref_forward_backward(
            x, y_grad, scale, bias, mean, variance, epsilon, momentum,
            shape, data_layout)

        # Collect all arrays by name (via locals()) so they can be
        # created/fed/fetched uniformly below.
        var_dict = locals()
        var_dict['y@GRAD'] = y_grad
        var_names = [
            'x', 'scale', 'bias', 'mean', 'variance', 'y', 'saved_mean',
            'saved_variance'
        ]
        ground_truth = {name: var_dict[name] for name in var_names}

        program = fluid.Program()
        with fluid.program_guard(program):
            block = program.global_block()
            for name in ground_truth:
                block.create_var(
                    name=name,
                    dtype='float32',
                    shape=ground_truth[name].shape)
            bn_op = block.append_op(
                type="batch_norm",
                inputs={
                    "X": block.var('x'),
                    "Scale": block.var('scale'),
                    "Bias": block.var('bias'),
                    "Mean": block.var('mean'),
                    "Variance": block.var('variance')
                },
                outputs={
                    "Y": block.var('y'),
                    "MeanOut": block.var('mean'),  # share the same memory
                    "VarianceOut":
                    block.var('variance'),  # share the same memory
                    "SavedMean": block.var('saved_mean'),
                    "SavedVariance": block.var('saved_variance')
                },
                attrs={
                    "momentum": momentum,
                    "epsilon": epsilon,
                    "is_test": False,
                    "data_layout": data_layout,
                    "use_mkldnn": self.use_mkldnn
                })
            block.create_var(name='y@GRAD', dtype='float32', shape=y.shape)

            # generate backward op_desc
            grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
                bn_op.desc, set(), [])
            grad_op_desc = grad_op_desc_list[0]
            new_op_desc = block.desc.append_op()
            new_op_desc.copy_from(grad_op_desc)
            # Declare each grad output var, infer type/shape, force float32.
            for var_name in grad_op_desc.output_arg_names():
                block.desc.var(var_name.encode("ascii"))
            grad_op_desc.infer_var_type(block.desc)
            grad_op_desc.infer_shape(block.desc)
            for arg in grad_op_desc.output_arg_names():
                grad_var = block.desc.find_var(arg.encode("ascii"))
                grad_var.set_dtype(core.VarDesc.VarType.FP32)

            exe = fluid.Executor(place)
            out = exe.run(
                program,
                feed={
                    name: var_dict[name]
                    for name in
                    ['x', 'scale', 'bias', 'mean', 'variance', 'y@GRAD']
                },
                fetch_list=[
                    'y', 'mean', 'variance', 'saved_mean', 'saved_variance',
                    'x@GRAD', 'scale@GRAD', 'bias@GRAD'
                ])

        self.__assert_close(y, out[0], "y")
        self.__assert_close(mean_out, out[1], "mean")
        self.__assert_close(variance_out, out[2], "variance", 1e-3)
        self.__assert_close(saved_mean, out[3], "saved_mean")
        self.__assert_close(saved_variance, out[4], "saved_variance", 1e-3)
        self.__assert_close(x_grad, out[5], "x_grad")
        self.__assert_close(scale_grad, out[6], "scale_grad")
        self.__assert_close(bias_grad, out[7], "bias_grad")
        # FIX: the original used a Python 2 `print` statement, which is a
        # SyntaxError under Python 3; use the print() function instead.
        print("op test forward passed: ", str(place), data_layout)

    places = [core.CPUPlace()]
    if core.is_compiled_with_cuda() and core.op_support_gpu("batch_norm"):
        places.append(core.CUDAPlace(0))

    for place in places:
        for data_format in self.data_formats:
            test_with_place(place, data_format, [2, 3, 4, 5])
def test_ocr_test(self):
    """Train the OCR attention model for a few batches in dygraph, in eager
    mode, and in static graph with the same seed and data, then assert the
    losses and parameter values match across all three executions.
    """
    seed = 90
    epoch_num = 1
    if core.is_compiled_with_cuda():
        batch_num = 3
    else:
        batch_num = 2
    # FIX: the original wrote `np.random.seed = seed`, which rebinds the
    # numpy function attribute to an int instead of seeding the RNG (and
    # clobbers np.random.seed for the rest of the process).  Call it.
    np.random.seed(seed)

    # One fixed batch reused for every iteration and every execution mode.
    image_np = np.random.randn(Config.batch_size, Config.DATA_SHAPE[0],
                               Config.DATA_SHAPE[1],
                               Config.DATA_SHAPE[2]).astype('float32')
    label_in_np = np.arange(
        0, Config.max_length, dtype='int64').reshape([1, Config.max_length])
    for i in range(2, Config.batch_size + 1):
        label_in_np = np.vstack((label_in_np, np.arange(
            (i - 1) * Config.max_length,
            i * Config.max_length,
            dtype='int64').reshape([1, Config.max_length])))

    label_out_np = np.arange(
        0, Config.max_length, dtype='int64').reshape([1, Config.max_length])
    for i in range(2, Config.batch_size + 1):
        label_out_np = np.vstack((label_out_np, np.arange(
            (i - 1) * Config.max_length,
            i * Config.max_length,
            dtype='int64').reshape([1, Config.max_length])))

    def run_dygraph():
        # One full dygraph training run; also used under the eager guard.
        fluid.set_flags({'FLAGS_sort_sum_gradient': True})
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)
        ocr_attention = OCRAttention()

        if Config.learning_rate_decay == "piecewise_decay":
            learning_rate = fluid.layers.piecewise_decay(
                [50000], [Config.LR, Config.LR * 0.01])
        else:
            learning_rate = Config.LR
        optimizer = fluid.optimizer.SGD(
            learning_rate=0.001, parameter_list=ocr_attention.parameters())
        dy_param_init_value = {}
        for param in ocr_attention.parameters():
            dy_param_init_value[param.name] = param.numpy()
        for epoch in range(epoch_num):
            for batch_id in range(batch_num):
                label_in = to_variable(label_in_np)
                label_out = to_variable(label_out_np)
                label_out.stop_gradient = True
                img = to_variable(image_np)
                dy_prediction = ocr_attention(img, label_in)
                label_out = fluid.layers.reshape(
                    label_out, [-1, 1], inplace=False)
                dy_prediction = fluid.layers.reshape(
                    dy_prediction, [label_out.shape[0], -1], inplace=False)
                loss = fluid.layers.cross_entropy(
                    input=dy_prediction, label=label_out)
                avg_loss = fluid.layers.reduce_sum(loss)

                dy_out = avg_loss.numpy()

                # Snapshot any parameters created lazily by the first
                # forward pass.
                if epoch == 0 and batch_id == 0:
                    for param in ocr_attention.parameters():
                        if param.name not in dy_param_init_value:
                            dy_param_init_value[param.name] = param.numpy()
                avg_loss.backward()
                dy_grad_value = {}
                for param in ocr_attention.parameters():
                    if param.trainable:
                        np_array = np.array(
                            param._grad_ivar().value().get_tensor())
                        dy_grad_value[param.name +
                                      core.grad_var_suffix()] = np_array

                optimizer.minimize(avg_loss)
                ocr_attention.clear_gradients()
                dy_param_value = {}
                for param in ocr_attention.parameters():
                    dy_param_value[param.name] = param.numpy()

        return dy_out, dy_param_init_value, dy_param_value

    with fluid.dygraph.guard():
        dy_out, dy_param_init_value, dy_param_value = run_dygraph()

    with fluid.dygraph.guard():
        with _test_eager_guard():
            eager_out, eager_param_init_value, eager_param_value = run_dygraph(
            )

    with new_program_scope():
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)
        exe = fluid.Executor(fluid.CPUPlace(
        ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
        ocr_attention = OCRAttention()

        if Config.learning_rate_decay == "piecewise_decay":
            learning_rate = fluid.layers.piecewise_decay(
                [50000], [Config.LR, Config.LR * 0.01])
        else:
            learning_rate = Config.LR
        optimizer = fluid.optimizer.SGD(learning_rate=0.001)

        images = fluid.layers.data(
            name='pixel', shape=Config.DATA_SHAPE, dtype='float32')
        static_label_in = fluid.layers.data(
            name='label_in', shape=[1], dtype='int64', lod_level=0)
        static_label_out = fluid.layers.data(
            name='label_out', shape=[1], dtype='int64', lod_level=0)
        static_label_out.stop_gradient = True
        static_label_out.trainable = False

        static_prediction = ocr_attention(images, static_label_in)

        static_prediction = fluid.layers.reshape(
            static_prediction, shape=[-1, Config.num_classes + 2])

        cost = fluid.layers.cross_entropy(
            input=static_prediction, label=static_label_out)
        static_avg_loss = fluid.layers.reduce_sum(cost)
        # param_grad_list = fluid.backward.append_backward(static_avg_loss)
        optimizer.minimize(static_avg_loss)

        static_param_init_value = {}
        static_param_name_list = []
        static_grad_name_list = []
        for param in ocr_attention.parameters():
            static_param_name_list.append(param.name)
            if param.trainable:
                static_grad_name_list.append(param.name +
                                             core.grad_var_suffix())

        out = exe.run(fluid.default_startup_program(),
                      fetch_list=static_param_name_list)

        for i in range(len(static_param_name_list)):
            static_param_init_value[static_param_name_list[i]] = out[i]

        # Fetch order: [loss, *params, *grads].
        fetch_list = [static_avg_loss.name]
        fetch_list.extend(static_param_name_list)
        fetch_list.extend(static_grad_name_list)
        for epoch in range(epoch_num):
            for batch_id in range(batch_num):
                static_label_in = label_in_np
                static_label_out = label_out_np
                static_label_out = static_label_out.reshape((-1, 1))
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "pixel": image_np,
                                  "label_in": static_label_in,
                                  "label_out": static_label_out
                              },
                              fetch_list=fetch_list)
                static_param_value = {}
                static_grad_value = {}
                static_out = out[0]
                for i in range(1, len(static_param_name_list) + 1):
                    static_param_value[static_param_name_list[i -
                                                              1]] = out[i]
                grad_start_pos = len(static_param_name_list) + 1
                for i in range(grad_start_pos,
                               len(static_grad_name_list) + grad_start_pos):
                    static_grad_value[static_grad_name_list[
                        i - grad_start_pos]] = out[i]

    self.assertTrue(np.allclose(static_out, dy_out))

    for key, value in six.iteritems(static_param_init_value):
        self.assertTrue(np.array_equal(value, dy_param_init_value[key]))

    for key, value in six.iteritems(static_param_value):
        self.assertTrue(np.allclose(value, dy_param_value[key], rtol=1e-05))

    # check eager here
    self.assertTrue(np.allclose(static_out, eager_out))

    for key, value in six.iteritems(static_param_init_value):
        self.assertTrue(np.array_equal(value, eager_param_init_value[key]))

    for key, value in six.iteritems(static_param_value):
        self.assertTrue(
            np.allclose(value, eager_param_value[key], rtol=1e-05))
def test_mnist_float32(self):
    """Compare one policy-gradient update (REINFORCE-style masked log-prob
    loss) between dygraph and static graph: same seed and inputs must give
    equal initial params, loss, and post-update params.
    """
    seed = 90
    epoch_num = 1

    # Fixed random rollout data shared by both executions.
    state = np.random.normal(size=4).astype("float32")
    state_list = state.tolist()
    reward = np.random.random(size=[1, 1]).astype("float32")
    reward_list = reward.tolist()
    action_list = [1]
    action = np.array(action_list).astype("float32")
    mask_list = [[0, 1]]
    mask = np.array(mask_list).astype("float32")

    with fluid.dygraph.guard():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed

        policy = Policy("PolicyModel")

        dy_state = fluid.dygraph.base.to_variable(state)
        dy_state.stop_gradient = True
        loss_probs = policy(dy_state)

        dy_mask = fluid.dygraph.base.to_variable(mask)
        dy_mask.stop_gradient = True

        # loss = sum(reward * sum(mask * log(probs), axis=-1))
        loss_probs = fluid.layers.log(loss_probs)
        loss_probs = fluid.layers.elementwise_mul(loss_probs, dy_mask)
        loss_probs = fluid.layers.reduce_sum(loss_probs, dim=-1)

        dy_reward = fluid.dygraph.base.to_variable(reward)
        dy_reward.stop_gradient = True

        loss_probs = fluid.layers.elementwise_mul(dy_reward, loss_probs)
        loss = fluid.layers.reduce_sum(loss_probs)

        sgd = SGDOptimizer(learning_rate=1e-3)

        dy_param_init_value = {}

        dy_out = loss.numpy()

        # Snapshot parameters before the update step.
        for param in policy.parameters():
            dy_param_init_value[param.name] = param.numpy()

        loss.backward()
        sgd.minimize(loss)
        policy.clear_gradients()

        dy_param_value = {}
        for param in policy.parameters():
            dy_param_value[param.name] = param.numpy()

    with new_program_scope():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed

        exe = fluid.Executor(fluid.CPUPlace(
        ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))

        policy = Policy("PolicyModel")

        st_sgd = SGDOptimizer(learning_rate=1e-3)

        st_state = fluid.layers.data(
            name='st_state', shape=[4], dtype='float32')
        st_reward = fluid.layers.data(
            name='st_reward', shape=[1], dtype='float32')
        st_mask = fluid.layers.data(
            name='st_mask', shape=[2], dtype='float32')

        # Static mirror of the dygraph loss above.
        st_loss_probs = policy(st_state)

        st_loss_probs = fluid.layers.log(st_loss_probs)
        st_loss_probs = fluid.layers.elementwise_mul(st_loss_probs, st_mask)
        st_loss_probs = fluid.layers.reduce_sum(st_loss_probs, dim=-1)

        st_loss_probs = fluid.layers.elementwise_mul(st_reward,
                                                     st_loss_probs)
        st_loss = fluid.layers.reduce_sum(st_loss_probs)

        st_sgd.minimize(st_loss)

        # initialize params and fetch them
        static_param_init_value = {}
        static_param_name_list = []
        for param in policy.parameters():
            static_param_name_list.append(param.name)

        out = exe.run(fluid.default_startup_program(),
                      fetch_list=static_param_name_list)

        for i in range(len(static_param_name_list)):
            static_param_init_value[static_param_name_list[i]] = out[i]

        # Fetch order: [loss, *params-after-update].
        fetch_list = [st_loss.name]
        fetch_list.extend(static_param_name_list)

        out = exe.run(fluid.default_main_program(),
                      feed={
                          "st_state": state,
                          "st_reward": reward,
                          "st_mask": mask
                      },
                      fetch_list=fetch_list)

        static_param_value = {}
        static_out = out[0]
        for i in range(1, len(out)):
            static_param_value[static_param_name_list[i - 1]] = out[i]

    #self.assertTrue(np.allclose(dy_x_data.all(), static_x_data.all()))

    for key, value in six.iteritems(static_param_init_value):
        self.assertTrue(np.equal(value, dy_param_init_value[key]).all())

    self.assertTrue(np.equal(static_out, dy_out).all())

    for key, value in six.iteritems(static_param_value):
        self.assertTrue(np.equal(value, dy_param_value[key]).all())
def check_output(self, atol=1e-5):
    """Verify the op's outputs on every supported place.

    Always checks on CPU; also checks on GPU when PaddlePaddle is compiled
    with CUDA and the op supports it.  `atol` is the absolute tolerance
    forwarded to check_output_with_place.
    """
    targets = [core.CPUPlace()]
    if core.is_compiled_with_cuda() and core.op_support_gpu(self.op_type):
        targets.append(core.CUDAPlace(0))
    for target in targets:
        self.check_output_with_place(target, atol)