def check_l2decay(self, place, model):
    """Train ``model`` once with a manually-added L2 penalty and return the
    parameter sum produced by ``self.run_program``.

    The L2 term is built by hand (sum of squared parameters, scaled by 0.5)
    and added to the model loss, so the result can be compared against an
    optimizer-side regularizer elsewhere in the test class.
    """
    # Seed both the global and per-program generators for reproducibility.
    paddle.manual_seed(1)
    paddle.framework.random._manual_program_seed(1)
    main_prog = fluid.framework.Program()
    startup_prog = fluid.framework.Program()
    with self.scope_prog_guard(main_prog=main_prog, startup_prog=startup_prog):
        data = fluid.layers.data(
            name="words", shape=[1], dtype="int64", lod_level=1)
        label = fluid.layers.data(name="label", shape=[1], dtype="int64")
        avg_cost_l2 = model(data, label, len(self.word_dict))

        # Hand-rolled L2 decay: 0.5 * sum_i ||param_i||^2 added to the loss.
        param_list = fluid.default_main_program().block(0).all_parameters()
        para_sum = []
        for para in param_list:
            para_mul = fluid.layers.square(x=para)
            para_sum.append(fluid.layers.reduce_sum(input=para_mul))
        avg_cost_l2 += fluid.layers.sums(para_sum) * .5

        optimizer = fluid.optimizer.Adagrad(learning_rate=0.1)
        optimizer.minimize(avg_cost_l2)
        param_sum = self.run_program(place, [data, label])
    return param_sum
def simple_fc_net(places, use_legacy_py_reader, use_double_buffer):
    """Build a small seeded 3-layer FC classifier fed through a ``PyReader``.

    Returns ``(startup_prog, main_prog, py_reader, loss)``.
    NOTE(review): ``places`` is accepted but never used inside this builder —
    presumably consumed by the caller; confirm before removing.
    """
    paddle.manual_seed(1)
    paddle.framework.random._manual_program_seed(1)
    startup_prog = fluid.Program()
    main_prog = fluid.Program()
    with fluid.unique_name.guard():
        with fluid.program_guard(main_prog, startup_prog):
            image = fluid.layers.data(
                name='image', shape=[784], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            # iterable mode is the inverse of the legacy (feed-based) mode.
            py_reader = fluid.io.PyReader(
                feed_list=[image, label],
                capacity=4,
                iterable=not use_legacy_py_reader,
                use_double_buffer=use_double_buffer)
            hidden = image
            for hidden_size in [10, 20, 30]:
                hidden = fluid.layers.fc(
                    hidden,
                    size=hidden_size,
                    act='tanh',
                    bias_attr=fluid.ParamAttr(
                        initializer=fluid.initializer.Constant(value=1.0)))
            predict_label = fluid.layers.fc(hidden,
                                            size=CLASS_NUM,
                                            act='softmax')
            loss = fluid.layers.mean(
                fluid.layers.cross_entropy(
                    input=predict_label, label=label))
            optimizer = fluid.optimizer.Adam()
            optimizer.minimize(loss)
    return startup_prog, main_prog, py_reader, loss
def run_simple_conv(inp_np, use_scaler=True):
    """Run one seeded forward/backward step of ``SimpleConv`` in dygraph mode.

    When ``use_scaler`` is True the loss is scaled with ``AmpScaler``
    (mixed-precision path); otherwise plain SGD ``minimize`` is used.
    Returns ``(optimize_ops, params_grads)`` for comparison between the
    two paths.
    """
    paddle.manual_seed(10)
    paddle.framework.random._manual_program_seed(10)
    with fluid.dygraph.guard():
        model = SimpleConv(
            num_channels=3,
            num_filters=64,
            filter_size=7,
            stride=2,
            act='relu')
        optimizer = fluid.optimizer.SGDOptimizer(
            learning_rate=0.01, parameter_list=model.parameters())
        scaler = fluid.dygraph.AmpScaler(init_loss_scaling=1024)
        data = fluid.dygraph.to_variable(inp_np)
        out = model(data)
        loss = fluid.layers.mean(out)
        if use_scaler:
            print('use scaler')
            # Scale the loss before backward so small gradients survive fp16.
            scaled_loss = scaler.scale(loss)
            scaled_loss.backward()
            optimize_ops, params_grads = scaler.minimize(optimizer,
                                                         scaled_loss)
        else:
            print('use no scaler')
            loss.backward()
            optimize_ops, params_grads = optimizer.minimize(loss)
    return optimize_ops, params_grads
def train(conf_dict, to_static):
    """
    train process

    Trains the BOW similarity net with a hinge loss for one pass over the
    reader and returns the list of per-step mean losses.  ``to_static``
    toggles dygraph-to-static translation so both modes can be compared.
    """
    program_translator = ProgramTranslator()
    program_translator.enable(to_static)

    # Get device
    if fluid.is_compiled_with_cuda():
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    with fluid.dygraph.guard(place):
        paddle.manual_seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)
        conf_dict['dict_size'] = len(vocab)
        conf_dict['seq_len'] = args.seq_len

        net = BOW(conf_dict)
        loss = HingeLoss(conf_dict)
        optimizer = fluid.optimizer.AdamOptimizer(
            learning_rate=0.001,
            beta1=0.9,
            beta2=0.999,
            epsilon=1e-08,
            parameter_list=net.parameters())

        metric = fluid.metrics.Auc(name="auc")

        global_step = 0
        losses = []

        train_loader = fluid.io.DataLoader.from_generator(
            capacity=16, return_list=True, iterable=True,
            use_double_buffer=True)
        get_train_examples = simnet_process.get_reader(
            "train", epoch=args.epoch)
        train_loader.set_sample_list_generator(
            paddle.batch(get_train_examples, batch_size=args.batch_size),
            place)

        for left, pos_right, neg_right in train_loader():
            left = fluid.layers.reshape(left, shape=[-1, 1])
            pos_right = fluid.layers.reshape(pos_right, shape=[-1, 1])
            neg_right = fluid.layers.reshape(neg_right, shape=[-1, 1])
            net.train()
            global_step += 1
            left_feat, pos_score = net(left, pos_right)
            pred = pos_score
            _, neg_score = net(left, neg_right)
            avg_cost = loss.compute(pos_score, neg_score)
            losses.append(np.mean(avg_cost.numpy()))
            avg_cost.backward()
            optimizer.minimize(avg_cost)
            net.clear_gradients()
        return losses
def setUp(self): self.model_path = "model.test_jit_save_load" # enable dygraph mode fluid.enable_dygraph() # config seed paddle.manual_seed(SEED) paddle.framework.random._manual_program_seed(SEED)
def test_generator_randperm_static(self):
    """Re-seeding must reproduce ``paddle.randperm`` results in static mode.

    Runs the same program twice: after reseeding with the same value, the
    two runs must match element-wise, while the two ops within one run must
    differ (the generator advances between them).
    """
    fluid.disable_dygraph()

    paddle.manual_seed(123123143)

    startup_program = fluid.Program()
    train_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        # example 1:
        # attr shape is a list which doesn't contain tensor Variable.
        result_1 = paddle.randperm(10)
        result_2 = paddle.randperm(10)

        exe = fluid.Executor(fluid.CPUPlace())
        exe.run(startup_program)
        out1 = exe.run(train_program,
                       feed={},
                       fetch_list=[result_1, result_2])

        # Reseed with the identical value, then re-run the same program.
        paddle.manual_seed(123123143)
        out2 = exe.run(train_program,
                       feed={},
                       fetch_list=[result_1, result_2])

        out1_res1 = np.array(out1[0])
        out1_res2 = np.array(out1[1])
        out2_res1 = np.array(out2[0])
        out2_res2 = np.array(out2[1])

        # CPU-only check: the CUDA generator path is covered elsewhere.
        if not core.is_compiled_with_cuda():
            print(">>>>>>> randperm static >>>>>>>")
            self.assertTrue(np.allclose(out1_res1, out2_res1))
            self.assertTrue(np.allclose(out1_res2, out2_res2))
            self.assertTrue(not np.allclose(out1_res2, out1_res1))
def training_test(self, is_sparse):
    """Run ten SGD steps of the hsigmoid net and return the per-step losses.

    ``is_sparse`` is forwarded to ``self.hs_net_conf`` so the dense and
    sparse parameter-update paths can be compared by the caller.
    """
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        paddle.manual_seed(1)
        start_up = fluid.default_startup_program()
        # Fixed toy inputs: two hierarchical paths/codes over six samples.
        x = np.arange(6).reshape(6)
        path_table = np.array([(1, 2, -1), (1, 2, -1)]).astype('int64')
        path_code = np.array([(1, 0, -1), (0, 0, -1)]).astype('int64')
        label = np.array([1, 4]).astype('int64')

        loss, data_list = self.hs_net_conf(is_sparse)
        optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
        optimizer.minimize(loss)

        main_program = fluid.default_main_program()
        place = fluid.CPUPlace()
        feeder = fluid.DataFeeder(feed_list=data_list, place=place)
        exe = fluid.Executor(place)

        exe.run(start_up)
        result = list()
        for i in range(10):
            # Alternate between the two fixed samples via i % 2.
            data = [([[x[i % 2]]], [list(path_table[i % 2])],
                     [list(path_code[i % 2])], [label[i % 2]])]

            loss_val = exe.run(main_program,
                               feed=feeder.feed(data),
                               fetch_list=[loss])
            result.append(loss_val)
        return result
def check_weight_decay2(self, place, model):
    """Apply weight decay manually (param -= lr * param after Adam) and
    return the parameter sum from ``self.run_program``.

    Serves as the reference against optimizer-side ``regularization`` in
    the sibling check of this test class.
    """
    paddle.manual_seed(1)
    paddle.framework.random._manual_program_seed(1)
    main_prog = fluid.framework.Program()
    startup_prog = fluid.framework.Program()

    with prog_scope_guard(main_prog=main_prog, startup_prog=startup_prog):
        data = fluid.layers.data(
            name="words", shape=[1], dtype="int64", lod_level=1)
        label = fluid.layers.data(name="label", shape=[1], dtype="int64")

        avg_cost = model(data, label, len(self.word_dict))

        # Pair each parameter with its decay term (lr * param).
        param_list = [(var, var * self.learning_rate)
                      for var in main_prog.block(0).all_parameters()]

        optimizer = fluid.optimizer.Adam(learning_rate=self.learning_rate)

        optimizer.minimize(avg_cost)
        # Manual decay applied after the optimizer step: p <- p - lr * p.
        for params in param_list:
            updated_p = fluid.layers.elementwise_sub(
                x=params[0], y=params[1])
            fluid.layers.assign(input=updated_p, output=params[0])

        param_sum = self.run_program(place, [data, label])
    return param_sum
def setUp(self):
    """Build, train and jit-save a seeded ``LinearNet`` on CPU for the tests."""
    # enable dygraph mode
    place = paddle.CPUPlace()
    paddle.disable_static(place)

    # config seed
    paddle.manual_seed(SEED)
    paddle.framework.random._manual_program_seed(SEED)

    # create network
    self.layer = LinearNet()
    self.loss_fn = nn.CrossEntropyLoss()
    self.sgd = opt.SGD(learning_rate=0.001,
                       parameters=self.layer.parameters())

    # create data loader
    dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
    self.loader = paddle.io.DataLoader(
        dataset,
        places=place,
        batch_size=BATCH_SIZE,
        shuffle=True,
        drop_last=True,
        num_workers=2)

    # train
    train(self.layer, self.loader, self.loss_fn, self.sgd)

    # save
    self.model_path = "linear.example.model"
    paddle.jit.save(self.layer, self.model_path)
def setUp(self):
    """Switch to dygraph mode and seed the RNGs before each pruning test."""
    self.linear_size = 4
    self.model_path = "model.jit_prune_model_and_load"
    # enable dygraph mode
    fluid.enable_dygraph()
    # config seed
    paddle.manual_seed(SEED)
    paddle.framework.random._manual_program_seed(SEED)
def multihead_attention_test_helper(self_attention, cache):
    """Compare paddle's ``MultiHeadAttention`` against a numpy reference.

    Parameters
    ----------
    self_attention : bool
        True for self-attention, False for cross-attention.
    cache : bool
        Whether to exercise the incremental-decoding cache path.

    Raises an ``AssertionError`` (via ``np.testing.assert_allclose``) if the
    layer output diverges from the numpy computation by more than 1e-6.
    """
    paddle.manual_seed(2020)
    paddle.framework.random._manual_program_seed(2020)
    # self_attention|cross_attention, cache|No cache
    with fluid.dygraph.guard(fluid.CPUPlace()):

        # generate params for multi_head_attention
        batch_size, query_length, key_length, value_length, embed_dim, kdim, vdim, num_heads, attn_dropout = generate_basic_params(
            "attn", self_attention)
        query, key, value, attn_mask, cache_dict = generate_query_key_value_cache(
            self_attention, batch_size, num_heads, query_length,
            embed_dim, key_length, value_length, kdim, vdim, cache)
        if cache and self_attention:
            # Cached self-attention doubles the key length, so the mask
            # must cover both the cached and the new positions.
            attn_mask = np.concatenate((attn_mask, attn_mask), axis=3)
        need_weight, param_attr, bias_attr = False, None, None
        # call paddle's function
        multi_head_attn = MultiHeadAttention(embed_dim, num_heads,
                                             attn_dropout, kdim, vdim,
                                             need_weight, param_attr,
                                             bias_attr)
        # construct cache object
        cache_obj = None
        if cache_dict:
            # FIX: the original conditions were `'k' and 'v' in cache_dict`
            # which Python parses as `'k' and ('v' in cache_dict)` — the
            # truthy literal 'k' is ignored and only the second membership
            # test runs.  Test each key explicitly.
            if 'k' in cache_dict and 'v' in cache_dict:
                cache_obj = multi_head_attn.Cache(
                    paddle.to_variable(cache_dict['k']),
                    paddle.to_variable(cache_dict['v']))
            elif 'static_k' in cache_dict and 'static_v' in cache_dict:
                cache_obj = multi_head_attn.StaticCache(
                    paddle.to_variable(cache_dict['static_k']),
                    paddle.to_variable(cache_dict['static_v']))
        if attn_mask is not None:
            attn_output = multi_head_attn(
                paddle.to_variable(query),
                paddle.to_variable(key),
                paddle.to_variable(value),
                paddle.to_variable(attn_mask), cache_obj)
        else:
            attn_output = multi_head_attn(paddle.to_variable(query),
                                          paddle.to_variable(key),
                                          paddle.to_variable(value),
                                          attn_mask, cache_obj)
        # With a cache the layer returns (output, new_cache).
        attn_output = attn_output[0] if cache_dict else attn_output

        # implementation by numpy
        # compute q, k, v
        q, k, v, _ = prepare_qkv(query, key, value, num_heads,
                                 embed_dim, self_attention,
                                 multi_head_attn, cache_dict)
        # scale dot product attention
        attn_heads = scaled_dot_product_attention(
            q, k, v, embed_dim // num_heads, attn_mask, multi_head_attn)
        out_proj_weight = multi_head_attn.out_proj.weight.numpy()
        reference = fc(attn_heads, out_proj_weight)

        np.testing.assert_allclose(
            attn_output.numpy(), reference, atol=1e-6)
def setUp(self):
    """Enable dygraph, seed the RNGs, and save a base model for load tests."""
    self.linear_size = 4
    self.model_path = "model.jit_multi_load"
    # enable dygraph mode
    fluid.enable_dygraph()
    # config seed
    paddle.manual_seed(SEED)
    paddle.framework.random._manual_program_seed(SEED)
    # train and save base model
    self.train_and_save_orig_model()
def train(args, to_static):
    """Train the sentiment model selected by ``args.model_type`` and return
    the collected per-step losses.

    ``to_static`` toggles dygraph-to-static translation so the two modes
    can be compared for numerical equality by the caller.
    """
    program_translator.enable(to_static)
    place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \
        else fluid.CPUPlace()

    with fluid.dygraph.guard(place):
        np.random.seed(SEED)
        paddle.manual_seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)

        train_reader = fake_data_reader(args.class_num, args.vocab_size,
                                        args.batch_size, args.padding_size)
        train_loader = fluid.io.DataLoader.from_generator(capacity=24)
        train_loader.set_sample_list_generator(train_reader)

        # Select the architecture under test from the CLI argument.
        if args.model_type == 'cnn_net':
            model = CNN(args.vocab_size, args.batch_size,
                        args.padding_size)
        elif args.model_type == 'bow_net':
            model = BOW(args.vocab_size, args.batch_size,
                        args.padding_size)
        elif args.model_type == 'gru_net':
            model = GRU(args.vocab_size, args.batch_size,
                        args.padding_size)
        elif args.model_type == 'bigru_net':
            model = BiGRU(args.vocab_size, args.batch_size,
                          args.padding_size)
        sgd_optimizer = fluid.optimizer.Adagrad(
            learning_rate=args.lr, parameter_list=model.parameters())

        loss_data = []
        for eop in range(args.epoch):
            time_begin = time.time()
            for batch_id, data in enumerate(train_loader()):
                word_ids, labels, seq_lens = data
                doc = to_variable(word_ids.numpy().reshape(-1)).astype(
                    'int64')
                label = labels.astype('int64')
                model.train()
                avg_cost, prediction, acc = model(doc, label)
                loss_data.append(avg_cost.numpy()[0])
                avg_cost.backward()
                sgd_optimizer.minimize(avg_cost)
                model.clear_gradients()
                if batch_id % args.log_step == 0:
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("step: %d, ave loss: %f, speed: %f steps/s" %
                          (batch_id, avg_cost.numpy()[0],
                           args.log_step / used_time))
                    time_begin = time.time()
                # Stop early once the configured number of steps is reached.
                if batch_id == args.train_step:
                    break
                batch_id += 1
    return loss_data
def check(self, place, use_cuda):
    """Train the same seeded program with ``fuse_bn_act_ops`` off and on and
    assert the per-iteration losses agree to within 1e-5.

    Both runs start from the same ``startup_program`` (executed in fresh
    scopes) so the fused pass is the only difference between them.
    """
    paddle.manual_seed(1)
    paddle.framework.random._manual_program_seed(1)
    main_program = fluid.Program()
    startup_program = fluid.Program()
    x, y, loss = self.build_program(main_program, startup_program,
                                    use_cuda)
    exe = fluid.Executor(place)
    iters = 10
    batch_size = 16
    feeder = fluid.DataFeeder(feed_list=[x, y], place=place)

    # close fused_bn_act_ops
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_bn_act_ops = False
    binary = fluid.CompiledProgram(main_program).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)
    train_reader = paddle.batch(
        paddle.dataset.mnist.train(), batch_size=batch_size)
    loss_vals = []
    scope = fluid.Scope()
    with fluid.scope_guard(scope):
        exe.run(startup_program)
        for _ in range(iters):
            data = next(train_reader())
            loss_v = exe.run(binary,
                             feed=feeder.feed(data),
                             fetch_list=[loss])
            loss_vals.append(loss_v[0][0])

    # open fused_bn_act_ops
    build_strategy_fused = fluid.BuildStrategy()
    build_strategy_fused.fuse_bn_act_ops = True
    binary_fused = fluid.CompiledProgram(
        main_program).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy_fused)
    train_reader_fused = paddle.batch(
        paddle.dataset.mnist.train(), batch_size=batch_size)
    loss_vals_fused = []
    scope_fused = fluid.Scope()
    with fluid.scope_guard(scope_fused):
        exe.run(startup_program)
        for _ in range(iters):
            data = next(train_reader_fused())
            loss_v = exe.run(binary_fused,
                             feed=feeder.feed(data),
                             fetch_list=[loss])
            loss_vals_fused.append(loss_v[0][0])

    # check loss
    for i in range(iters):
        self.assertAlmostEqual(loss_vals[i], loss_vals_fused[i],
                               delta=1e-5)
def __init__(self, cfg):
    """Build the seeded dygraph GAN trainer: generator, discriminator and
    one optimizer for each, configured from ``cfg``."""
    paddle.manual_seed(1)
    paddle.framework.random._manual_program_seed(1)
    self.generator = Generator(cfg)
    self.discriminator = Discriminator(cfg)
    self.g_optimizer = build_optimizer(self.generator, cfg)
    self.d_optimizer = build_optimizer(self.discriminator, cfg)
    self.cfg = cfg

    # Gradient-sum ordering flag so dygraph results match static mode.
    fluid.set_flags({'FLAGS_sort_sum_gradient': cfg.sort_sum_gradient})
def __init__(self, cfg):
    """Build the static-graph GAN trainer.

    Constructs two separate programs (generator loss and discriminator
    loss) under ``unique_name`` guards so parameter names line up between
    them, then runs both startup programs in a private scope.
    """
    self.cfg = cfg

    def create_data_layer():
        # Shared input layers: real image plus original/target labels.
        image_real = fluid.data(
            shape=[None, 3, cfg.image_size, cfg.image_size],
            dtype='float32',
            name='image_real')
        label_org = fluid.data(
            shape=[None, cfg.c_dim], dtype='float32', name='label_org')
        label_trg = fluid.data(
            shape=[None, cfg.c_dim], dtype='float32', name='label_trg')
        return image_real, label_org, label_trg

    paddle.manual_seed(cfg.seed)
    paddle.framework.random._manual_program_seed(cfg.seed)
    self.gen_program = fluid.Program()
    gen_startup_program = fluid.Program()

    with fluid.program_guard(self.gen_program, gen_startup_program):
        with fluid.unique_name.guard():
            image_real, label_org, label_trg = create_data_layer()
            generator = Generator(cfg)
            discriminator = Discriminator(cfg)
            g_loss = get_generator_loss(image_real, label_org, label_trg,
                                        generator, discriminator, cfg)
            build_optimizer(generator, cfg, loss=g_loss)

    self.dis_program = fluid.Program()
    dis_startup_program = fluid.Program()
    with fluid.program_guard(self.dis_program, dis_startup_program):
        with fluid.unique_name.guard():
            image_real, label_org, label_trg = create_data_layer()
            generator = Generator(cfg)
            discriminator = Discriminator(cfg)
            d_loss = get_discriminator_loss(image_real, label_org,
                                            label_trg, generator,
                                            discriminator, cfg)
            build_optimizer(discriminator, cfg, loss=d_loss)

    self.executor = fluid.Executor(cfg.place)
    self.scope = fluid.Scope()

    with fluid.scope_guard(self.scope):
        self.executor.run(gen_startup_program)
        self.executor.run(dis_startup_program)

    self.g_loss = g_loss
    self.d_loss = d_loss
def check_with_place(self, place):
    """Run a raw ``uniform_random`` op into a SelectedRows output and check
    the value histogram is approximately uniform over [-5, 10)."""
    scope = core.Scope()
    out = scope.var("X").get_selected_rows()
    paddle.manual_seed(10)
    op = Operator(
        "uniform_random",
        Out="X",
        shape=[1000, 784],
        min=-5.0,
        max=10.0,
        seed=10)
    op.run(scope, place)
    self.assertEqual(out.get_tensor().shape(), [1000, 784])
    hist, prob = output_hist(np.array(out.get_tensor()))
    # Each histogram bucket should be close to its expected probability.
    self.assertTrue(
        np.allclose(
            hist, prob, rtol=0, atol=0.01), "hist: " + str(hist))
def setUp(self):
    """Configure the ``gaussian_random`` op test: attrs, seed, and a
    placeholder output (values are checked statistically elsewhere)."""
    self.op_type = "gaussian_random"
    self.set_attrs()
    self.inputs = {}
    self.use_mkldnn = False
    self.attrs = {
        "shape": [123, 92],
        "mean": self.mean,
        "std": self.std,
        "seed": 10,
        "use_mkldnn": self.use_mkldnn
    }
    paddle.manual_seed(10)

    # Placeholder with the expected shape; actual values are random.
    self.outputs = {'Out': np.zeros((123, 92), dtype='float32')}
def test_generator_uniform_random_static(self):
    """Re-seeding the generator must reproduce ``uniform_random`` outputs
    in static mode (CPU path only)."""
    fluid.disable_dygraph()

    gen = paddle.manual_seed(123123143)
    startup_program = fluid.Program()
    train_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        # example 1:
        # attr shape is a list which doesn't contain tensor Variable.
        result_1 = fluid.layers.uniform_random(shape=[3, 4])
        result_2 = fluid.layers.uniform_random(shape=[3, 4])

        exe = fluid.Executor(fluid.CPUPlace())
        exe.run(startup_program)
        out1 = exe.run(train_program,
                       feed={},
                       fetch_list=[result_1, result_2])
        #gen.set_state(cur_state)
        gen.manual_seed(123123143)
        out2 = exe.run(train_program,
                       feed={},
                       fetch_list=[result_1, result_2])

        out1_res1 = np.array(out1[0])
        out1_res2 = np.array(out1[1])
        out2_res1 = np.array(out2[0])
        out2_res2 = np.array(out2[1])

        # CPU-only: the CUDA generator is exercised by other tests.
        if not core.is_compiled_with_cuda():
            self.assertTrue(np.allclose(out1_res1, out2_res1))
            self.assertTrue(np.allclose(out1_res2, out2_res2))
            self.assertTrue(not np.allclose(out1_res2, out1_res1))
def _check_exception(self, exception_message, place=None):
    """Build the optimizer under test and assert that construction raises
    an exception whose message equals ``exception_message``.

    Parameters
    ----------
    exception_message : str
        Exact expected ``str(e)`` of the raised exception.
    place : fluid place, optional
        Device to run under; defaults to CUDA when available, else CPU.
    """
    seed = 90
    batch_size = 128
    # FIX: use identity comparison with None (PEP 8) instead of `== None`.
    if place is None:
        place = fluid.CUDAPlace(
            0) if core.is_compiled_with_cuda() else fluid.CPUPlace()

    with fluid.dygraph.guard(place):
        try:
            paddle.manual_seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            mlp = MLP()
            optimizer = self.get_optimizer_dygraph(
                parameter_list=mlp.parameters())
        except Exception as e:
            # NOTE(review): if no exception is raised the test silently
            # passes — presumably acceptable here; confirm with callers.
            assert str(e) == exception_message
def test_attr_tensor_API(self):
    """With identical op-level seeds, two ``uniform_random`` layers must
    produce equal values, and every value must lie in [min, max)."""
    _seed = 10
    gen = paddle.manual_seed(_seed)
    # NOTE(review): disables the python-side generator init flag so the
    # op-level seed attribute takes effect — internal API, confirm intent.
    gen._is_init_py = False
    startup_program = fluid.Program()
    train_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        _min = 5
        _max = 10

        ret = fluid.layers.nn.uniform_random(
            [2, 3, 2], min=_min, max=_max, seed=_seed)
        ret_2 = fluid.layers.nn.uniform_random(
            [2, 3, 2], min=_min, max=_max, seed=_seed)
        res = fluid.layers.equal(ret, ret_2)
        place = fluid.CPUPlace()
        if fluid.core.is_compiled_with_cuda():
            place = fluid.CUDAPlace(0)
        exe = fluid.Executor(place)

        exe.run(startup_program)
        ret_value, cmp_value = exe.run(train_program,
                                       fetch_list=[ret, res])
        # Same explicit seed -> elementwise-equal outputs.
        self.assertTrue(np.array(cmp_value).all())
        # All samples must fall inside the half-open range [min, max).
        for i in ret_value.flatten():
            self.assertGreaterEqual(i, _min)
            self.assertLess(i, _max)
def test_gen_TruncatedNormal_initializer(self):
    """Re-seeding and re-running startup must reproduce TruncatedNormal
    parameter initialization (checked on the CUDA build)."""
    fluid.disable_dygraph()

    gen = paddle.manual_seed(123123143)
    # NOTE(review): `cur_state` is captured but never used — likely
    # leftover from a set_state-based variant; confirm before removing.
    cur_state = paddle.get_cuda_rng_state()
    startup_program = fluid.Program()
    train_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        # example 1:
        # attr shape is a list which doesn't contain tensor Variable.
        x = fluid.layers.uniform_random(shape=[2, 10])
        result_1 = fluid.layers.fc(
            input=x,
            size=10,
            param_attr=fluid.initializer.TruncatedNormal(
                loc=0.0, scale=2.0))
        result_2 = fluid.layers.fc(
            input=x,
            size=10,
            param_attr=fluid.initializer.TruncatedNormal(
                loc=0.0, scale=2.0))

        exe = fluid.Executor(fluid.CPUPlace())
        exe.run(startup_program)
        out1 = exe.run(train_program,
                       feed={},
                       fetch_list=[result_1, result_2])

    paddle.manual_seed(123123143)
    with fluid.program_guard(train_program, startup_program):
        # Re-initialize parameters from the same seed, then re-run.
        exe.run(startup_program)
        out2 = exe.run(train_program,
                       feed={},
                       fetch_list=[result_1, result_2])

    out1_res1 = np.array(out1[0])
    out1_res2 = np.array(out1[1])
    out2_res1 = np.array(out2[0])
    out2_res2 = np.array(out2[1])

    if core.is_compiled_with_cuda():
        print(">>>>>>> truncated normal static >>>>>>>")
        self.assertTrue(np.allclose(out1_res1, out2_res1))
        self.assertTrue(np.allclose(out1_res2, out2_res2))
        self.assertTrue(not np.allclose(out1_res2, out1_res1))
def test_api(self):
    """Smoke-test the Uniform initializer's diagonal arguments on an fc
    layer fed with a LoD tensor (just checks the program runs)."""
    paddle.manual_seed(10)
    x = fluid.layers.data('x', shape=[16], dtype='float32', lod_level=1)
    # Uniform init with a diagonal of 1.0 injected every 16 elements.
    y = fluid.layers.fc(x,
                        size=16,
                        param_attr=fluid.initializer.Uniform(
                            low=-0.5,
                            high=0.5,
                            seed=10,
                            diag_num=16,
                            diag_step=16,
                            diag_val=1.0))

    place = fluid.CPUPlace()
    x_tensor = fluid.create_lod_tensor(
        np.random.rand(3, 16).astype("float32"), [[1, 2]], place)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    ret = exe.run(feed={'x': x_tensor},
                  fetch_list=[y],
                  return_numpy=False)
def test_repeated_regularization(self):
    """When both ``ParamAttr`` and the optimizer specify a regularizer, the
    ``ParamAttr`` one (L1 here) must win — two identically-seeded layers,
    one with optimizer-level L2 and one without, must end up identical."""
    l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
    l2 = fluid.regularizer.L2Decay(regularization_coeff=0.01)
    fc_param_attr = fluid.ParamAttr(regularizer=l1)
    # Static-graph sanity pass: building/minimizing with both regularizers
    # set must not raise.
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        x = fluid.layers.uniform_random([2, 2, 3])
        out = fluid.layers.fc(x, 5, param_attr=fc_param_attr)
        loss = fluid.layers.reduce_sum(out)
        sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2)
        sgd.minimize(loss)
    with fluid.dygraph.guard():
        input = fluid.dygraph.to_variable(
            np.random.randn(3, 2).astype('float32'))
        paddle.manual_seed(1)
        paddle.framework.random._manual_program_seed(1)

        linear1 = fluid.dygraph.Linear(
            2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr)
        linear2 = fluid.dygraph.Linear(
            2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr)

        loss1 = linear1(input)
        loss1.backward()
        # set l2 regularizer in optimizer, but l1 in fluid.ParamAttr
        fluid.optimizer.SGD(parameter_list=linear1.parameters(),
                            learning_rate=1e-2,
                            regularization=l2).minimize(loss1)
        # only set l1 in fluid.ParamAttr
        loss2 = linear2(input)
        loss2.backward()
        fluid.optimizer.SGD(parameter_list=linear2.parameters(),
                            learning_rate=1e-2).minimize(loss2)
        # they should both be applied by l1, and keep the same
        self.assertTrue(
            np.allclose(linear1.weight.numpy(), linear2.weight.numpy()),
            "weight should use the regularization in fluid.ParamAttr!")
        self.assertTrue(
            np.allclose(linear1.bias.numpy(), linear2.bias.numpy()),
            "bias should use the regularization in fluid.ParamAttr!")
def check_with_place(self, place):
    """Run ``uniform_random`` with its shape supplied via a tensor list
    (``ShapeTensorList``) and check the output histogram is uniform."""
    scope = core.Scope()
    out = scope.var("X").get_selected_rows()
    # Shape is supplied piecewise through two 1-element shape tensors.
    shape_1 = scope.var("shape1").get_tensor()
    shape_1.set(np.array([1000]).astype("int64"), place)
    shape_2 = scope.var("shape2").get_tensor()
    shape_2.set(np.array([784]).astype("int64"), place)
    paddle.manual_seed(10)
    op = Operator(
        "uniform_random",
        ShapeTensorList=["shape1", "shape2"],
        Out="X",
        min=-5.0,
        max=10.0,
        seed=10)
    op.run(scope, place)
    self.assertEqual(out.get_tensor().shape(), [1000, 784])
    hist, prob = output_hist(np.array(out.get_tensor()))
    self.assertTrue(
        np.allclose(
            hist, prob, rtol=0, atol=0.01), "hist: " + str(hist))
def test_compiled_program_base(self):
    """A seeded ``CompiledProgram`` run must reproduce the reference loss
    (``self.loss``) computed earlier by the plain-executor baseline."""
    with new_program_scope():
        paddle.manual_seed(self.seed)
        paddle.framework.random._manual_program_seed(self.seed)
        place = fluid.CUDAPlace(
            0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
        exe = fluid.Executor(place)

        loss = simple_fc_net()
        exe.run(fluid.default_startup_program())
        compiled_prog = fluid.CompiledProgram(
            fluid.default_main_program())

        loss_data, = exe.run(compiled_prog,
                             feed={
                                 "image": self.img,
                                 "label": self.label
                             },
                             fetch_list=[loss.name])
        self.assertTrue(np.array_equal(loss_data[0], self.loss))
def setUpClass(cls):
    """One-time fixture: skip on CPU-only builds, build the MNIST loaders,
    train a seeded dygraph LeNet baseline, and save its weights for the
    per-test comparisons."""
    import unittest  # local import: only needed for the skip path

    if not fluid.is_compiled_with_cuda():
        # FIX: the original called `self.skipTest(...)`, but `self` is
        # undefined inside a classmethod (would raise NameError instead of
        # skipping).  Raising unittest.SkipTest skips the whole class.
        raise unittest.SkipTest('module not tested when ONLY_CPU compling')
    cls.device = paddle.set_device('gpu')
    fluid.enable_dygraph(cls.device)

    sp_num = 1280
    cls.train_dataset = MnistDataset(mode='train', sample_num=sp_num)
    cls.val_dataset = MnistDataset(mode='test', sample_num=sp_num)
    cls.test_dataset = MnistDataset(
        mode='test', return_label=False, sample_num=sp_num)

    cls.train_loader = fluid.io.DataLoader(
        cls.train_dataset, places=cls.device, batch_size=64)
    cls.val_loader = fluid.io.DataLoader(
        cls.val_dataset, places=cls.device, batch_size=64)
    cls.test_loader = fluid.io.DataLoader(
        cls.test_dataset, places=cls.device, batch_size=64)

    seed = 333
    paddle.manual_seed(seed)
    paddle.framework.random._manual_program_seed(seed)

    dy_lenet = LeNetDygraph()
    cls.init_param = dy_lenet.state_dict()
    dynamic_train(dy_lenet, cls.train_loader)

    cls.acc1 = dynamic_evaluate(dy_lenet, cls.val_loader)

    cls.inputs = [InputSpec([-1, 1, 28, 28], 'float32', 'image')]
    cls.labels = [InputSpec([None, 1], 'int64', 'label')]

    cls.save_dir = tempfile.mkdtemp()
    cls.weight_path = os.path.join(cls.save_dir, 'lenet')
    fluid.dygraph.save_dygraph(dy_lenet.state_dict(), cls.weight_path)

    fluid.disable_dygraph()
def _prepare_program(self, config, parallel=True):
    """Build the seeded language-model training program (with gradient
    clipping and an SGD optimizer on a global learning-rate var), run
    startup, and select a parallel or plain train program.

    Sets ``self.main_program``, ``self.startup_program``, ``self.loss``,
    ``self.last_hidden``, ``self.last_cell``, ``self.feed_order``,
    ``self.learning_rate`` and ``self.train_program``.
    """
    paddle.manual_seed(config.random_seed)
    self.main_program = fluid.Program()
    self.startup_program = fluid.Program()
    with fluid.program_guard(self.main_program, self.startup_program):
        with fluid.unique_name.guard():
            res_vars = lm_model(
                config.hidden_size,
                config.vocab_size,
                config.batch_size,
                num_layers=config.num_layers,
                num_steps=config.num_steps,
                init_scale=config.init_scale,
                dropout=config.dropout,
                rnn_model=config.rnn_model)
            self.loss, self.last_hidden, self.last_cell, self.feed_order = res_vars

            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByGlobalNorm(
                    clip_norm=config.max_grad_norm))

            # Global var so the schedule can be updated from Python side.
            self.learning_rate = fluid.layers.create_global_var(
                name="learning_rate",
                shape=[1],
                value=1.0,
                dtype='float32',
                persistable=True)

            optimizer = fluid.optimizer.SGD(
                learning_rate=self.learning_rate)
            optimizer.minimize(self.loss)
    self.exe.run(self.startup_program)
    if parallel:
        self.train_program = fluid.compiler.CompiledProgram(
            self.main_program).with_data_parallel(
                loss_name=self.loss.name,
                build_strategy=self.build_strategy,
                exec_strategy=self.exec_strategy)
    else:
        self.train_program = self.main_program
def fit(self, dynamic, num_replicas=None, rank=None):
    """Train a seeded LeNet via the high-level ``Model.fit`` API and check
    the evaluated accuracy matches the dygraph baseline ``self.acc1``.

    Fits twice: once directly from the dataset, once from DataLoaders
    built on ``DistributedBatchSampler`` (``num_replicas``/``rank`` allow
    simulating a sharded run).  ``dynamic`` toggles dygraph vs. static.
    """
    fluid.enable_dygraph(self.device) if dynamic else None
    seed = 333
    paddle.manual_seed(seed)
    paddle.framework.random._manual_program_seed(seed)

    net = LeNet(classifier_activation=None)
    optim_new = fluid.optimizer.Adam(
        learning_rate=0.001, parameter_list=net.parameters())
    model = Model(net, inputs=self.inputs, labels=self.labels)
    model.prepare(
        optim_new,
        loss=CrossEntropyLoss(reduction="sum"),
        metrics=Accuracy())
    model.fit(self.train_dataset, batch_size=64, shuffle=False)

    result = model.evaluate(self.val_dataset, batch_size=64)
    np.testing.assert_allclose(result['acc'], self.acc1)

    train_sampler = DistributedBatchSampler(
        self.train_dataset,
        batch_size=64,
        shuffle=False,
        num_replicas=num_replicas,
        rank=rank)
    val_sampler = DistributedBatchSampler(
        self.val_dataset,
        batch_size=64,
        shuffle=False,
        num_replicas=num_replicas,
        rank=rank)

    train_loader = fluid.io.DataLoader(
        self.train_dataset,
        batch_sampler=train_sampler,
        places=self.device,
        return_list=True)

    val_loader = fluid.io.DataLoader(
        self.val_dataset,
        batch_sampler=val_sampler,
        places=self.device,
        return_list=True)

    model.fit(train_loader, val_loader)
    fluid.disable_dygraph() if dynamic else None
def test_generator_gaussian_random_dygraph(self):
    """Test Generator seed.

    In dygraph mode: restoring the CUDA RNG state must reproduce a draw
    (x1 == x2), and reseeding with the same value must reproduce the
    first draw (x == x3).  Checks only run on the CUDA build.
    """
    fluid.enable_dygraph()

    paddle.manual_seed(12312321111)
    x = fluid.layers.gaussian_random([120], dtype="float32")
    # Snapshot the CUDA RNG state, draw, restore, draw again.
    st1 = paddle.get_cuda_rng_state()
    x1 = fluid.layers.gaussian_random([120], dtype="float32")
    paddle.set_cuda_rng_state(st1)
    x2 = fluid.layers.gaussian_random([120], dtype="float32")
    paddle.manual_seed(12312321111)
    x3 = fluid.layers.gaussian_random([120], dtype="float32")
    x_np = x.numpy()
    x1_np = x1.numpy()
    x2_np = x2.numpy()
    x3_np = x3.numpy()

    if core.is_compiled_with_cuda():
        print(">>>>>>> gaussian random dygraph >>>>>>>")
        self.assertTrue(np.allclose(x1_np, x2_np))
        self.assertTrue(np.allclose(x_np, x3_np))