def fast_preprocess_layer(img, input_size, normalize, subtract_means, to_float,
                          mean=MEANS, std=STD):
    """Preprocess a batch of images with Paddle ops (faster than numpy);
    used at prediction time.

    Args:
        img: image tensor in NHWC layout, BGR channel order.
        input_size: target height/width for the bilinear resize.
        normalize: if True, apply (img - mean) / std.
        subtract_means: elif True, subtract the mean only.
        to_float: elif True, only scale values to [0, 1] (divide by 255).
        mean, std: per-channel statistics (length-3 sequences).

    Returns:
        Tensor in [n, c, h, w] layout and RGB channel order.
    """

    def _expand_const(values, like):
        # Materialize a length-3 constant as a (1, 3, 1, 1) tensor and
        # broadcast it to the shape of `like`.  Extracted because the
        # original duplicated this sequence for mean and std.
        t = P.create_tensor(dtype='float32')
        P.assign(np.array(values).astype(np.float32), t)
        t = P.reshape(t, (1, 3, 1, 1))
        return P.expand_as(t, target_tensor=like)

    # NHWC -> NCHW
    img = P.transpose(img, perm=[0, 3, 1, 2])
    img = P.image_resize(img, out_shape=[input_size, input_size],
                         resample="BILINEAR")
    if normalize:
        m = _expand_const(mean, img)
        v = _expand_const(std, img)
        img = (img - m) / v
    elif subtract_means:
        m = _expand_const(mean, img)
        img = (img - m)
    elif to_float:
        # Only scale to [0, 1]
        img = img / 255

    # Reverse the channel axis: BGR -> RGB.
    img_rgb = P.concat([img[:, 2:3, :, :], img[:, 1:2, :, :],
                        img[:, 0:1, :, :]], axis=1)
    # Return value is in channel order [n, c, h, w] and RGB
    return img_rgb
def not_test_raw_api(self):
    # Builds a conditional-branch MNIST classifier with the raw
    # ConditionalBlock / split_lod_tensor / merge_lod_tensor APIs and
    # trains until the loss drops below 1.0.
    # NOTE(review): the "not_test" prefix presumably disables pytest/unittest
    # discovery for this case — confirm that is intentional.
    prog = Program()
    startup_prog = Program()
    with program_guard(prog, startup_prog):
        image = layers.data(name='x', shape=[784], dtype='float32')
        label = layers.data(name='y', shape=[1], dtype='int64')

        # Route each sample to the true/false branch depending on
        # whether its label is < 5.
        limit = layers.fill_constant(shape=[1], dtype='int64', value=5)
        cond = layers.less_than(x=label, y=limit)
        true_image, false_image = split_lod_tensor(input=image, mask=cond)

        true_out = layers.create_tensor(dtype='float32')
        true_cond = ConditionalBlock([cond])
        with true_cond.block():
            hidden = layers.fc(input=true_image, size=100, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            layers.assign(input=prob, output=true_out)

        false_out = layers.create_tensor(dtype='float32')
        false_cond = ConditionalBlock([cond])
        with false_cond.block():
            hidden = layers.fc(input=false_image, size=200, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            layers.assign(input=prob, output=false_out)

        # Stitch the two branch outputs back into one LoD tensor.
        prob = merge_lod_tensor(
            in_true=true_out, in_false=false_out, mask=cond, x=image)
        loss = layers.cross_entropy(input=prob, label=label)
        avg_loss = layers.mean(loss)

        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        optimizer.minimize(avg_loss, startup_prog)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=8192),
        batch_size=10)

    place = core.CPUPlace()
    exe = Executor(place)

    exe.run(startup_prog)
    PASS_NUM = 100
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            x_data = np.array([x[0] for x in data]).astype("float32")
            y_data = np.array([x[1] for x in data]).astype("int64")
            y_data = np.expand_dims(y_data, axis=1)

            outs = exe.run(prog,
                           feed={'x': x_data, 'y': y_data},
                           fetch_list=[avg_loss])
            print(outs[0])
            # Success: loss converged below the threshold.
            if outs[0] < 1.0:
                return
    # Training never converged -> fail the test.
    self.assertFalse(True)
def test_square_error_cost(self):
    """square_error_cost(input, label) must equal (input - label) ** 2.

    Bug fix: the feed dict previously passed the graph Variables
    (`input_var`, `label_var`) themselves; Executor.run requires the
    numpy arrays (`input_val`, `label_val`).
    """
    input_val = np.random.uniform(0.1, 0.5, (2, 3)).astype("float32")
    label_val = np.random.uniform(0.1, 0.5, (2, 3)).astype("float32")

    # Reference result computed with numpy.
    sub = input_val - label_val
    np_result = sub * sub

    input_var = layers.create_tensor(dtype="float32", name="input")
    label_var = layers.create_tensor(dtype="float32", name="label")
    layers.assign(input=input_val, output=input_var)
    layers.assign(input=label_val, output=label_var)
    output = layers.square_error_cost(input=input_var, label=label_var)

    for use_cuda in ([False, True]
                     if core.is_compiled_with_cuda() else [False]):
        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = Executor(place)
        # Feed the numpy data, not the Variables.
        result = exe.run(fluid.default_main_program(),
                         feed={"input": input_val,
                               "label": label_val},
                         fetch_list=[output])
        self.assertTrue(np.isclose(np_result, result).all())
def __call__(self, batch_C_prime, I_r_size):
    # Build the sampling grid P' from the predicted fiducial points
    # batch_C_prime and the rectified image size I_r_size.
    # NOTE(review): the build_* helper names suggest a thin-plate-spline
    # style transform — confirm against the class definition.
    C = self.build_C()
    P = self.build_P(I_r_size)
    # Constant system matrices, precomputed in numpy.
    inv_delta_C = self.build_inv_delta_C(C).astype('float32')
    P_hat = self.build_P_hat(C, P).astype('float32')

    # Upload the constants as tensors and freeze them (no gradients).
    inv_delta_C_tensor = layers.create_tensor(dtype='float32')
    layers.assign(inv_delta_C, inv_delta_C_tensor)
    inv_delta_C_tensor.stop_gradient = True
    P_hat_tensor = layers.create_tensor(dtype='float32')
    layers.assign(P_hat, P_hat_tensor)
    P_hat_tensor.stop_gradient = True

    batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
    # batch_C_ex_part_tensor = create_tmp_var(
    #     fluid.default_main_program(),
    #     name='batch_C_ex_part_tensor',
    #     dtype='float32', shape=[-1, 3, 2])
    # layers.py_func(func=get_batch_C_expand,
    #     x=[batch_C_prime], out=[batch_C_ex_part_tensor])
    batch_C_ex_part_tensor.stop_gradient = True

    # Augment C', solve T = inv_delta_C @ C'_aug, then map the grid:
    # P' = P_hat @ T.
    batch_C_prime_with_zeros = layers.concat(
        [batch_C_prime, batch_C_ex_part_tensor], axis=1)
    batch_T = layers.matmul(inv_delta_C_tensor, batch_C_prime_with_zeros)
    batch_P_prime = layers.matmul(P_hat_tensor, batch_T)
    return batch_P_prime
def test_forward(self):
    """Forward/backward smoke test for ConditionalBlock on CPU.

    Fix: `print outs` was Python 2 statement syntax (a SyntaxError under
    Python 3); replaced with the print() function, matching the sibling
    version of this test.
    """
    data = layers.data(name='X', shape=[1], dtype='float32')
    data.stop_gradient = False
    cond = layers.ConditionalBlock(inputs=[data])
    out = layers.create_tensor(dtype='float32')
    with cond.block():
        hidden = layers.fc(input=data, size=10)
        layers.assign(hidden, out)

    cpu = core.CPUPlace()
    exe = Executor(cpu)
    exe.run(default_startup_program())

    x = numpy.random.random(size=(10, 1)).astype('float32')

    outs = exe.run(feed={'X': x}, fetch_list=[out])[0]
    print(outs)

    loss = layers.mean(out)
    append_backward(loss=loss)

    # Fetch the input gradient to verify the backward pass ran.
    outs = exe.run(
        feed={'X': x},
        fetch_list=[
            default_main_program().block(0).var(data.name + "@GRAD")
        ])[0]
    print(outs)
def test_forward(self):
    """Forward/backward smoke test for ConditionalBlock on CPU."""
    data = layers.data(name='X', shape=[1], dtype='float32')
    data.stop_gradient = False

    # Wrap a small fc layer in a conditional block and copy its result
    # out through `out`.
    cond = ConditionalBlock(inputs=[data])
    out = layers.create_tensor(dtype='float32')
    with cond.block():
        hidden = layers.fc(input=data, size=10)
        layers.assign(hidden, out)

    place = core.CPUPlace()
    exe = Executor(place)
    exe.run(default_startup_program())

    feed_x = numpy.random.random(size=(10, 1)).astype('float32')

    forward_result = exe.run(feed={'X': feed_x}, fetch_list=[out])[0]
    print(forward_result)

    loss = layers.mean(out)
    append_backward(loss=loss)

    # Fetch the input gradient to verify the backward pass ran.
    grad_var = default_main_program().block(0).var(data.name + "@GRAD")
    backward_result = exe.run(feed={'X': feed_x},
                              fetch_list=[grad_var])[0]
    print(backward_result)
def test_uniform_name(self):
    """A user-supplied name must prefix every op a Uniform emits."""
    name = 'test_uniform'
    named_dist = Uniform(0.0, 1.0, name=name)
    self.assertEqual(named_dist.name, name)

    # Without an explicit name, the class name is used.
    default_dist = Uniform(0.0, 1.0)
    self.assertEqual(default_dist.name, 'Uniform')

    paddle.enable_static()

    sample_out = named_dist.sample([2])
    self.assertEqual(self.get_prefix(sample_out.name), name + '_sample')

    entropy_out = named_dist.entropy()
    self.assertEqual(self.get_prefix(entropy_out.name), name + '_entropy')

    value_np = np.array([0.8], dtype="float32")
    value_var = layers.create_tensor(dtype="float32")
    layers.assign(value_np, value_var)

    lp_out = named_dist.log_prob(value_var)
    self.assertEqual(self.get_prefix(lp_out.name), name + '_log_prob')

    probs_out = named_dist.probs(value_var)
    self.assertEqual(self.get_prefix(probs_out.name), name + '_probs')
def test_normal_name(self):
    """A user-supplied name must prefix every op a Normal emits."""
    name = 'test_normal'
    named_dist = Normal(0.0, 1.0, name=name)
    self.assertEqual(named_dist.name, name)

    # Without an explicit name, the class name is used.
    default_dist = Normal(0.0, 1.0)
    self.assertEqual(default_dist.name, 'Normal')

    paddle.enable_static()

    sample_out = named_dist.sample([2])
    self.assertEqual(self.get_prefix(sample_out.name), name + '_sample')

    entropy_out = named_dist.entropy()
    self.assertEqual(self.get_prefix(entropy_out.name), name + '_entropy')

    value_np = np.array([0.8], dtype="float32")
    value_var = layers.create_tensor(dtype="float32")
    layers.assign(value_np, value_var)

    lp_out = named_dist.log_prob(value_var)
    self.assertEqual(self.get_prefix(lp_out.name), name + '_log_prob')

    probs_out = named_dist.probs(value_var)
    self.assertEqual(self.get_prefix(probs_out.name), name + '_probs')

    kl_out = named_dist.kl_divergence(default_dist)
    self.assertEqual(self.get_prefix(kl_out.name),
                     name + '_kl_divergence')
def test_categorical_name(self):
    """A user-supplied name must prefix every op a Categorical emits."""
    name = 'test_categorical'
    named_dist = Categorical([0.4, 0.6], name=name)
    self.assertEqual(named_dist.name, name)

    # Without an explicit name, the class name is used.
    default_dist = Categorical([0.5, 0.5])
    self.assertEqual(default_dist.name, 'Categorical')

    paddle.enable_static()

    sample_out = named_dist.sample([2])
    self.assertEqual(self.get_prefix(sample_out.name), name + '_sample')

    entropy_out = named_dist.entropy()
    self.assertEqual(self.get_prefix(entropy_out.name), name + '_entropy')

    kl_out = named_dist.kl_divergence(default_dist)
    self.assertEqual(self.get_prefix(kl_out.name),
                     name + '_kl_divergence')

    value_np = np.array([0], dtype="int64")
    value_var = layers.create_tensor(dtype="int64")
    layers.assign(value_np, value_var)

    probs_out = named_dist.probs(value_var)
    self.assertEqual(self.get_prefix(probs_out.name), name + '_probs')

    lp_out = named_dist.log_prob(value_var)
    self.assertEqual(self.get_prefix(lp_out.name), name + '_log_prob')
def test_fetch_var(self):
    """A persistable tensor must be retrievable by name after exe.run."""
    self.set_input()
    target = layers.create_tensor(dtype="int32", persistable=True,
                                  name="x")
    layers.assign(input=self.val, output=target)

    executor = fluid.Executor(fluid.CPUPlace())
    # No fetch targets: running only materializes "x" in the scope.
    executor.run(fluid.default_main_program(), feed={}, fetch_list=[])

    fetched_x = fluid.executor._fetch_var("x")
    self.assertTrue(numpy.array_equal(fetched_x, self.val),
                    "fetch_x=%s val=%s" % (fetched_x, self.val))
    self.assertEqual(fetched_x.dtype, self.val.dtype)
def test_assign(self):
    """assign() must copy self.value into a tensor unchanged."""
    program = fluid.Program()
    with fluid.program_guard(program):
        target = layers.create_tensor(dtype=self.dtype)
        layers.assign(input=self.value, output=target)

    executor = fluid.Executor(self.place)
    fetched_x, = executor.run(program, feed={}, fetch_list=[target])

    self.assertTrue(numpy.array_equal(fetched_x, self.value),
                    "fetch_x=%s val=%s" % (fetched_x, self.value))
    self.assertEqual(fetched_x.dtype, self.value.dtype)
def test_load(self):
    """layers.load() should read back the weight saved at ./model/w."""
    main_prog = fluid.Program()
    start_prog = fluid.Program()
    with fluid.program_guard(main_prog, start_prog):
        target = layers.create_tensor(dtype='float32')
        layers.load(target, file_path='./model/w')

    executor = fluid.Executor(fluid.CPUPlace())
    executor.run(start_prog)
    ret = executor.run(main_prog, fetch_list=[target.name])
    # self.ones holds the expected content — presumably written by the
    # test's setup code (not visible here).
    self.assertTrue(np.array_equal(self.ones, ret[0]))
def test_fetch_var(self):
    # Assign a known int32 array to a persistable tensor named "x",
    # run the program (no fetch targets), then retrieve it by name.
    val = numpy.array([1, 3, 5]).astype(numpy.int32)
    x = layers.create_tensor(dtype="int32", persistable=True, name="x")
    layers.assign(input=val, output=x)
    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_main_program(), feed={}, fetch_list=[])
    # NOTE(review): fluid.fetch_var is the legacy public spelling; a
    # sibling test uses fluid.executor._fetch_var — confirm which one
    # this Paddle version exposes.
    fetched_x = fluid.fetch_var("x")
    self.assertTrue(
        numpy.array_equal(fetched_x, val),
        "fetch_x=%s val=%s" % (fetched_x, val))
    self.assertEqual(fetched_x.dtype, val.dtype)
def test_assign(self):
    """assign() copies an int32 numpy array into a tensor verbatim."""
    val = (-100 + 200 * numpy.random.random(size=(2, 5))).astype(
        numpy.int32)
    # NOTE(review): the tensor is declared float32 but receives int32
    # data; the final dtype assertion suggests assign's input dictates
    # the output dtype — presumably intentional, confirm.
    x = layers.create_tensor(dtype="float32")
    layers.assign(input=val, output=x)

    executor = fluid.Executor(fluid.CPUPlace())
    fetched_x = executor.run(fluid.default_main_program(),
                             feed={},
                             fetch_list=[x])[0]

    self.assertTrue(numpy.array_equal(fetched_x, val),
                    "fetch_x=%s val=%s" % (fetched_x, val))
    self.assertEqual(fetched_x.dtype, val.dtype)
def test_assign(self):
    """assign() must round-trip an int32 numpy array bit-for-bit."""
    val = (
        -100 + 200 * numpy.random.random(size=(2, 5))).astype(numpy.int32)
    # NOTE(review): declared float32 but fed int32; the dtype assertion
    # below indicates assign's input dictates the dtype — confirm.
    x = layers.create_tensor(dtype="float32")
    layers.assign(input=val, output=x)

    exe = fluid.Executor(fluid.CPUPlace())
    outs = exe.run(fluid.default_main_program(), feed={}, fetch_list=[x])
    fetched_x = outs[0]

    self.assertTrue(
        numpy.array_equal(fetched_x, val),
        "fetch_x=%s val=%s" % (fetched_x, val))
    self.assertEqual(fetched_x.dtype, val.dtype)
def __call__(self, batch_C_prime, I_r_size):
    """
    Generate the grid for the grid_sampler.
    Args:
        batch_C_prime: the matrix of the geometric transformation
        I_r_size: the shape of the input image
    Return:
        batch_P_prime: the grid for the grid_sampler
    """
    C = self.build_C()
    P = self.build_P(I_r_size)
    # Constant system matrices, precomputed in numpy.
    inv_delta_C = self.build_inv_delta_C(C).astype('float32')
    P_hat = self.build_P_hat(C, P).astype('float32')

    # Upload the constants as tensors and freeze them (no gradients).
    inv_delta_C_tensor = layers.create_tensor(dtype='float32')
    layers.assign(inv_delta_C, inv_delta_C_tensor)
    inv_delta_C_tensor.stop_gradient = True
    P_hat_tensor = layers.create_tensor(dtype='float32')
    layers.assign(P_hat, P_hat_tensor)
    P_hat_tensor.stop_gradient = True

    batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
    # batch_C_ex_part_tensor = create_tmp_var(
    #     fluid.default_main_program(),
    #     name='batch_C_ex_part_tensor',
    #     dtype='float32', shape=[-1, 3, 2])
    # layers.py_func(func=get_batch_C_expand,
    #     x=[batch_C_prime], out=[batch_C_ex_part_tensor])
    batch_C_ex_part_tensor.stop_gradient = True

    # Augment C', solve T = inv_delta_C @ C'_aug, then map the grid:
    # P' = P_hat @ T.
    batch_C_prime_with_zeros = layers.concat(
        [batch_C_prime, batch_C_ex_part_tensor], axis=1)
    batch_T = layers.matmul(inv_delta_C_tensor, batch_C_prime_with_zeros)
    batch_P_prime = layers.matmul(P_hat_tensor, batch_T)
    return batch_P_prime
def test_masked_select(self):
    """masked_select must match numpy boolean-mask indexing with
    broadcasting of a (4, 1) mask over a (4, 4) input."""
    mask_shape = [4, 1]
    shape = [4, 4]
    data = np.random.random(mask_shape).astype("float32")
    input_data = np.random.random(shape).astype("float32")
    mask_data = data > 0.5

    # Reference result: broadcast the mask across columns, then select.
    mask_data_b = np.broadcast_to(mask_data, shape)
    npresult = input_data[np.where(mask_data_b)]

    input_var = layers.create_tensor(dtype="float32", name="input")
    mask_var = layers.create_tensor(dtype="bool", name="mask")
    output = layers.masked_select(input=input_var, mask=mask_var)

    cuda_options = ([False, True]
                    if core.is_compiled_with_cuda() else [False])
    for use_cuda in cuda_options:
        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = Executor(place)
        result = exe.run(fluid.default_main_program(),
                         feed={"input": input_data,
                               "mask": mask_data},
                         fetch_list=[output])
        self.assertTrue(np.isclose(npresult, result).all())
def test_distribution_error(self):
    """The abstract Distribution base must raise NotImplementedError
    for every statistical method."""
    base = Distribution()

    self.assertRaises(NotImplementedError, base.sample)
    self.assertRaises(NotImplementedError, base.entropy)

    concrete = Normal(0.0, 1.0)
    self.assertRaises(NotImplementedError, base.kl_divergence, concrete)

    value_np = np.array([0.8], dtype="float32")
    value_var = layers.create_tensor(dtype="float32")
    self.assertRaises(NotImplementedError, base.log_prob, value_var)
    self.assertRaises(NotImplementedError, base.probs, value_var)
def beam_search():
    """Auto-regressive beam-search decoding loop built with a While op.

    Reads encoder state and hyper-parameters from the enclosing scope;
    returns (finished_ids, finished_scores) from beam_search_decode.
    """
    # Scalar loop bookkeeping, kept on CPU for the While condition.
    max_len = layers.fill_constant(shape=[1],
                                   dtype=start_tokens.dtype,
                                   value=max_out_len,
                                   force_cpu=True)
    step_idx = layers.fill_constant(shape=[1],
                                    dtype=start_tokens.dtype,
                                    value=0,
                                    force_cpu=True)
    cond = layers.less_than(x=step_idx,
                            y=max_len)  # default force_cpu=True
    while_op = layers.While(cond)
    # array states will be stored for each step.
    ids = layers.array_write(layers.reshape(start_tokens, (-1, 1)),
                             step_idx)
    scores = layers.array_write(init_scores, step_idx)
    # cell states will be overwrited at each step.
    # caches contains states of history steps in decoder self-attention
    # and static encoder output projections in encoder-decoder attention
    # to reduce redundant computation.
    caches = [
        {
            "k":  # for self attention
            layers.fill_constant_batch_size_like(
                input=start_tokens,
                shape=[-1, n_head, 0, d_key],
                dtype=enc_output.dtype,
                value=0),
            "v":  # for self attention
            layers.fill_constant_batch_size_like(
                input=start_tokens,
                shape=[-1, n_head, 0, d_value],
                dtype=enc_output.dtype,
                value=0),
            "static_k":  # for encoder-decoder attention
            layers.create_tensor(dtype=enc_output.dtype),
            "static_v":  # for encoder-decoder attention
            layers.create_tensor(dtype=enc_output.dtype)
        } for i in range(n_layer)
    ]
    with while_op.block():
        pre_ids = layers.array_read(array=ids, i=step_idx)
        # Since beam_search_op dosen't enforce pre_ids' shape, we can do
        # inplace reshape here which actually change the shape of
        # pre_ids.
        pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
        pre_scores = layers.array_read(array=scores, i=step_idx)
        # gather cell states corresponding to selected parent
        pre_src_attn_bias = layers.gather(trg_src_attn_bias,
                                          index=parent_idx)
        pre_pos = layers.elementwise_mul(
            x=layers.fill_constant_batch_size_like(
                input=pre_src_attn_bias,  # cann't use lod tensor here
                value=1,
                shape=[-1, 1, 1],
                dtype=pre_ids.dtype),
            y=step_idx,
            axis=0)
        logits = wrap_decoder(trg_vocab_size,
                              max_in_len,
                              n_layer,
                              n_head,
                              d_key,
                              d_value,
                              d_model,
                              d_inner_hid,
                              prepostprocess_dropout,
                              attention_dropout,
                              relu_dropout,
                              preprocess_cmd,
                              postprocess_cmd,
                              weight_sharing,
                              dec_inputs=(pre_ids, pre_pos, None,
                                          pre_src_attn_bias),
                              enc_output=enc_output,
                              caches=caches,
                              gather_idx=parent_idx,
                              bos_idx=bos_idx)
        # intra-beam topK
        topk_scores, topk_indices = layers.topk(
            input=layers.softmax(logits), k=beam_size)
        accu_scores = layers.elementwise_add(x=layers.log(topk_scores),
                                             y=pre_scores,
                                             axis=0)
        # beam_search op uses lod to differentiate branches.
        accu_scores = layers.lod_reset(accu_scores, pre_ids)
        # topK reduction across beams, also contain special handle of
        # end beams and end sentences(batch reduction)
        selected_ids, selected_scores, gather_idx = layers.beam_search(
            pre_ids=pre_ids,
            pre_scores=pre_scores,
            ids=topk_indices,
            scores=accu_scores,
            beam_size=beam_size,
            end_id=eos_idx,
            return_parent_idx=True)
        layers.increment(x=step_idx, value=1.0, in_place=True)
        # cell states(caches) have been updated in wrap_decoder,
        # only need to update beam search states here.
        layers.array_write(selected_ids, i=step_idx, array=ids)
        layers.array_write(selected_scores, i=step_idx, array=scores)
        layers.assign(gather_idx, parent_idx)
        layers.assign(pre_src_attn_bias, trg_src_attn_bias)
        # Continue while under max length AND some beam is still alive.
        length_cond = layers.less_than(x=step_idx, y=max_len)
        finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
        layers.logical_and(x=length_cond, y=finish_cond, out=cond)

    finished_ids, finished_scores = layers.beam_search_decode(
        ids, scores, beam_size=beam_size, end_id=eos_idx)
    return finished_ids, finished_scores
def test_raw_api(self):
    """Train a conditional-branch MNIST model built with the raw APIs.

    Python 3 fixes: `map()` is lazy, so `np.array(map(...))` no longer
    builds a data array — use list comprehensions instead — and the
    `print x` statement is replaced with the print() function.
    """
    prog = Program()
    startup_prog = Program()
    with program_guard(prog, startup_prog):
        image = layers.data(name='x', shape=[784], dtype='float32')
        label = layers.data(name='y', shape=[1], dtype='int64')

        # Route each sample down the true/false branch depending on
        # whether its label is < 5.
        limit = layers.fill_constant_batch_size_like(
            input=label, dtype='int64', shape=[1], value=5.0)
        cond = layers.less_than(x=label, y=limit)
        true_image, false_image = layers.split_lod_tensor(
            input=image, mask=cond)

        true_out = layers.create_tensor(dtype='float32')
        true_cond = layers.ConditionalBlock([true_image])
        with true_cond.block():
            hidden = layers.fc(input=true_image, size=100, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            layers.assign(input=prob, output=true_out)

        false_out = layers.create_tensor(dtype='float32')
        false_cond = layers.ConditionalBlock([false_image])
        with false_cond.block():
            hidden = layers.fc(input=false_image, size=200, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            layers.assign(input=prob, output=false_out)

        # Stitch the two branch outputs back into one LoD tensor.
        prob = layers.merge_lod_tensor(
            in_true=true_out, in_false=false_out, mask=cond, x=image)
        loss = layers.cross_entropy(input=prob, label=label)
        avg_loss = layers.mean(loss)

        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        optimizer.minimize(avg_loss, startup_prog)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=8192),
        batch_size=200)

    place = core.CPUPlace()
    exe = Executor(place)

    exe.run(startup_prog)
    PASS_NUM = 100
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            # Materialize the lazy reader batch with comprehensions.
            x_data = np.array([x[0] for x in data]).astype("float32")
            y_data = np.array([x[1] for x in data]).astype("int64")
            y_data = np.expand_dims(y_data, axis=1)

            outs = exe.run(prog,
                           feed={'x': x_data, 'y': y_data},
                           fetch_list=[avg_loss])
            print(outs[0])
            # Success: loss converged below the threshold.
            if outs[0] < 1.0:
                return
    # Training never converged -> fail the test.
    self.assertFalse(True)
def beam_search():
    """Beam-search decoding loop (While op) with minimum-length
    enforcement, length penalty and optional trigram blocking.

    Reads encoder outputs and hyper-parameters from the enclosing
    scope; returns (finished_ids, finished_scores).
    """
    # Scalar loop bookkeeping; While condition tensors live on CPU.
    max_len = layers.fill_constant(shape=[1],
                                   dtype=start_tokens.dtype,
                                   value=self.max_out_len,
                                   force_cpu=True)
    min_len = layers.fill_constant(shape=[1],
                                   dtype=start_tokens.dtype,
                                   value=self.min_out_len)
    neg_inf = layers.fill_constant(shape=[1], dtype='float32', value=-INF)
    step_idx = layers.fill_constant(shape=[1],
                                    dtype=start_tokens.dtype,
                                    value=0,
                                    force_cpu=True)
    step_next_idx = layers.fill_constant(shape=[1],
                                         dtype=start_tokens.dtype,
                                         value=1,
                                         force_cpu=True)
    cond = layers.less_than(x=step_idx,
                            y=max_len)  # default force_cpu=True
    while_op = layers.While(cond)
    # array states will be stored for each step.
    ids = layers.array_write(layers.reshape(start_tokens, (-1, 1)),
                             step_idx)
    scores = layers.array_write(init_scores, step_idx)
    # cell states will be overwrited at each step.
    # caches contains states of history steps in decoder self-attention
    # and static encoder output projections in encoder-decoder attention
    # to reduce redundant computation.
    caches = [
        {
            "k":  # for self attention
            layers.fill_constant_batch_size_like(
                input=start_tokens,
                shape=[-1, self._n_head, 0,
                       self._emb_size // self._n_head],
                dtype=enc_words_output.dtype,
                value=0),
            "v":  # for self attention
            layers.fill_constant_batch_size_like(
                input=start_tokens,
                shape=[-1, self._n_head, 0,
                       self._emb_size // self._n_head],
                dtype=enc_words_output.dtype,
                value=0),
            "static_k_word":  # for encoder-decoder attention
            layers.create_tensor(dtype=enc_words_output.dtype),
            "static_v_word":  # for encoder-decoder attention
            layers.create_tensor(dtype=enc_words_output.dtype),
            "static_k_sent":  # for encoder-decoder attention
            layers.create_tensor(dtype=enc_sents_output.dtype),
            "static_v_sent":  # for encoder-decoder attention
            layers.create_tensor(dtype=enc_sents_output.dtype)
        } for i in range(self._dec_n_layer)
    ]
    trigram_blocking = TrigramBlocking(start_tokens,
                                       self.tokenizer,
                                       use_fp16=self._use_fp16,
                                       beam_size=self.beam_size)
    with while_op.block():
        pre_ids = layers.array_read(array=ids, i=step_idx)
        pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
        # Since beam_search_op dosen't enforce pre_ids' shape, we can do
        # inplace reshape here which actually change the shape of
        # pre_ids.
        # pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
        pre_scores = layers.array_read(array=scores, i=step_idx)
        # gather cell states corresponding to selected parent
        pre_src_words_attn_bias = layers.gather(tgt_src_words_attn_bias,
                                                index=parent_idx)
        pre_src_sents_attn_bias = layers.gather(tgt_src_sents_attn_bias,
                                                index=parent_idx)
        pre_graph_attn_bias = layers.gather(graph_attn_bias,
                                            index=parent_idx)
        pre_pos = layers.elementwise_mul(
            x=layers.fill_constant_batch_size_like(
                input=pre_src_sents_attn_bias,  # cann't use lod tensor here
                value=1,
                shape=[-1, 1, 1],
                dtype=pre_ids.dtype),
            y=step_idx,
            axis=0)
        logits = self.decode(
            dec_input=(pre_ids, pre_pos, None, pre_src_words_attn_bias,
                       pre_src_sents_attn_bias, pre_graph_attn_bias),
            enc_words_output=enc_words_output,
            enc_sents_output=enc_sents_output,
            caches=caches,
            gather_idx=parent_idx)
        # prevent generating end token if length less than min_out_len
        eos_index = layers.fill_constant(
            shape=[layers.shape(logits)[0]],
            dtype='int64',
            value=self.eos_idx)
        eos_index = fluid.one_hot(eos_index, depth=self.voc_size)
        less_cond = layers.cast(layers.less_than(x=step_idx, y=min_len),
                                dtype='float32')
        less_val = layers.elementwise_mul(less_cond, neg_inf)
        eos_val = layers.elementwise_mul(eos_index, less_val, axis=0)
        revised_logits = layers.elementwise_add(logits, eos_val, axis=0)
        # topK reduction across beams, also contain special handle of
        # end beams and end sentences(batch reduction)
        topk_scores, topk_indices = layers.topk(
            input=layers.softmax(revised_logits), k=self.beam_size)
        # Roll-Back previous-scores for length-penalty
        # previous-scores has been length-penaltied, before this
        # timestep length-penalty, need roll-back
        # because of doing this, we need store the length-penaltied
        # score in `scores`
        # while calculating use the un-penaltied score
        # -> safe for step_idx == 0 (initialization state), because
        # previous-score == 0
        pre_timestep_length_penalty = fluid.layers.pow(
            ((5.0 + fluid.layers.cast(step_idx, pre_scores.dtype)) / 6.0),
            self.len_penalty)
        pre_scores_wo_len_penalty = fluid.layers.elementwise_mul(
            pre_scores, pre_timestep_length_penalty)
        # calc trigram-blocking delta scores for current alive sequence
        if self.block_trigram:
            trigram_blocking.update_seq(pre_ids, parent_idx)
            trigram_blocking.expand_cand_seq(topk_indices)
            fluid.layers.py_func(
                func=trigram_blocking.blocking_forward,
                x=[
                    trigram_blocking.cand_seq,
                    trigram_blocking.id2is_full_token
                ],
                out=trigram_blocking.delta_score_out,
                backward_func=None)
            layers.Print(trigram_blocking.delta_score_out,
                         summarize=100,
                         message="trigram_blocking.delta_score_out")
            pre_scores_wo_len_penalty = fluid.layers.elementwise_add(
                x=trigram_blocking.delta_score_out,
                y=pre_scores_wo_len_penalty,
                axis=0)
        # => [N, topk]
        accu_scores = layers.elementwise_add(
            x=layers.log(topk_scores),
            y=pre_scores_wo_len_penalty,
            axis=0)
        # Re-apply the length penalty for the current timestep.
        cur_timestep_length_penalty = layers.pow(
            ((5.0 + layers.cast(step_next_idx, accu_scores.dtype)) / 6.0),
            self.len_penalty)
        curr_scores = layers.elementwise_div(accu_scores,
                                             cur_timestep_length_penalty)
        # beam_search op uses lod to differentiate branches.
        curr_scores = layers.lod_reset(curr_scores, pre_ids)
        topk_indices = layers.lod_reset(topk_indices, pre_ids)
        selected_ids, selected_scores, gather_idx = layers.beam_search(
            pre_ids=pre_ids,
            pre_scores=pre_scores,
            ids=topk_indices,
            scores=curr_scores,
            beam_size=self.beam_size,
            end_id=self.eos_idx,
            return_parent_idx=True)
        layers.increment(x=step_idx, value=1.0, in_place=True)
        layers.increment(x=step_next_idx, value=1.0, in_place=True)
        # cell states(caches) have been updated in wrap_decoder,
        # only need to update beam search states here.
        layers.array_write(selected_ids, i=step_idx, array=ids)
        layers.array_write(selected_scores, i=step_idx, array=scores)
        layers.assign(gather_idx, parent_idx)
        layers.assign(pre_src_words_attn_bias, tgt_src_words_attn_bias)
        layers.assign(pre_src_sents_attn_bias, tgt_src_sents_attn_bias)
        layers.assign(pre_graph_attn_bias, graph_attn_bias)
        # Continue while under max length AND some beam is still alive.
        length_cond = layers.less_than(x=step_idx, y=max_len)
        finish_cond = layers.logical_not(
            layers.is_empty(x=selected_ids))
        layers.logical_and(x=length_cond, y=finish_cond, out=cond)

    finished_ids, finished_scores = layers.beam_search_decode(
        ids, scores, beam_size=self.beam_size, end_id=self.eos_idx)
    return finished_ids, finished_scores
def fast_decode(src_vocab_size, trg_vocab_size, max_in_len, n_layer, n_head,
                d_key, d_value, d_model, d_inner_hid,
                prepostprocess_dropout, attention_dropout, relu_dropout,
                preprocess_cmd, postprocess_cmd, weight_sharing, beam_size,
                max_out_len, bos_idx, eos_idx, model_input=None):
    """
    Use beam search to decode. Caches will be used to store states of history
    steps which can make the decoding faster.

    Returns:
        (finished_ids, finished_scores) produced by beam_search_decode.
    """
    enc_inputs = (model_input.src_word, model_input.src_pos,
                  model_input.src_slf_attn_bias)
    dec_inputs = (model_input.trg_word, model_input.init_score,
                  model_input.init_idx, model_input.trg_src_attn_bias)

    # Encode the source sequence once; the decoder reuses the result at
    # every beam-search step.
    enc_output = wrap_encoder(src_vocab_size,
                              max_in_len,
                              n_layer,
                              n_head,
                              d_key,
                              d_value,
                              d_model,
                              d_inner_hid,
                              prepostprocess_dropout,
                              attention_dropout,
                              relu_dropout,
                              preprocess_cmd,
                              postprocess_cmd,
                              weight_sharing,
                              enc_inputs,
                              bos_idx=bos_idx)
    start_tokens, init_scores, parent_idx, trg_src_attn_bias = dec_inputs

    # Scalar loop bookkeeping, kept on CPU for the decode loop.
    max_len = layers.fill_constant(shape=[1],
                                   dtype=start_tokens.dtype,
                                   value=max_out_len,
                                   force_cpu=True)
    step_idx = layers.fill_constant(shape=[1],
                                    dtype=start_tokens.dtype,
                                    value=0,
                                    force_cpu=True)
    # array states will be stored for each step.
    ids = layers.array_write(layers.reshape(start_tokens, (-1, 1)),
                             step_idx)
    scores = layers.array_write(init_scores, step_idx)
    # cell states will be overwrited at each step.
    # caches contains states of history steps in decoder self-attention
    # and static encoder output projections in encoder-decoder attention
    # to reduce redundant computation.
    caches = []
    for i in range(n_layer):
        caches.append({
            "k":  # for self attention
            layers.fill_constant_batch_size_like(
                input=start_tokens,
                shape=[-1, n_head, 0, d_key],
                dtype=enc_output.dtype,
                value=0),
            "v":  # for self attention
            layers.fill_constant_batch_size_like(
                input=start_tokens,
                shape=[-1, n_head, 0, d_value],
                dtype=enc_output.dtype,
                value=0),
            "static_k":  # for encoder-decoder attention
            layers.create_tensor(dtype=enc_output.dtype),
            "static_v":  # for encoder-decoder attention
            layers.create_tensor(dtype=enc_output.dtype)
        })

    # Run the actual While-op beam-search loop (mutates ids/scores).
    _do_beam_search(trg_vocab_size, max_in_len, n_layer, n_head, d_key,
                    d_value, d_model, d_inner_hid, prepostprocess_dropout,
                    attention_dropout, relu_dropout, preprocess_cmd,
                    postprocess_cmd, weight_sharing, beam_size, max_len,
                    bos_idx, eos_idx, ids, scores, parent_idx,
                    trg_src_attn_bias, caches, enc_output, step_idx)

    finished_ids, finished_scores = layers.beam_search_decode(
        ids, scores, beam_size=beam_size, end_id=eos_idx)
    return finished_ids, finished_scores
exe.run(start_prog) # simulate saving model fluid.io.save_persistables(exe, dirname="old", main_program=main_prog) ############################################################################# # The following section illustrates what user should do to adjust parameter # ############################################################################# # The target 'weight' is the concatenation of original 'weight' and a # supplement weight filled 0 of shape (4, 8) zeros = np.zeros((4, 8)).astype('float32') main_prog = fluid.Program() start_prog = fluid.Program() with fluid.program_guard(main_prog, start_prog): w_part1 = layers.create_tensor(dtype='float32') layers.load(w_part1, file_path='old/weight') w_part2 = layers.assign(zeros) new_w = layers.concat([w_part1, w_part2], axis=0) main_prog.current_block().append_op(type='save', inputs={'X': [new_w]}, outputs={}, attrs={'file_path': 'new/weight'}) exe = fluid.Executor(fluid.CPUPlace()) exe.run(start_prog) ret = exe.run(main_prog, fetch_list=[new_w.name]) target = np.concatenate((ones, zeros), axis=0) assert np.array_equal(ret[0], target)