def decoder_step(gru_unit, cue_gru_unit, step_in, hidden, input_size,
                 hidden_size, memory, memory_mask, knowledge, mask=None):
    """Decoder step."""
    # Get attention output: take the top layer of the hidden state.
    top_hidden = layers.slice(hidden, axes=[0], starts=[0], ends=[1])
    top_hidden = layers.squeeze(top_hidden, axes=[0])
    top_hidden = layers.unsqueeze(top_hidden, axes=[1])

    weight_memory, attn = dot_attention(top_hidden, memory, memory_mask)

    step_in = layers.unsqueeze(step_in, axes=[1])
    rnn_input_list = [step_in, weight_memory]
    if weight_memory.shape[0] == -1:
        knowledge_1 = layers.reshape(knowledge, shape=weight_memory.shape)
    else:
        knowledge_1 = knowledge
    cue_input_list = [knowledge_1, weight_memory]
    output_list = [weight_memory]

    rnn_input = layers.concat(rnn_input_list, axis=2)
    rnn_input = layers.squeeze(rnn_input, axes=[1])
    rnn_output, rnn_last_hidden = gru_unit(rnn_input, hidden, mask)

    cue_input = layers.concat(cue_input_list, axis=2)
    cue_input = layers.squeeze(cue_input, axes=[1])
    cue_rnn_out, cue_rnn_last_hidden = cue_gru_unit(cue_input, hidden, mask)

    h_y = layers.tanh(
        fc(rnn_last_hidden, hidden_size, hidden_size, name="dec_fc1"))
    h_cue = layers.tanh(
        fc(cue_rnn_last_hidden, hidden_size, hidden_size, name="dec_fc2"))

    concate_y_cue = layers.concat([h_y, h_cue], axis=2)
    k = layers.sigmoid(fc(concate_y_cue, hidden_size * 2, 1, name='dec_fc3'))

    new_hidden = h_y * k - h_cue * (k - 1.0)

    new_hidden_tmp = layers.transpose(new_hidden, perm=[1, 0, 2])
    output_list.append(new_hidden_tmp)

    real_out = layers.concat(output_list, axis=2)

    if mask:
        mask_tmp = layers.unsqueeze(mask, axes=[0])
        new_hidden = layers.elementwise_mul(
            (new_hidden - hidden), mask_tmp, axis=0)
        new_hidden += hidden

    return real_out, new_hidden
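# Aside (standalone numpy sketch, not part of the model above): the update
#   new_hidden = h_y * k - h_cue * (k - 1.0)
# is algebraically the gated convex combination k * h_y + (1 - k) * h_cue,
# with k = sigmoid(fc([h_y, h_cue])) acting as the knowledge gate.
import numpy as np

rng = np.random.default_rng(0)
h_y = rng.standard_normal((1, 2, 4))
h_cue = rng.standard_normal((1, 2, 4))
k = 1.0 / (1.0 + np.exp(-rng.standard_normal((1, 2, 1))))  # sigmoid gate in (0, 1)

assert np.allclose(h_y * k - h_cue * (k - 1.0), k * h_y + (1.0 - k) * h_cue)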
def forward(self, input):
    if self.inplace:
        input.set_value(layers.tanh(input))
        return input
    else:
        y = layers.tanh(input)
        return y
def forward(self, encoded_ref, encoded_label, encoded_label_raw,
            conv_weights, norm_weights):
    x = encoded_ref
    x_raw = None
    for i in range(self.num_downsamples, -1, -1):  # 5 -> 0
        conv_weight = norm_weight = [None] * 3
        if self.use_hyper_conv and i < self.num_hyper_layers:
            conv_weight = conv_weights[i]
        if self.use_hyper_spade and i < self.num_hyper_layers:
            norm_weight = norm_weights[i]

        # Main branch residual blocks.
        x = self.one_up_conv_layer(x, encoded_label, conv_weight,
                                   norm_weight, i)

        # For raw output generation.
        if self.generate_raw_output and i < self.num_multi_spade_layers:
            x_raw = self.one_up_conv_layer(x_raw, encoded_label_raw,
                                           conv_weight, norm_weight, i)
        else:
            x_raw = x

    # Final conv layer.
    if self.generate_raw_output:
        img_raw = L.tanh(self.conv_img(x_raw))
    else:
        img_raw = None
    img_final = L.tanh(self.conv_img(x))
    return img_final, img_raw
def lstm_step(x_t, hidden_t_prev, cell_t_prev, size, para_name, args):
    """Util function for pointer network."""

    def linear(inputs, para_name, args):
        return layers.fc(input=inputs,
                         size=size,
                         param_attr=fluid.ParamAttr(name=para_name + '_w'),
                         bias_attr=fluid.ParamAttr(name=para_name + '_b'))

    input_cat = layers.concat([hidden_t_prev, x_t], axis=1)
    forget_gate = layers.sigmoid(
        x=linear(input_cat, para_name + '_lstm_f', args))
    input_gate = layers.sigmoid(
        x=linear(input_cat, para_name + '_lstm_i', args))
    output_gate = layers.sigmoid(
        x=linear(input_cat, para_name + '_lstm_o', args))
    cell_tilde = layers.tanh(
        x=linear(input_cat, para_name + '_lstm_c', args))

    cell_t = layers.sums(input=[
        layers.elementwise_mul(x=forget_gate, y=cell_t_prev),
        layers.elementwise_mul(x=input_gate, y=cell_tilde)
    ])

    hidden_t = layers.elementwise_mul(x=output_gate, y=layers.tanh(x=cell_t))

    return hidden_t, cell_t
def forward(self, input, pre_hidden, pre_cell):
    concat_input_hidden = layers.concat([input, pre_hidden], 1)
    gate_input = layers.matmul(x=concat_input_hidden, y=self._weight)
    gate_input = layers.elementwise_add(gate_input, self._bias)
    i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1)
    new_cell = layers.elementwise_add(
        layers.elementwise_mul(
            pre_cell,
            layers.sigmoid(layers.elementwise_add(f, self._forget_bias))),
        layers.elementwise_mul(layers.sigmoid(i), layers.tanh(j)))
    new_hidden = layers.tanh(new_cell) * layers.sigmoid(o)
    return new_hidden, new_cell
def build_program(self, dtype):
    with fluid.program_guard(self.main_program, self.startup_program):
        self.feed_vars = self._prepare_feed_vars([32, 128], dtype, 5)
        tmp_0 = layers.assign(self.feed_vars[0])
        # subgraph with 9 op nodes
        tmp_1 = tmp_0 * layers.sigmoid(self.feed_vars[1]) + layers.sigmoid(
            self.feed_vars[2]) * layers.tanh(self.feed_vars[3])
        tmp_2 = layers.tanh(tmp_1) + layers.sigmoid(self.feed_vars[4])

        self.append_gradients(tmp_2)

        self.num_fused_ops = 2
        self.fetch_list = [tmp_2, self.grad(tmp_0)]
def build_program(self, dtype):
    with fluid.program_guard(self.main_program, self.startup_program):
        self.feed_vars = self._prepare_feed_vars([32, 64], dtype, 5)
        one = layers.fill_constant(shape=[1], dtype=dtype, value=1.0)
        tmp_0 = one * self.feed_vars[0]
        # subgraph with 9 op nodes
        tmp_1 = tmp_0 * layers.sigmoid(self.feed_vars[1]) + layers.sigmoid(
            self.feed_vars[2]) * layers.tanh(self.feed_vars[3])
        tmp_2 = layers.tanh(tmp_1) + layers.sigmoid(self.feed_vars[4])

        self.append_gradients(tmp_2)

        self.num_fused_ops = 2
        self.fetch_list = [tmp_2, self.grad(tmp_0)]
def test_tanh(self):
    program = Program()
    with program_guard(program):
        input = layers.data(name="input", shape=[16], dtype="float32")
        out = layers.tanh(input, name='tanh')
        self.assertIsNotNone(out)
    print(str(program))
def add_input(self, x, condition=None):
    """Add a step input. This method works similarly to `forward` but in a
    `step-in-step-out` fashion.

    Args:
        x (Variable): shape(B, C_res, T=1), input for a step, dtype float32.
        condition (Variable, optional): shape(B, C_cond, T=1). condition for
            a step, dtype float32. Defaults to None.

    Returns:
        (residual, skip_connection)
        residual (Variable): shape(B, C_res, T=1), the residual for a step,
            which is used as the input to the next layer of ResidualBlock.
        skip_connection (Variable): shape(B, C_res, T=1), the skip connection
            for a step. This output is accumulated with that of other
            ResidualBlocks.
    """
    h = x

    # dilated conv
    h = self.conv.add_input(h)

    # condition
    if condition is not None:
        h += self.condition_proj(condition)

    # gated tanh
    content, gate = F.split(h, 2, dim=1)
    z = F.sigmoid(gate) * F.tanh(content)

    # projection
    residual = F.scale(z + x, np.sqrt(0.5))
    skip_connection = z
    return residual, skip_connection
def forward(self, input):
    """Compute the mel spectrum.

    Args:
        input (Variable): shape(B, T, C), dtype float32, the result of mel
            linear projection.

    Returns:
        output (Variable): shape(B, T, C), the result after postconvnet.
    """
    input = layers.transpose(input, [0, 2, 1])
    len = input.shape[-1]
    for i in range(self.num_conv - 1):
        batch_norm = self.batch_norm_list[i]
        conv = self.conv_list[i]
        input = layers.dropout(
            layers.tanh(batch_norm(conv(input)[:, :, :len])),
            self.dropout,
            dropout_implementation='upscale_in_train')
    conv = self.conv_list[self.num_conv - 1]
    input = conv(input)[:, :, :len]
    if self.batchnorm_last:
        batch_norm = self.batch_norm_list[self.num_conv - 1]
        input = layers.dropout(
            batch_norm(input),
            self.dropout,
            dropout_implementation='upscale_in_train')
    output = layers.transpose(input, [0, 2, 1])
    return output
def forward(self, x, condition=None):
    """Conv1D gated-tanh Block.

    Args:
        x (Variable): shape(B, C_res, T), the input. (B stands for
            batch_size, C_res stands for residual channels, T stands for
            time steps.) dtype float32.
        condition (Variable, optional): shape(B, C_cond, T), the condition.
            It has been upsampled in time steps, so it has the same time
            steps as the input does. (C_cond stands for the condition's
            channels.) Defaults to None.

    Returns:
        (residual, skip_connection)
        residual (Variable): shape(B, C_res, T), the residual, which is used
            as the input to the next layer of ResidualBlock.
        skip_connection (Variable): shape(B, C_res, T), the skip connection.
            This output is accumulated with that of other ResidualBlocks.
    """
    time_steps = x.shape[-1]
    h = x

    # dilated conv
    h = self.conv(h)
    if h.shape[-1] != time_steps:
        h = h[:, :, :time_steps]

    # condition
    if condition is not None:
        h += self.condition_proj(condition)

    # gated tanh
    content, gate = F.split(h, 2, dim=1)
    z = F.sigmoid(gate) * F.tanh(content)

    # projection
    residual = F.scale(z + x, math.sqrt(.5))
    skip_connection = z
    return residual, skip_connection
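# Aside (standalone numpy sketch, with assumed unit-variance inputs): the
# residual scaling by sqrt(0.5) above keeps the variance of (z + x) close to
# that of its inputs when z and x are roughly independent with equal variance,
# since Var(z + x) = Var(z) + Var(x).
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal(100000)
z = rng.standard_normal(100000)
residual = (z + x) * np.sqrt(0.5)
print(np.var(x), np.var(residual))  # both are close to 1.0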
def forward(self, input, class_id, input_class_emb=False):
    if isinstance(input, list):
        codes = [input[0]]
        codes += [input[2 * i + 1:2 * i + 3] for i in range(len(input) // 2)]
    else:
        codes = layers.split(input, self.num_split, 1)
    if not input_class_emb:
        class_emb = self.embed_y(class_id)  # 128
    else:
        class_emb = class_id
    out = self.noise_fc(codes[0])
    out = layers.transpose(
        layers.reshape(out, (out.shape[0], 4, 4, -1)), (0, 3, 1, 2))
    for i, (code, gblock) in enumerate(zip(codes[1:], self.blocks)):
        if isinstance(input, list):
            condition = [layers.concat([c, class_emb], 1) for c in code]
        else:
            condition = layers.concat([code, class_emb], 1)
        out = gblock(out, condition)
    out = self.output_layer_bn(out)
    out = layers.relu(out)
    out = self.output_layer_conv(out)
    return (layers.tanh(out) + 1) / 2
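# Aside (standalone numpy sketch): the final (tanh(out) + 1) / 2 maps the
# unconstrained conv output into [0, 1], the usual range for normalized
# image tensors.
import numpy as np

out = np.linspace(-5.0, 5.0, 11)
img = (np.tanh(out) + 1.0) / 2.0
assert img.min() >= 0.0 and img.max() <= 1.0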
def forward(self, inputs, labels=None, logits_softmax=False):
    """Forward prediction."""
    emb = self.embedding(inputs)
    emb_dropout = self.dropout(emb)

    lstm_forward, _ = self._lstm_forward(emb_dropout)
    lstm_forward_tanh = L.tanh(lstm_forward)
    if self.bi_direction:
        lstm_backward, _ = self._lstm_backward(emb_dropout)
        lstm_backward_tanh = L.tanh(lstm_backward)
        encoded_vector = L.concat(
            input=[lstm_forward_tanh, lstm_backward_tanh], axis=-1)
        encoded_vector = L.reduce_max(encoded_vector, dim=1)
    else:
        encoded_vector = L.reduce_max(lstm_forward_tanh, dim=1)

    hid_fc_2 = self._hid_fc2(encoded_vector)
    logits = self._output_fc(hid_fc_2)

    # Optionally return softmax-normalized logits.
    if logits_softmax:
        logits = L.softmax(logits)

    # If no labels are given, return the logits directly.
    if labels is None:
        return logits

    if len(labels.shape) == 1:
        labels = L.reshape(labels, [-1, 1])

    loss = L.softmax_with_cross_entropy(logits, labels)
    # If the logits have already been passed through softmax,
    # softmax_with_cross_entropy is not applicable; use instead:
    # loss = L.cross_entropy(logits, labels)
    loss = L.reduce_mean(loss)
    return loss, logits
def forward(self, input, state):
    pre_hidden, pre_cell = state
    # Each of i, f, c, o is computed as Wx·x + Wh·h + b, so the four gates can
    # be fused into a single [x, h]·W + b:
    #   [x, h] is the horizontal concat, shape [batch_size, input_size + hidden_size]
    #   W has shape [input_size + hidden_size, 4 * hidden_size]
    #   b has shape [4 * hidden_size]

    # Horizontal concat.
    # shape: [batch_size, input_size + hidden_size]
    concat_input_hidden = L.concat([input, pre_hidden], axis=1)

    # Compute [x, h]·W + b.
    # shape: [batch_size, 4 * hidden_size]
    gate_input = L.matmul(x=concat_input_hidden, y=self._weight)
    gate_input = L.elementwise_add(gate_input, self._bias)

    # Split i, f, c, o along the last dim; each gets last dim hidden_size.
    i, f, c, o = L.split(gate_input, num_or_sections=4, dim=-1)

    # new_c = pre_c * sigmoid(f + forget_bias) + sigmoid(i) * tanh(c)
    # shape: [batch_size, hidden_size]
    new_cell = L.elementwise_add(
        L.elementwise_mul(
            pre_cell,
            L.sigmoid(L.elementwise_add(f, self._forget_bias))),
        L.elementwise_mul(L.sigmoid(i), L.tanh(c)))

    # new_h = tanh(new_c) * sigmoid(o)
    # shape: [batch_size, hidden_size]
    new_hidden = L.tanh(new_cell) * L.sigmoid(o)

    return new_hidden, [new_hidden, new_cell]
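# A minimal numpy re-implementation of the same fused-weight LSTM step, for
# checking the shape bookkeeping in the comments above. The sizes, weights,
# and forget bias below are illustrative assumptions.
import numpy as np

def np_sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

batch_size, input_size, hidden_size = 2, 3, 4
forget_bias = 1.0
rng = np.random.default_rng(0)
weight = rng.standard_normal((input_size + hidden_size, 4 * hidden_size))
bias = np.zeros(4 * hidden_size)
x = rng.standard_normal((batch_size, input_size))
pre_hidden = np.zeros((batch_size, hidden_size))
pre_cell = np.zeros((batch_size, hidden_size))

gate_input = np.concatenate([x, pre_hidden], axis=1) @ weight + bias  # [batch, 4*hidden]
i, f, c, o = np.split(gate_input, 4, axis=1)
new_cell = pre_cell * np_sigmoid(f + forget_bias) + np_sigmoid(i) * np.tanh(c)
new_hidden = np.tanh(new_cell) * np_sigmoid(o)
assert new_hidden.shape == (batch_size, hidden_size)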
def act(a, act='tanh'):
    if act == 'tanh':
        return layers.tanh(a)
    elif act == 'sigmoid':
        return layers.sigmoid(a)
    elif act == 'relu':
        return layers.relu(a)
    else:
        return a
def forward(self, input_tensor, cur_state):
    h_cur = cur_state
    x_in = concat([input_tensor, h_cur], axis=1)
    update = sigmoid(self.update_gate(x_in))
    reset = sigmoid(self.reset_gate(x_in))
    x_out = tanh(
        self.out_gate(concat([input_tensor, h_cur * reset], axis=1)))
    h_new = h_cur * (1 - update) + x_out * update
    return h_new
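# A minimal numpy sketch of the same GRU update, with dense matmuls standing
# in for the module's learned gates (all shapes and weights are assumptions):
import numpy as np

def np_sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

batch, c_in, c_hidden = 2, 3, 4
rng = np.random.default_rng(0)
w_update = rng.standard_normal((c_in + c_hidden, c_hidden))
w_reset = rng.standard_normal((c_in + c_hidden, c_hidden))
w_out = rng.standard_normal((c_in + c_hidden, c_hidden))
x = rng.standard_normal((batch, c_in))
h_cur = rng.standard_normal((batch, c_hidden))

x_in = np.concatenate([x, h_cur], axis=1)
update = np_sigmoid(x_in @ w_update)
reset = np_sigmoid(x_in @ w_reset)
x_out = np.tanh(np.concatenate([x, h_cur * reset], axis=1) @ w_out)
h_new = h_cur * (1 - update) + x_out * update  # update == 1 -> take the candidate
assert h_new.shape == h_cur.shape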
def attention(self, node_type_embed, trans_w_s1, trans_w_s2):
    """Calculate attention weights."""
    attention = fl.tanh(fl.matmul(node_type_embed, trans_w_s1))
    attention = fl.matmul(attention, trans_w_s2)
    attention = fl.reshape(attention, [-1, self.u_num])
    attention = fl.softmax(attention)
    attention = fl.reshape(attention, [-1, self.att_head, self.u_num])
    return attention
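# A minimal numpy sketch of the same two-layer tanh attention scoring, with
# the softmax taken over the u_num axis. The reshape bookkeeping of the
# original is folded into a transpose here, and all dimensions are
# illustrative assumptions.
import numpy as np

def np_softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

batch, u_num, emb_size, att_size, att_head = 2, 5, 8, 6, 3
rng = np.random.default_rng(0)
node_type_embed = rng.standard_normal((batch, u_num, emb_size))
trans_w_s1 = rng.standard_normal((emb_size, att_size))
trans_w_s2 = rng.standard_normal((att_size, att_head))

scores = np.tanh(node_type_embed @ trans_w_s1) @ trans_w_s2  # (batch, u_num, att_head)
attention = np_softmax(scores.transpose(0, 2, 1), axis=-1)   # (batch, att_head, u_num)
assert np.allclose(attention.sum(axis=-1), 1.0)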
def lstm_step(x_t, hidden_t_prev, cell_t_prev, size):
    def linear(inputs):
        return layers.fc(input=inputs, size=size, bias_attr=True)

    forget_gate = layers.sigmoid(x=linear([hidden_t_prev, x_t]))
    input_gate = layers.sigmoid(x=linear([hidden_t_prev, x_t]))
    output_gate = layers.sigmoid(x=linear([hidden_t_prev, x_t]))
    cell_tilde = layers.tanh(x=linear([hidden_t_prev, x_t]))

    cell_t = layers.sums(input=[
        layers.elementwise_mul(x=forget_gate, y=cell_t_prev),
        layers.elementwise_mul(x=input_gate, y=cell_tilde)
    ])

    hidden_t = layers.elementwise_mul(x=output_gate, y=layers.tanh(x=cell_t))

    return hidden_t, cell_t
def forward(self, inputs, labels=None, logits_softmax=False):
    """Forward prediction."""
    emb = self.embedding(inputs)
    hid_fc1 = self._hid_fc1(emb)

    gru_forward = self._gru_forward(hid_fc1)
    gru_forward_tanh = L.tanh(gru_forward)
    if self.bi_direction:
        gru_backward = self._gru_backward(hid_fc1)
        gru_backward_tanh = L.tanh(gru_backward)
        encoded_vector = L.concat(
            input=[gru_forward_tanh, gru_backward_tanh], axis=2)
        encoded_vector = L.reduce_max(encoded_vector, dim=1)
    else:
        encoded_vector = L.reduce_max(gru_forward_tanh, dim=1)

    hid_fc_2 = self._hid_fc2(encoded_vector)
    logits = self._output_fc(hid_fc_2)

    # Optionally return softmax-normalized logits.
    if logits_softmax:
        logits = L.softmax(logits)

    # If no labels are given, return the logits directly.
    if labels is None:
        return logits

    if len(labels.shape) == 1:
        labels = L.reshape(labels, [-1, 1])

    loss = L.softmax_with_cross_entropy(logits, labels)
    # If the logits have already been passed through softmax,
    # softmax_with_cross_entropy is not applicable; use instead:
    # loss = L.cross_entropy(logits, labels)
    loss = L.reduce_mean(loss)
    return loss, logits
def func(self, place):
    shape = [2, 3, 7, 9]
    eps = 0.0005
    dtype = np.float64
    x = layers.data('x', shape, False, dtype=dtype)
    x.persistable = True
    y = layers.tanh(x)
    x_arr = np.random.random(shape).astype(dtype)
    x_arr[np.abs(x_arr) < 0.005] = 0.002
    gradient_checker.triple_grad_check(
        [x], y, x_init=x_arr, place=place, eps=eps)
def step(self, x_t, h_t_1, c_t_1):
    i_t = layers.sigmoid(
        conv2d(x_t, self.filters, self.filter_size, bias=True) +
        conv2d(h_t_1, self.filters, self.filter_size))
    f_t = layers.sigmoid(
        add(
            conv2d(x_t, self.filters, self.filter_size, bias=True) +
            conv2d(h_t_1, self.filters, self.filter_size),
            self.forget_bias))
    o_t = layers.sigmoid(
        conv2d(x_t, self.filters, self.filter_size, bias=True) +
        conv2d(h_t_1, self.filters, self.filter_size))
    c_t_ = layers.tanh(
        conv2d(x_t, self.filters, self.filter_size, bias=True) +
        conv2d(h_t_1, self.filters, self.filter_size))
    c_t = add(dot(f_t, c_t_1), dot(i_t, c_t_))
    h_t = dot(o_t, layers.tanh(c_t))
    return o_t, h_t, c_t
def static_rnn(step, p_vec=p_vec, init_state=None, para_name='', args=args):
    tag = para_name + "static_rnn_"
    ctx = layers.fc(
        input=p_vec,
        param_attr=fluid.ParamAttr(name=tag + 'context_fc_w'),
        bias_attr=fluid.ParamAttr(name=tag + 'context_fc_b'),
        size=hidden_size,
        act=None)
    beta = []
    c_prev = init_state
    m_prev = init_state
    for i in range(step):
        m_prev0 = layers.fc(
            input=m_prev,
            size=hidden_size,
            act=None,
            param_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_w'),
            bias_attr=fluid.ParamAttr(name=tag + 'm_prev0_fc_b'))
        m_prev1 = layers.sequence_expand(x=m_prev0, y=ctx)

        Fk = ctx + m_prev1
        Fk = layers.tanh(Fk)
        logits = layers.fc(
            input=Fk,
            size=1,
            act=None,
            param_attr=fluid.ParamAttr(name=tag + 'logits_fc_w'),
            bias_attr=fluid.ParamAttr(name=tag + 'logits_fc_b'))

        scores = layers.sequence_softmax(input=logits)
        attn_ctx = layers.elementwise_mul(x=p_vec, y=scores, axis=0)
        attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')

        hidden_t, cell_t = lstm_step(
            attn_ctx,
            hidden_t_prev=m_prev,
            cell_t_prev=c_prev,
            size=hidden_size,
            para_name=tag,
            args=args)
        m_prev = hidden_t
        c_prev = cell_t
        beta.append(scores)
    return beta
def func(self, place):
    shape = [2, 3, 7, 9]
    eps = 0.0005
    dtype = np.float64
    x = layers.data('x', shape, False, dtype=dtype)
    x.persistable = True
    y = layers.tanh(x)
    x_arr = np.random.random(shape).astype(dtype)
    x_arr[np.abs(x_arr) < 0.005] = 0.002
    gradient_checker.triple_grad_check(
        [x], y, x_init=x_arr, place=place, eps=eps)
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
    gradient_checker.triple_grad_check_for_dygraph(
        self.tanh_wrapper, [x], y, x_init=x_arr, place=place)
    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
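# For intuition about what triple_grad_check verifies here: the third
# derivative of tanh has the closed form (6*tanh(x)**2 - 2) * (1 - tanh(x)**2).
# A standalone numpy finite-difference check of that formula:
import numpy as np

def d3_tanh(x):
    t = np.tanh(x)
    return (6.0 * t**2 - 2.0) * (1.0 - t**2)

x = np.linspace(-2.0, 2.0, 9)
h = 1e-3
# central-difference approximation of the third derivative
fd = (np.tanh(x + 2 * h) - 2 * np.tanh(x + h) +
      2 * np.tanh(x - h) - np.tanh(x - 2 * h)) / (2 * h**3)
assert np.allclose(fd, d3_tanh(x), atol=1e-5)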
def PredictionModule(x, num_priors, num_classes, mask_dim, shared_conv_w,
                     shared_conv_b, shared_bbox_w, shared_bbox_b,
                     shared_conf_w, shared_conf_b, shared_mask_w,
                     shared_mask_b):
    '''
    Adapted from the PredictionModule in the DSSD algorithm, with the
    convolutions changed to 3x3. Three branches predict the bbox, conf, and
    mask coefficients respectively.
            x
          / | \\
      bbox conf mask
    '''
    x = P.conv2d(x, 256, filter_size=(3, 3), stride=1, padding=1,
                 param_attr=shared_conv_w, bias_attr=shared_conv_b)
    x = P.relu(x)
    bbox_x = x
    conf_x = x
    mask_x = x

    bbox = P.conv2d(bbox_x, num_priors * 4, filter_size=(3, 3), stride=1,
                    padding=1, param_attr=shared_bbox_w,
                    bias_attr=shared_bbox_b)
    bbox = P.transpose(bbox, perm=[0, 2, 3, 1])
    bbox = P.reshape(bbox, (P.shape(bbox)[0], -1, 4))

    conf = P.conv2d(conf_x, num_priors * num_classes, filter_size=(3, 3),
                    stride=1, padding=1, param_attr=shared_conf_w,
                    bias_attr=shared_conf_b)
    conf = P.transpose(conf, perm=[0, 2, 3, 1])
    conf = P.reshape(conf, (P.shape(conf)[0], -1, num_classes))

    mask = P.conv2d(mask_x, num_priors * mask_dim, filter_size=(3, 3),
                    stride=1, padding=1, param_attr=shared_mask_w,
                    bias_attr=shared_mask_b)
    mask = P.transpose(mask, perm=[0, 2, 3, 1])
    mask = P.reshape(mask, (P.shape(mask)[0], -1, mask_dim))
    mask = P.tanh(mask)

    preds = {'loc': bbox, 'conf': conf, 'mask': mask}
    return preds
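# Why tanh on the mask branch: the per-anchor coefficients must be able to
# subtract prototype masks as well as add them, so they are squashed into
# (-1, 1). A standalone numpy sketch of the usual YOLACT-style assembly
# (the prototypes come from elsewhere in the network; shapes are assumptions):
import numpy as np

def np_sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

proto_h, proto_w, mask_dim = 17, 17, 32
rng = np.random.default_rng(0)
prototypes = rng.standard_normal((proto_h, proto_w, mask_dim))
coeffs = np.tanh(rng.standard_normal(mask_dim))  # signed coefficients in (-1, 1)
mask = np_sigmoid(prototypes @ coeffs)           # (proto_h, proto_w), values in (0, 1)
assert mask.shape == (proto_h, proto_w)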
def gru_step(self, input, hidden, mask=None):
    """GRU step."""
    hidden_array = []
    for i in range(self.num_layers):
        hidden_temp = layers.slice(
            hidden, axes=[0], starts=[i], ends=[i + 1])
        hidden_temp = layers.reshape(
            hidden_temp, shape=[-1, self.hidden_size])
        hidden_array.append(hidden_temp)

    last_hidden_array = []
    for k in range(self.num_layers):
        trans_input = layers.matmul(input, self.weight_input_array[k])
        trans_input += self.bias_input_array[k]
        trans_hidden = layers.matmul(hidden_array[k],
                                     self.weight_hidden_array[k])
        trans_hidden += self.bias_hidden_array[k]

        input_array = layers.split(trans_input, num_or_sections=3, dim=-1)
        trans_array = layers.split(trans_hidden, num_or_sections=3, dim=-1)

        reset_gate = layers.sigmoid(input_array[0] + trans_array[0])
        input_gate = layers.sigmoid(input_array[1] + trans_array[1])
        new_gate = layers.tanh(input_array[2] + reset_gate * trans_array[2])

        new_hidden = new_gate + input_gate * (hidden_array[k] - new_gate)

        if mask:
            neg_mask = layers.fill_constant_batch_size_like(
                input=mask, shape=[1], value=1.0, dtype='float32') - mask
            new_hidden = new_hidden * mask + hidden_array[k] * neg_mask

        last_hidden_array.append(new_hidden)
        input = new_hidden

        if self.dropout and self.dropout > 0.0:
            input = layers.dropout(input, dropout_prob=self.dropout)

    last_hidden = layers.concat(last_hidden_array, 0)
    last_hidden = layers.reshape(
        last_hidden, shape=[self.num_layers, -1, self.hidden_size])

    return input, last_hidden
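# Aside (standalone numpy check): the update
#   new_hidden = new_gate + input_gate * (hidden - new_gate)
# is the standard GRU interpolation (1 - z) * candidate + z * hidden,
# with z = input_gate.
import numpy as np

rng = np.random.default_rng(0)
new_gate = rng.standard_normal((2, 4))
hidden = rng.standard_normal((2, 4))
z = 1.0 / (1.0 + np.exp(-rng.standard_normal((2, 4))))

assert np.allclose(new_gate + z * (hidden - new_gate),
                   (1.0 - z) * new_gate + z * hidden)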
def forward(self, inputs):
    assert len(inputs) == len(self.in_channels)
    outs = []
    out = inputs[-1]
    outs.append(out)
    for i in range(self.num_ins):
        out = L.resize_nearest(out, scale=2, align_corners=False)
        out = L.pad2d(out, [0, 1, 0, 1])
        out = self.conv2x2[i](out)
        if i < 4:
            out = L.concat([out, inputs[-i - 2]], axis=1)
        identity = self.conv1x1[i](out)
        out = self.deres_layers[i](out) + identity
        outs.append(out)
    outs[-1] = L.tanh(outs[-1])
    return tuple(outs)
def simple_attention(self, encoder_vec, encoder_proj, decoder_state,
                     decoder_size):
    decoder_state_proj = layers.fc(
        input=decoder_state,
        size=decoder_size,
        bias_attr=False,
        name="decoder_state_proj_fc")
    decoder_state_expand = layers.sequence_expand(
        x=decoder_state_proj, y=encoder_proj)
    concated = layers.elementwise_add(encoder_proj, decoder_state_expand)
    concated = layers.tanh(x=concated)
    attention_weights = layers.fc(
        input=concated,
        size=1,
        act=None,
        bias_attr=False,
        name="attention_weights_fc")
    attention_weights = layers.sequence_softmax(input=attention_weights)
    weights_reshape = layers.reshape(x=attention_weights, shape=[-1])
    scaled = layers.elementwise_mul(
        x=encoder_vec, y=weights_reshape, axis=0)
    context = layers.sequence_pool(input=scaled, pool_type='sum')
    return context
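# A minimal numpy sketch of the same additive (Bahdanau-style) attention for
# a single sequence, without the LoD-based sequence ops; all dimensions and
# weights are illustrative assumptions.
import numpy as np

def np_softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

seq_len, enc_size, dec_size = 6, 8, 4
rng = np.random.default_rng(0)
encoder_vec = rng.standard_normal((seq_len, enc_size))
encoder_proj = rng.standard_normal((seq_len, dec_size))
decoder_state = rng.standard_normal(dec_size)
w_dec = rng.standard_normal((dec_size, dec_size))
v = rng.standard_normal(dec_size)

scores = np.tanh(encoder_proj + decoder_state @ w_dec) @ v  # (seq_len,)
weights = np_softmax(scores)
context = weights @ encoder_vec                             # (enc_size,)
assert context.shape == (enc_size,)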
def encoder_static(input_embedding, len=3, init_hidden=None, init_cell=None):
    weight_1_arr = []
    weight_2_arr = []
    bias_arr = []
    hidden_array = []
    cell_array = []
    mask_array = []
    for i in range(num_layers):
        weight_1 = layers.create_parameter(
            [hidden_size * 2, hidden_size * 4],
            dtype="float32",
            name="fc_weight1_" + str(i),
            default_initializer=fluid.initializer.UniformInitializer(
                low=-init_scale, high=init_scale))
        weight_1_arr.append(weight_1)
        bias_1 = layers.create_parameter(
            [hidden_size * 4],
            dtype="float32",
            name="fc_bias1_" + str(i),
            default_initializer=fluid.initializer.Constant(0.0))
        bias_arr.append(bias_1)

        pre_hidden = layers.slice(
            init_hidden, axes=[0], starts=[i], ends=[i + 1])
        pre_cell = layers.slice(
            init_cell, axes=[0], starts=[i], ends=[i + 1])
        pre_hidden = layers.reshape(
            pre_hidden, shape=[-1, hidden_size], inplace=True)
        pre_cell = layers.reshape(
            pre_cell, shape=[-1, hidden_size], inplace=True)
        hidden_array.append(pre_hidden)
        cell_array.append(pre_cell)

    res = []
    sliced_inputs = layers.split(
        input_embedding, num_or_sections=len, dim=1)

    for index in range(len):
        input = sliced_inputs[index]
        input = layers.reshape(input, shape=[-1, hidden_size], inplace=True)
        for k in range(num_layers):
            pre_hidden = hidden_array[k]
            pre_cell = cell_array[k]
            weight_1 = weight_1_arr[k]
            bias = bias_arr[k]

            nn = layers.concat([input, pre_hidden], 1)
            gate_input = layers.matmul(x=nn, y=weight_1)

            gate_input = layers.elementwise_add(gate_input, bias)
            i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1)

            try:
                from paddle.fluid.contrib.layers import fused_elemwise_activation
                # fluid.contrib.layers.fused_elemwise_activation can do a
                # fused operation, like:
                #   1) x + sigmoid(y); x + tanh(y)
                #   2) tanh(x + y)
                # The unary operations supported in this fused op are
                # currently limited; more will be supported, and this kind of
                # fusion will be done automatically in a future version of
                # paddle.fluid.

                # layers.sigmoid(i) * layers.tanh(j)
                tmp0 = fused_elemwise_activation(
                    x=layers.tanh(j),
                    y=i,
                    functor_list=['elementwise_mul', 'sigmoid'],
                    save_intermediate_out=False)
                # pre_cell * layers.sigmoid(f)
                tmp1 = fused_elemwise_activation(
                    x=pre_cell,
                    y=f,
                    functor_list=['elementwise_mul', 'sigmoid'],
                    save_intermediate_out=False)
                c = tmp0 + tmp1
                # layers.tanh(c) * layers.sigmoid(o)
                m = fused_elemwise_activation(
                    x=layers.tanh(c),
                    y=o,
                    functor_list=['elementwise_mul', 'sigmoid'],
                    save_intermediate_out=False)
            except ImportError:
                c = pre_cell * layers.sigmoid(f) + layers.sigmoid(
                    i) * layers.tanh(j)
                m = layers.tanh(c) * layers.sigmoid(o)

            hidden_array[k] = m
            cell_array[k] = c
            input = m

            if dropout != None and dropout > 0.0:
                input = layers.dropout(
                    input,
                    dropout_prob=dropout,
                    dropout_implementation='upscale_in_train')

        res.append(input)

    last_hidden = layers.concat(hidden_array, 1)
    last_hidden = layers.reshape(
        last_hidden, shape=[-1, num_layers, hidden_size], inplace=True)
    last_hidden = layers.transpose(x=last_hidden, perm=[1, 0, 2])

    last_cell = layers.concat(cell_array, 1)
    last_cell = layers.reshape(
        last_cell, shape=[-1, num_layers, hidden_size])
    last_cell = layers.transpose(x=last_cell, perm=[1, 0, 2])

    real_res = layers.concat(res, 0)
    real_res = layers.reshape(
        real_res, shape=[len, -1, hidden_size], inplace=True)
    real_res = layers.transpose(x=real_res, perm=[1, 0, 2])

    return real_res, last_hidden, last_cell
def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None):
    weight_1_arr = []
    weight_2_arr = []
    bias_arr = []
    hidden_array = []
    cell_array = []
    mask_array = []
    for i in range(num_layers):
        weight_1 = layers.create_parameter(
            [hidden_size * 2, hidden_size * 4],
            dtype="float32",
            name="fc_weight1_" + str(i),
            default_initializer=fluid.initializer.UniformInitializer(
                low=-init_scale, high=init_scale))
        weight_1_arr.append(weight_1)
        bias_1 = layers.create_parameter(
            [hidden_size * 4],
            dtype="float32",
            name="fc_bias1_" + str(i),
            default_initializer=fluid.initializer.Constant(0.0))
        bias_arr.append(bias_1)

        pre_hidden = layers.slice(
            init_hidden, axes=[0], starts=[i], ends=[i + 1])
        pre_cell = layers.slice(
            init_cell, axes=[0], starts=[i], ends=[i + 1])
        pre_hidden = layers.reshape(pre_hidden, shape=[-1, hidden_size])
        pre_cell = layers.reshape(pre_cell, shape=[-1, hidden_size])
        hidden_array.append(pre_hidden)
        cell_array.append(pre_cell)

    input_embedding = layers.transpose(input_embedding, perm=[1, 0, 2])
    rnn = PaddingRNN()

    with rnn.step():
        input = rnn.step_input(input_embedding)
        for k in range(num_layers):
            pre_hidden = rnn.memory(init=hidden_array[k])
            pre_cell = rnn.memory(init=cell_array[k])
            weight_1 = weight_1_arr[k]
            bias = bias_arr[k]

            nn = layers.concat([input, pre_hidden], 1)
            gate_input = layers.matmul(x=nn, y=weight_1)

            gate_input = layers.elementwise_add(gate_input, bias)
            i = layers.slice(
                gate_input, axes=[1], starts=[0], ends=[hidden_size])
            j = layers.slice(
                gate_input, axes=[1], starts=[hidden_size],
                ends=[hidden_size * 2])
            f = layers.slice(
                gate_input, axes=[1], starts=[hidden_size * 2],
                ends=[hidden_size * 3])
            o = layers.slice(
                gate_input, axes=[1], starts=[hidden_size * 3],
                ends=[hidden_size * 4])

            c = pre_cell * layers.sigmoid(f) + layers.sigmoid(
                i) * layers.tanh(j)
            m = layers.tanh(c) * layers.sigmoid(o)

            rnn.update_memory(pre_hidden, m)
            rnn.update_memory(pre_cell, c)

            rnn.step_output(m)
            rnn.step_output(c)

            input = m

            if dropout != None and dropout > 0.0:
                input = layers.dropout(
                    input,
                    dropout_prob=dropout,
                    dropout_implementation='upscale_in_train')

        rnn.step_output(input)

    rnnout = rnn()

    last_hidden_array = []
    last_cell_array = []
    real_res = rnnout[-1]
    for i in range(num_layers):
        m = rnnout[i * 2]
        c = rnnout[i * 2 + 1]
        m.stop_gradient = True
        c.stop_gradient = True
        last_h = layers.slice(
            m, axes=[0], starts=[num_steps - 1], ends=[num_steps])
        last_hidden_array.append(last_h)
        last_c = layers.slice(
            c, axes=[0], starts=[num_steps - 1], ends=[num_steps])
        last_cell_array.append(last_c)

    real_res = layers.transpose(x=real_res, perm=[1, 0, 2])
    last_hidden = layers.concat(last_hidden_array, 0)
    last_cell = layers.concat(last_cell_array, 0)

    return real_res, last_hidden, last_cell
def forward(self, x):
    return tanh(x)