def _attention(self, direct, cur_token, prev, to_apply, to_apply_proj):
    """Additive attention over to_apply, queried by cur_token and prev."""
    # Project the current token and the previous state into a shared space.
    with layer.mixed(size=cur_token.size,
                     bias_attr=Attr.Param(direct + '.bp', initial_std=0.),
                     act=Act.Linear()) as proj:
        proj += layer.full_matrix_projection(
            input=cur_token, param_attr=Attr.Param(direct + '.wp'))
        proj += layer.full_matrix_projection(
            input=prev, param_attr=Attr.Param(direct + '.wr'))
    # Broadcast the projected query over the sequence being attended to.
    expanded = layer.expand(input=proj, expand_as=to_apply)
    att_context = layer.addto(input=[expanded, to_apply_proj],
                              act=Act.Tanh(),
                              bias_attr=False)
    # One score per step, normalized across the whole sequence.
    att_weights = layer.fc(input=att_context,
                           size=1,
                           param_attr=Attr.Param(direct + '.w'),
                           bias_attr=Attr.Param(direct + '.b',
                                                initial_std=0.),
                           act=Act.SequenceSoftmax())
    # scaling + Sum pooling == attention-weighted sum over the sequence.
    scaled = layer.scaling(input=to_apply, weight=att_weights)
    applied = layer.pooling(input=scaled,
                            pooling_type=paddle.pooling.Sum())
    return applied
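# For intuition, a minimal standalone NumPy sketch of the additive attention
# computed above. Shapes and parameter names (W_p, W_r, b_p, v, b) are
# hypothetical stand-ins for the '.wp', '.wr', '.bp', '.w', '.b' parameters;
# this illustrates the math, it is not the PaddlePaddle API.
import numpy as np

def additive_attention_sketch(cur_token, prev, to_apply, to_apply_proj,
                              W_p, W_r, b_p, v, b):
    # layer.mixed block: proj = W_p * cur_token + W_r * prev + b_p
    proj = W_p.dot(cur_token) + W_r.dot(prev) + b_p           # (d,)
    # expand + addto + tanh: combine the query with each sequence step
    att_context = np.tanh(proj[None, :] + to_apply_proj)      # (T, d)
    scores = att_context.dot(v) + b                           # (T,)
    weights = np.exp(scores - scores.max())
    weights /= weights.sum()                                  # SequenceSoftmax
    # scaling + Sum pooling: attention-weighted sum of to_apply
    return (weights[:, None] * to_apply).sum(axis=0)          # (d,)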
def _u_step(self, h_cur, u):
    """Attends over the question encoding u for the current step h_cur."""
    s = self._step_basic(h_cur, u)
    # Normalize the similarity scores across the question sequence.
    with layer.mixed(size=1,
                     bias_attr=False,
                     act=Act.SequenceSoftmax()) as h_weights:
        h_weights += layer.identity_projection(s)
    # Weighted sum of u under the attention weights.
    applied_weights = layer.scaling(input=u, weight=h_weights)
    u_ctx = layer.pooling(input=applied_weights,
                          pooling_type=paddle.pooling.Sum())
    return u_ctx
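# A minimal sketch (NumPy, illustrative shapes) of what _u_step computes:
# softmax the similarity scores s over the question sequence, then take the
# weighted sum of u. In PaddlePaddle, scaling followed by Sum pooling is
# exactly this weighted sum.
import numpy as np

def u_step_sketch(s, u):
    # s: (T,) similarity of the current h step to each u position
    # u: (T, d) question encoding
    w = np.exp(s - s.max())
    w /= w.sum()                        # Act.SequenceSoftmax over the sequence
    return (w[:, None] * u).sum(axis=0)  # u_ctx: (d,)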
def test_aggregate_layer(self):
    pool = layer.pooling(
        input=pixel,
        pooling_type=pooling.Avg(),
        agg_level=layer.AggregateLevel.TO_SEQUENCE)
    last_seq = layer.last_seq(input=pixel)
    first_seq = layer.first_seq(input=pixel)
    concat = layer.concat(input=[last_seq, first_seq])
    seq_concat = layer.seq_concat(a=last_seq, b=first_seq)
    print(layer.parse_network(
        [pool, last_seq, first_seq, concat, seq_concat]))
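# For reference, the aggregations exercised by this test, written out in
# NumPy on a toy sequence (shapes are illustrative only):
import numpy as np

seq = np.random.rand(5, 8)              # a sequence of T=5 steps, 8 features
avg_pool = seq.mean(axis=0)             # layer.pooling with pooling.Avg()
last = seq[-1]                          # layer.last_seq
first = seq[0]                          # layer.first_seq
concat = np.concatenate([last, first])  # layer.concat: one (16,) vector
seq_concat = np.vstack([last, first])   # layer.seq_concat: a 2-step sequence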
def _attention_flow(self, h, u):
    """Computes BiDAF-style attention flow between context h and question u."""
    # Per-step maximum similarity of h to u (see _h_step).
    bs = layer.recurrent_group(input=[h, layer.StaticInput(u)],
                               step=self._h_step,
                               reverse=False)
    # Query-to-context attention: softmax over the context steps ...
    b_weights = layer.mixed(act=Act.SequenceSoftmax(),
                            bias_attr=False,
                            input=layer.identity_projection(bs))
    # ... then a weighted sum of h, broadcast back over the sequence.
    h_step_scaled = layer.scaling(input=h, weight=b_weights)
    h_step = layer.pooling(input=h_step_scaled,
                           pooling_type=paddle.pooling.Sum())
    h_expr = layer.expand(input=h_step, expand_as=h)
    # Context-to-query attention: one attended u vector per h step.
    u_expr = layer.recurrent_group(input=[h, layer.StaticInput(u)],
                                   step=self._u_step,
                                   reverse=False)
    g = self._beta(h, u_expr, h_expr)
    return g
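# A minimal NumPy sketch of the attention flow above, assuming a precomputed
# similarity matrix S of shape (T_h, T_u) as produced by _step_basic. u_expr
# is context-to-query attention (row-wise softmax of S applied to u); h_expr
# is query-to-context attention (softmax over the row maxima of S applied to
# h, then broadcast over the context steps). Function names are hypothetical.
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def attention_flow_sketch(h, u, S):
    # h: (T_h, d) context, u: (T_u, d) question
    u_expr = softmax(S, axis=1).dot(u)           # (T_h, d), cf. _u_step
    b = softmax(S.max(axis=1))                   # (T_h,), cf. _h_step + bs
    h_tilde = b.dot(h)                           # (d,) attended context
    h_expr = np.tile(h_tilde, (h.shape[0], 1))   # expand_as=h
    return u_expr, h_expr                        # combined by self._beta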
def network(self):
    """
    Implements the main computation graph of the model: a bidirectional
    matching of the answer against the question, followed by max pooling
    and a softmax classifier.
    """
    self.check_and_create_data()
    self.create_shared_params()
    q_enc = self.get_enc(self.q_ids, type='q')
    a_enc = self.get_enc(self.a_ids, type='q')
    # Separate question projections for the two matching directions.
    q_proj_left = layer.fc(size=self.emb_dim * 2,
                           bias_attr=False,
                           param_attr=Attr.Param(self.name + '_left.wq'),
                           input=q_enc)
    q_proj_right = layer.fc(size=self.emb_dim * 2,
                            bias_attr=False,
                            param_attr=Attr.Param(self.name + '_right.wq'),
                            input=q_enc)
    left_match = self.recurrent_group(
        self.name + '_left',
        [layer.StaticInput(q_enc),
         layer.StaticInput(q_proj_left),
         a_enc],
        reverse=False)
    right_match = self.recurrent_group(
        self.name + '_right',
        [layer.StaticInput(q_enc),
         layer.StaticInput(q_proj_right),
         a_enc],
        reverse=True)
    match_seq = layer.concat(input=[left_match, right_match])
    # Dropout on the matched sequence before pooling.
    with layer.mixed(size=match_seq.size,
                     act=Act.Identity(),
                     layer_attr=Attr.ExtraLayerAttribute(drop_rate=0.2),
                     bias_attr=False) as dropped:
        dropped += layer.identity_projection(match_seq)
    match_result = layer.pooling(input=dropped,
                                 pooling_type=paddle.pooling.Max())
    cls = layer.fc(input=match_result,
                   act=Act.Softmax(),
                   size=self.label_dim)
    return cls
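# A rough sketch of the read-out at the end of network(): dropout on the
# matched sequence, max pooling over time, then a softmax classifier.
# W_cls and b_cls are hypothetical stand-ins for the final fc parameters;
# the inverted-dropout approximation of drop_rate=0.2 is an assumption.
import numpy as np

def readout_sketch(match_seq, W_cls, b_cls, drop_rate=0.2, train=True):
    if train:
        mask = np.random.rand(*match_seq.shape) >= drop_rate
        match_seq = match_seq * mask / (1.0 - drop_rate)
    match_result = match_seq.max(axis=0)     # Max pooling over the sequence
    logits = W_cls.dot(match_result) + b_cls
    e = np.exp(logits - logits.max())
    return e / e.sum()                       # class probabilities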
def _h_step(self, h_cur, u):
    """Reduces the similarities of h_cur to u to their maximum."""
    s = self._step_basic(h_cur, u)
    step_max = layer.pooling(input=s, pooling_type=paddle.pooling.Max())
    return step_max
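# In NumPy terms, _h_step just reduces the similarity row for the current
# context step to its maximum over the question positions (shape assumed):
import numpy as np

def h_step_sketch(s):
    # s: (T_u,) similarities of the current h step to every u position
    return s.max()                       # Max pooling over the u sequence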