def _attention(self, direct, cur_token, prev, to_apply, to_apply_proj):
    # Project the current token and the previous state into a shared space.
    with layer.mixed(size=cur_token.size,
                     bias_attr=Attr.Param(direct + '.bp', initial_std=0.),
                     act=Act.Linear()) as proj:
        proj += layer.full_matrix_projection(
            input=cur_token, param_attr=Attr.Param(direct + '.wp'))
        proj += layer.full_matrix_projection(
            input=prev, param_attr=Attr.Param(direct + '.wr'))
    # Broadcast the projection over the sequence, combine it with the
    # pre-projected sequence, and score each step with a softmax over time.
    expanded = layer.expand(input=proj, expand_as=to_apply)
    att_context = layer.addto(input=[expanded, to_apply_proj],
                              act=Act.Tanh(),
                              bias_attr=False)
    att_weights = layer.fc(input=att_context,
                           param_attr=Attr.Param(direct + '.w'),
                           bias_attr=Attr.Param(direct + '.b', initial_std=0.),
                           act=Act.SequenceSoftmax(),
                           size=1)
    # Attention-weighted sum of the sequence.
    scaled = layer.scaling(input=to_apply, weight=att_weights)
    applied = layer.pooling(input=scaled,
                            pooling_type=paddle.pooling.Sum())
    return applied
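# A minimal NumPy sketch of the additive attention that _attention wires up
# above. The parameter names (Wp, Wr, bp, w, b) are illustrative stand-ins for
# the parameters registered under direct + '.wp', '.wr', '.bp', '.w', '.b';
# this is a reference for the math only, not a drop-in for the Paddle layers.
import numpy as np

def additive_attention_reference(cur_token, prev, to_apply, to_apply_proj,
                                 Wp, Wr, bp, w, b):
    proj = cur_token.dot(Wp) + prev.dot(Wr) + bp       # shared projection, shape (d,)
    context = np.tanh(to_apply_proj + proj)            # (T, d); proj broadcasts over T
    scores = context.dot(w) + b                        # one score per time step, (T,)
    weights = np.exp(scores - scores.max())
    weights /= weights.sum()                           # softmax over the sequence
    return (weights[:, None] * to_apply).sum(axis=0)   # attention-weighted sum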
def _u_step(self, h_cur, u):
    # Score every position of u against the current h step, normalize the
    # scores over the sequence, and pool u into a single context vector.
    s = self._step_basic(h_cur, u)
    with layer.mixed(size=1,
                     bias_attr=False,
                     act=Act.SequenceSoftmax()) as h_weights:
        h_weights += layer.identity_projection(s)
    applied_weights = layer.scaling(input=u, weight=h_weights)
    u_ctx = layer.pooling(input=applied_weights,
                          pooling_type=paddle.pooling.Sum())
    return u_ctx
def sum_weighted_loss(loss, weight, size=1):
    """Normalized weighted loss: sum(loss * weight) / sum(weight).

    `loss` has shape batch_size x image_size and `weight` is a per-element
    weight map of the same size.
    """
    weighted_loss = pd.mixed(
        size=size,
        input=[pd.dotmul_operator(a=loss, b=weight, scale=1.0)])
    weight_fac = pd.sum_cost(input=weight)
    weight_fac = util_layers.math_op(input=weight_fac, act=pd.activation.Inv())
    # Normalize the element-wise weighted loss by the total weight.
    weighted_loss = pd.scaling(input=weighted_loss, weight=weight_fac)
    weighted_loss = pd.sum_cost(input=weighted_loss)
    return weighted_loss
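# A NumPy reference for the normalization sum(loss * weight) / sum(weight)
# that sum_weighted_loss is meant to compute; shapes are illustrative and
# Paddle's batch handling inside sum_cost is ignored here.
import numpy as np

def sum_weighted_loss_reference(loss, weight):
    # loss, weight: arrays of shape (batch_size, image_size)
    return (loss * weight).sum() / weight.sum()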
def test_math_layer(self):
    addto = layer.addto(input=[pixel, pixel])
    linear_comb = layer.linear_comb(weights=weight, vectors=hidden, size=10)
    interpolation = layer.interpolation(
        input=[hidden, hidden], weight=score)
    bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4)
    power = layer.power(input=pixel, weight=score)
    scaling = layer.scaling(input=pixel, weight=score)
    slope = layer.slope_intercept(input=pixel)
    tensor = layer.tensor(a=pixel, b=pixel, size=1000)
    cos_sim = layer.cos_sim(a=pixel, b=pixel)
    trans = layer.trans(input=tensor)
    print layer.parse_network(addto, linear_comb, interpolation, power,
                              scaling, slope, tensor, cos_sim, trans)
def ns_ele_l2_cost(input, label, weight, height, width,
                   num_channel=None, interp='nearest'):
    assert interp in image_resize_func.keys()
    # Make sure input, label and weight all have the same spatial size.
    input = pd.bilinear_interp(input=input, out_size_x=width, out_size_y=height)
    label = image_resize_func[interp](input=label, out_size_x=width, out_size_y=height)
    weight = image_resize_func[interp](input=weight, out_size_x=width, out_size_y=height)

    # Reshape the original layers: input has shape c x h x w, change to h x w x c.
    input_ts = pd.transpose(input=input, trans_order=[1, 2, 0],
                            height=height, width=width)
    input_rs = pd.resize(input=input_ts, size=num_channel, height=1, width=1)
    label_ts = pd.transpose(input=label, trans_order=[1, 2, 0],
                            height=height, width=width)
    label_rs = pd.resize(input=label_ts, size=num_channel, height=1, width=1)
    weight_rs = pd.resize(input=weight, size=1, height=1, width=1)

    # Per-pixel L2 norm of the difference, weighted and normalized by sum(weight).
    cost_rs = pd.mse_cost(input=input_rs, label=label_rs)
    sqrt_l2_cost = util_layers.math_op(input=cost_rs, act=pd.activation.Sqrt())
    sqrt_l2_cost = pd.mixed(
        size=1,
        input=[pd.dotmul_operator(a=sqrt_l2_cost, b=weight_rs, scale=1.0)])
    sqrt_l2_cost = pd.resize(input=sqrt_l2_cost, size=height * width,
                             height=height, width=width)
    weight_fac = pd.sum_cost(input=weight)
    weight_fac = util_layers.math_op(input=weight_fac, act=pd.activation.Inv())
    sqrt_l2_cost = pd.scaling(input=sqrt_l2_cost, weight=weight_fac)
    cost = pd.sum_cost(input=sqrt_l2_cost)
    return cost
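# A NumPy sketch of the per-pixel weighted L2 cost above for a single image,
# assuming pred/label are (C, H, W) arrays and weight is an (H, W) mask.
# Paddle's mse_cost scaling and batching are abstracted away, so treat this as
# the shape of the computation rather than an exact numerical match.
import numpy as np

def ns_ele_l2_reference(pred, label, weight):
    diff = pred - label                               # (C, H, W)
    per_pixel = np.sqrt((diff ** 2).sum(axis=0))      # L2 norm across channels, (H, W)
    return (per_pixel * weight).sum() / weight.sum()  # weighted, normalized by sum(weight)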
def _attention_flow(self, h, u):
    # Score each h step against u, softmax the scores over the sequence,
    # pool h into a single attended vector, and broadcast it back over h.
    bs = layer.recurrent_group(input=[h, layer.StaticInput(u)],
                               step=self._h_step,
                               reverse=False)
    b_weights = layer.mixed(act=Act.SequenceSoftmax(),
                            bias_attr=False,
                            input=layer.identity_projection(bs))
    h_step_scaled = layer.scaling(input=h, weight=b_weights)
    h_step = layer.pooling(input=h_step_scaled,
                           pooling_type=paddle.pooling.Sum())
    h_expr = layer.expand(input=h_step, expand_as=h)
    # Per-step attention over u, then combine everything in _beta.
    u_expr = layer.recurrent_group(input=[h, layer.StaticInput(u)],
                                   step=self._u_step,
                                   reverse=False)
    g = self._beta(h, u_expr, h_expr)
    return g
def test_math_layer(self):
    addto = layer.addto(input=[pixel, pixel])
    linear_comb = layer.linear_comb(
        weights=combine_weight, vectors=hidden, size=10)
    interpolation = layer.interpolation(
        input=[hidden, hidden], weight=score)
    bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4)
    power = layer.power(input=pixel, weight=score)
    scaling = layer.scaling(input=pixel, weight=score)
    slope = layer.slope_intercept(input=pixel)
    tensor = layer.tensor(a=pixel, b=pixel, size=1000)
    cos_sim = layer.cos_sim(a=pixel, b=pixel)
    trans = layer.trans(input=tensor)
    print layer.parse_network([
        addto, linear_comb, interpolation, power, scaling, slope, tensor,
        cos_sim, trans
    ])
def ele_norm_cost(input, label, weight, height=None, width=None,
                  num_channel=None, cost_type='l1'):
    if height > 1 and width > 1:
        input = pd.bilinear_interp(input=input, out_size_x=width, out_size_y=height)
        label = pd.bilinear_interp(input=label, out_size_x=width, out_size_y=height)
        if weight:
            weight = pd.nearest_interp(input=weight, out_size_x=width, out_size_y=height)
    size = height * width * num_channel
    if weight:
        # Mask both prediction and label, then normalize by the total weight.
        input = pd.mixed(
            size=size,
            input=[pd.dotmul_operator(a=input, b=weight, scale=1.0)])
        label = pd.mixed(
            size=size,
            input=[pd.dotmul_operator(a=label, b=weight, scale=1.0)])
        cost = cost_func[cost_type](input=input, label=label)
        fac = pd.sum_cost(input=weight)
        fac = util_layers.math_op(input=fac, act=pd.activation.Inv())
        cost = pd.scaling(input=cost, weight=fac)
        cost = pd.sum_cost(input=cost)
    else:
        # No weight map: average the cost over all pixels.
        cost = cost_func[cost_type](input=input, label=label)
        fac = 1.0 / float(height * width)
        cost = pd.slope_intercept(input=cost, slope=fac, intercept=0.0)
        cost = pd.sum_cost(input=cost)
    return cost
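# A NumPy sketch of ele_norm_cost's two branches for an L1 cost, with
# illustrative flat arrays; cost_func and Paddle's internal per-layer scaling
# are abstracted away.
import numpy as np

def ele_norm_cost_reference(pred, label, weight=None, height=1, width=1):
    if weight is not None:
        err = np.abs(pred * weight - label * weight)   # mask both prediction and label
        return err.sum() / weight.sum()                # normalize by total weight
    err = np.abs(pred - label)
    return err.sum() / float(height * width)           # average over pixels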