def func(self, place):
    """Run a double-gradient check of ``paddle.transpose`` on a 4-D input.

    Args:
        place: Device placement forwarded to ``double_grad_check``.
    """
    shape = [2, 3, 4, 5]
    float_type = np.float64

    # Declare the input variable and flag it as persistable.
    x = layers.data('x', shape, False, float_type)
    x.persistable = True
    out = paddle.transpose(x, [0, 2, 3, 1])

    # Random initial value drawn uniformly from [-1, 1).
    init_value = np.random.uniform(-1, 1, shape).astype(float_type)
    gradient_checker.double_grad_check(
        [x], out, x_init=init_value, place=place)
def forward(self, input_ids, token_type_ids=None):
    """Return the ``(start_logits, end_logits)`` pair for span prediction.

    Args:
        input_ids: Token ids forwarded to ``self.bert``.
        token_type_ids: Optional segment ids forwarded to ``self.bert``.

    Returns:
        tuple: ``(start_logits, end_logits)`` obtained by unstacking the
        classifier output along its last axis.
    """
    # Position ids and attention mask are deliberately left to the encoder.
    sequence_output, _pooled = self.bert(
        input_ids,
        token_type_ids=token_type_ids,
        position_ids=None,
        attention_mask=None)

    # Move the last (logit) axis to the front so it can be unstacked.
    span_logits = paddle.transpose(
        self.classifier(sequence_output), perm=[2, 0, 1])
    start_logits, end_logits = paddle.unstack(x=span_logits, axis=0)
    return start_logits, end_logits
def forward(self, x):
    """Apply self-attention over the flattened trailing axes, plus a residual.

    Notation follows https://arxiv.org/pdf/1805.08318.pdf.

    Args:
        x: Input tensor; every axis after the first two is flattened.

    Returns:
        Tensor reshaped back to the shape of ``x``.
    """
    original_shape = x.shape
    # Collapse everything after the first two axes into a single axis.
    flat = paddle.reshape(x, list(original_shape[:2]) + [-1])

    f = self.query(flat)
    g = self.key(flat)
    h = self.value(flat)

    scores = paddle.bmm(paddle.transpose(f, [0, 2, 1]), g)
    beta = paddle.nn.functional.softmax(scores, axis=1)

    # Scaled attention output added back onto the flattened input.
    attended = self.gamma * paddle.bmm(h, beta) + flat
    return paddle.reshape(attended, original_shape)
def transpose(self, *perm):
    """Transpose a tensor, accepting either a full permutation or two axes.

    Two calling conventions are supported:

    * ``transpose(perm_sequence)`` - the first positional argument is an
      iterable and is forwarded unchanged to ``paddle.transpose``.
    * ``transpose(axis_a, axis_b)`` - only the two given axes are swapped.

    Args:
        *perm: Either one iterable permutation or two integer axes. Negative
            axes are accepted in the two-axis form and count from the end.

    Returns:
        The result of ``paddle.transpose`` (first form) or ``self.permute``
        (second form).

    Raises:
        IndexError: If no argument is given, only one integer axis is given,
            or an axis is out of range for ``self.shape``.
    """
    if isinstance(perm[0], Iterable):
        return paddle.transpose(self, perm[0])

    # Only swap two axes. Swapping the entries in place (instead of writing
    # the raw arguments into the list) keeps the permutation non-negative
    # even when the caller passes negative axes such as (-1, -2).
    axes = list(range(len(self.shape)))
    first, second = perm[0], perm[1]
    axes[first], axes[second] = axes[second], axes[first]
    return self.permute(*axes)
def forward(self, edges, node_feat, edge_feat, segment_ids, is_training=True):
    """Embed nodes, run the interaction network and pool per graph.

    Args:
        edges: Edge list used to build the ``Graph``.
        node_feat: Node attributes stored under ``'node_attr'``.
        edge_feat: Edge attributes stored under ``'edge_attr'``.
        segment_ids: Segment id per node row, used for mean pooling.
        is_training: Forwarded to ``self.linkpred`` when edges are predicted.

    Returns:
        tuple: ``(out, l0_penaty, l2_penaty)`` where ``out`` is clipped to
        ``[0, 1]``, ``l0_penaty`` is ``0`` when edges are not predicted, and
        ``l2_penaty`` is the sum of squared updated node features.
    """
    # Shape notes carried over from the original comments (not verified here):
    #   graph.node_feat['node_attr']: [batch_size*3, 1]
    #   graph.edge_feat['edge_attr']: [batch_size*6, 2]
    #   graph.edges:                  [batch_size*6, 2]
    graph = Graph(
        edges=edges,
        node_feat={"node_attr": node_feat},
        edge_feat={"edge_attr": edge_feat}).tensor()
    x = graph.node_feat['node_attr']
    edge_index = graph.edges
    sr = graph.edge_feat['edge_attr']

    _x = self.feature_emb(paddle.cast(x, 'int32'))
    _x = _x.squeeze(1)
    graph.node_feat['node_attr'] = _x

    if self.pred_edges:
        sr = paddle.transpose(sr, perm=[1, 0])
        s, l0_penaty = self.linkpred(sr, is_training)
        pred_edge_index, pred_edge_weight = self.construct_pred_edge(
            edge_index, s)
        sub_graph = Graph(edges=pred_edge_index, node_feat={'node_attr': _x})
        updated_nodes = self.sign(sub_graph, pred_edge_weight)
    else:
        # Does not conduct link prediction; use all interactions.
        updated_nodes = self.sign(graph, sr)
        l0_penaty = 0

    l2_penaty = paddle.multiply(updated_nodes, updated_nodes).sum()

    # Pad with zero rows so there is one row per segment id. A single concat
    # replaces the previous row-at-a-time loop, which re-copied the whole
    # tensor on every iteration.
    missing_rows = segment_ids.shape[0] - updated_nodes.shape[0]
    if missing_rows > 0:
        padding = paddle.zeros((missing_rows, self.dim), dtype='float32')
        updated_nodes = paddle.concat([updated_nodes, padding], 0)

    # Graph-level average pooling.
    graph_embedding = pgl.math.segment_mean(updated_nodes, segment_ids)
    out = self.g(graph_embedding)
    out = paddle.clip(out, min=0, max=1)
    return out, l0_penaty, l2_penaty
def channel_shuffle(x, groups):
    """Interleave channels across ``groups`` for a 4-D tensor.

    Args:
        x: Tensor whose first four dimensions are read as
            (batch, channels, height, width).
        groups: Number of channel groups; channels are integer-divided by it.

    Returns:
        Tensor reshaped to ``[batch, channels, height, width]`` with the
        group and per-group channel axes swapped.
    """
    n, c, h, w = x.shape[0:4]
    per_group = c // groups

    # Split channels into (groups, per_group), swap the two, then merge back.
    grouped = paddle.reshape(x=x, shape=[n, groups, per_group, h, w])
    swapped = paddle.transpose(x=grouped, perm=[0, 2, 1, 3, 4])
    return paddle.reshape(x=swapped, shape=[n, c, h, w])
def predict_word(dec_seq, enc_output, n_active_inst, n_bm,
                 memory_key_padding_mask):
    """Decode one step and return word probabilities per instance and beam.

    Relies on ``self`` from the enclosing scope.

    Args:
        dec_seq: Decoder input ids, embedded via ``self.embedding``.
        enc_output: Encoder memory passed to ``self.decoder``.
        n_active_inst: First dimension of the returned tensor.
        n_bm: Second dimension of the returned tensor.
        memory_key_padding_mask: Forwarded to ``self.decoder``.

    Returns:
        Tensor reshaped to ``[n_active_inst, n_bm, -1]``.
    """
    embedded = paddle.transpose(self.embedding(dec_seq), [1, 0, 2])
    embedded = self.positional_encoding(embedded)
    subsequent_mask = self.generate_square_subsequent_mask(
        paddle.shape(embedded)[0])

    decoded = self.decoder(
        embedded,
        enc_output,
        tgt_mask=subsequent_mask,
        tgt_key_padding_mask=None,
        memory_key_padding_mask=memory_key_padding_mask, )
    decoded = paddle.transpose(decoded, [1, 0, 2])

    # Pick the last step: (bh * bm) * d_h
    last_step = decoded[:, -1, :]
    probabilities = F.softmax(self.tgt_word_prj(last_step), axis=1)
    return paddle.reshape(probabilities, [n_active_inst, n_bm, -1])
def _get_target_input(self, rpn_feats, anchors):
    """Flatten per-level RPN outputs and anchors, then concatenate them.

    Args:
        rpn_feats: Iterable of ``(rpn_score, rpn_delta)`` pairs, one per level.
        anchors: Iterable of ``(anchor, var)`` pairs, one per level. ``var``
            is not used by this method.

    Returns:
        tuple: ``(rpn_scores, rpn_deltas, anchors)``. Scores and deltas are
        concatenated along axis 1 after being reshaped to ``(0, -1, 1)`` and
        ``(0, -1, 4)``; anchors are reshaped to ``(-1, 4)`` and concatenated
        along the default axis.
    """
    rpn_score_list = []
    rpn_delta_list = []
    anchor_list = []
    # The variance entry was previously reshaped and then discarded; that
    # dead operation has been dropped.
    for (rpn_score, rpn_delta), (anchor, _var) in zip(rpn_feats, anchors):
        # Move the channel axis last before flattening.
        rpn_score = paddle.transpose(rpn_score, perm=[0, 2, 3, 1])
        rpn_delta = paddle.transpose(rpn_delta, perm=[0, 2, 3, 1])
        rpn_score_list.append(
            paddle.reshape(x=rpn_score, shape=(0, -1, 1)))
        rpn_delta_list.append(
            paddle.reshape(x=rpn_delta, shape=(0, -1, 4)))
        anchor_list.append(paddle.reshape(anchor, shape=(-1, 4)))

    rpn_scores = paddle.concat(rpn_score_list, axis=1)
    rpn_deltas = paddle.concat(rpn_delta_list, axis=1)
    anchors = paddle.concat(anchor_list)
    return rpn_scores, rpn_deltas, anchors
def forward(self,
            input_ids,
            token_type_ids=None,
            position_ids=None,
            attention_mask=None):
    r"""
    Compute span start/end logits from the ERNIE encoder output.

    Args:
        input_ids (Tensor):
            See :class:`ErnieModel`.
        token_type_ids (Tensor, optional):
            See :class:`ErnieModel`.
        position_ids (Tensor, optional):
            See :class:`ErnieModel`.
        attention_mask (Tensor, optional):
            See :class:`ErnieModel`.

    Returns:
        tuple: Returns tuple (`start_logits`, `end_logits`).

        With the fields:

        - `start_logits` (Tensor):
            A tensor of the input token classification logits, indicates the
            start position of the labelled span. Its data type should be
            float32 and its shape is [batch_size, sequence_length].

        - `end_logits` (Tensor):
            A tensor of the input token classification logits, indicates the
            end position of the labelled span. Its data type should be
            float32 and its shape is [batch_size, sequence_length].

    Example:
        .. code-block::

            import paddle
            from paddlenlp.transformers import ErnieForQuestionAnswering, ErnieTokenizer

            tokenizer = ErnieTokenizer.from_pretrained('ernie-1.0')
            model = ErnieForQuestionAnswering.from_pretrained('ernie-1.0')

            inputs = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
            inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()}
            logits = model(**inputs)
    """
    sequence_output, _pooled = self.ernie(
        input_ids,
        token_type_ids=token_type_ids,
        position_ids=position_ids,
        attention_mask=attention_mask)

    # Bring the last (logit) axis to the front so it can be unstacked.
    span_logits = paddle.transpose(
        self.classifier(sequence_output), perm=[2, 0, 1])
    start_logits, end_logits = paddle.unstack(x=span_logits, axis=0)
    return start_logits, end_logits
def __call__(self, predicts, batch):
    """Compute the center loss between features and ``self.centers``.

    Args:
        predicts: A list/tuple ``(features, predicts)``; labels are taken as
            the argmax of ``predicts`` along axis 2.
        batch: Unused by this method.

    Returns:
        dict: ``{'loss_center': loss}``.
    """
    assert isinstance(predicts, (list, tuple))
    features, predicts = predicts

    flat_feats = paddle.reshape(
        features, [-1, features.shape[-1]]).astype("float64")
    label = paddle.argmax(predicts, axis=2)
    label = paddle.reshape(label, [label.shape[0] * label.shape[1]])

    n_samples = flat_feats.shape[0]
    n_classes = self.num_classes

    # Squared L2 distance between every feature and every center, expanded
    # as ||f||^2 + ||c||^2 - 2 * f . c
    feat_sq = paddle.sum(paddle.square(flat_feats), axis=1, keepdim=True)
    feat_sq = paddle.expand(feat_sq, [n_samples, n_classes])

    center_sq = paddle.sum(paddle.square(self.centers), axis=1, keepdim=True)
    center_sq = paddle.expand(
        center_sq, [n_classes, n_samples]).astype("float64")
    center_sq = paddle.transpose(center_sq, [1, 0])

    cross_term = paddle.matmul(flat_feats,
                               paddle.transpose(self.centers, [1, 0]))
    distmat = paddle.add(feat_sq, center_sq) - 2.0 * cross_term

    # Mask that keeps, for each sample, only the column of its own label.
    class_ids = paddle.arange(n_classes).astype("int64")
    label_grid = paddle.expand(
        paddle.unsqueeze(label, 1), (n_samples, n_classes))
    class_grid = paddle.expand(class_ids, [n_samples, n_classes])
    mask = paddle.equal(class_grid, label_grid).astype("float64")

    masked_dist = paddle.multiply(distmat, mask)
    # Every entry (including masked-out zeros) is clipped to [1e-12, 1e+12].
    loss = paddle.sum(
        paddle.clip(masked_dist, min=1e-12, max=1e+12)) / n_samples
    return {'loss_center': loss}
def forward(self, node_feat, edge_feat=None):
    """Build an adjacency tensor from pairwise node-feature differences.

    Args:
        node_feat: Node features; the original note gives the layout as
            bs * N * num_feat (not verified here).
        edge_feat: Passed through unchanged to the return value.

    Returns:
        tuple: ``(adj_val, edge_feat)``.
    """
    n_batch = node_feat.shape[0]
    n_nodes = node_feat.shape[1]

    # |x_i - x_j| for every node pair.
    x_i = node_feat.unsqueeze(2)
    x_j = paddle.transpose(x_i, (0, 2, 1, 3))
    pair_diff = paddle.abs(x_i - x_j)
    # size: bs x fs X N x N (2,128,11,11)
    pair_diff = paddle.transpose(pair_diff, (0, 3, 2, 1))
    if self.adj_type == 'sim':
        pair_diff = paddle.exp(-pair_diff)

    sim_val = self.sim_network(pair_diff)

    # Zero on the diagonal, one elsewhere.
    diag_mask = 1.0 - paddle.expand(
        paddle.eye(n_nodes), [n_batch, 1, n_nodes, n_nodes])

    use_softmax = self.activation == 'softmax'
    if use_softmax:
        sim_val = self.softmax_with_mask(sim_val, diag_mask)
    elif self.activation == 'sigmoid':
        sim_val = F.sigmoid(sim_val) * diag_mask
    else:
        sim_val = sim_val * diag_mask

    if self.edge_dim == 2:
        # Second channel holds the complementary (dissimilarity) value.
        if use_softmax:
            dsim_val = self.softmax_with_mask(1 - sim_val, diag_mask)
        else:
            dsim_val = (1 - sim_val) * diag_mask
        adj_val = paddle.concat([sim_val, dsim_val], 1)
    else:
        adj_val = sim_val

    if self.top_k > 0:
        n_q, n_edge, n1, n2 = adj_val.shape
        k = min(self.top_k, n1)
        rows = adj_val.reshape((n_q * n_edge * n1, n2))
        _values, indices = paddle.topk(rows, k)

        # One-hot the top-k columns of each row, then symmetrise the mask.
        keep = F.one_hot(indices, rows.shape[1]).sum(1)
        keep = keep.reshape((n_q, n_edge, n1, n2))
        keep = paddle.cast((keep + keep.transpose((0, 1, 3, 2))) > 0,
                           'float32')
        if use_softmax:
            adj_val = self.softmax_with_mask(adj_val, keep)
        else:
            adj_val = adj_val * keep

    return adj_val, edge_feat
def _postprocessing_by_level(self, locations, box_cls, box_reg, box_ctn,
                             scale_factor):
    """
    Decode the predictions of a single level.

    Args:
        locations (Variables): anchor points for current layer, [H*W, 2]
        box_cls (Variables): categories prediction, [N, C, H, W],
            C is the number of classes
        box_reg (Variables): bounding box prediction, [N, 4, H, W]
        box_ctn (Variables): centerness prediction, [N, 1, H, W]
        scale_factor (Variables): [h_scale, w_scale] for input images
    Return:
        box_cls_ch_last (Variables): score for each category, in [N, C, M]
            C is the number of classes and M is the number of anchor points
        box_reg_decoding (Variables): decoded bounding box, in [N, M, 4]
            last dimension is [x1, y1, x2, y2]
    """
    # Class scores: merge H and W, then squash with a sigmoid.
    cls_shape = self._merge_hw(box_cls)
    cls_scores = F.sigmoid(paddle.reshape(x=box_cls, shape=cls_shape))

    # Regression: merge H and W and put the 4 offsets on the last axis.
    reg_shape = self._merge_hw(box_reg)
    offsets = paddle.reshape(x=box_reg, shape=reg_shape)
    offsets = paddle.transpose(offsets, perm=[0, 2, 1])

    # Corners are the anchor point minus / plus the predicted offsets.
    loc_x = locations[:, 0]
    loc_y = locations[:, 1]
    corners = [
        loc_x - offsets[:, :, 0],
        loc_y - offsets[:, :, 1],
        loc_x + offsets[:, :, 2],
        loc_y + offsets[:, :, 3],
    ]
    decoded = paddle.stack(corners, axis=1)
    decoded = paddle.transpose(decoded, perm=[0, 2, 1])

    # Centerness: merge H and W, then squash with a sigmoid.
    ctn_shape = self._merge_hw(box_ctn)
    centerness = F.sigmoid(paddle.reshape(x=box_ctn, shape=ctn_shape))

    # recover the location to original image
    im_scale = paddle.concat([scale_factor, scale_factor], axis=1)
    decoded = decoded / im_scale

    weighted_scores = cls_scores * centerness
    return weighted_scores, decoded
def forward(self, x):
    """Run the network stages in order inside an fp16 guard.

    Args:
        x: Input tensor. When ``self.data_format`` is ``"NHWC"`` it is first
            transposed with perm ``[0, 2, 3, 1]`` and detached from
            gradient computation.

    Returns:
        Output of ``self.fc``.
    """
    with paddle.static.amp.fp16_guard():
        if self.data_format == "NHWC":
            x = paddle.transpose(x, [0, 2, 3, 1])
            x.stop_gradient = True

        stages = (self.stem, self.max_pool, self.blocks, self.avg_pool,
                  self.flatten, self.fc)
        for stage in stages:
            x = stage(x)
    return x
def index_fill_(self, dim, index, val):
    """Fill the slices selected by ``index`` along ``dim`` with ``val``.

    The work is done by moving ``dim`` to the front, scattering constant
    rows into place, moving the axis back, and assigning the result onto
    this tensor.

    Args:
        dim: Axis to index along; negative values count from the end.
        index: Index tensor passed to ``paddle.scatter``.
        val: Fill value.

    Returns:
        This tensor, after the result has been assigned onto it.
    """
    ndim = len(self.shape)
    while dim < 0:
        dim += ndim

    # Keep `self` bound to the caller's tensor. The previous code rebound
    # `self` to the transposed temporary, so the final assign wrote into
    # that temporary and the caller's tensor was left unchanged whenever
    # dim != 0.
    source = self
    x_shape = list(self.shape)
    if dim != 0:
        forward_perm = [dim] + [axis for axis in range(ndim) if axis != dim]
        source = paddle.transpose(self, perm=forward_perm)
        x_shape = [x_shape[axis] for axis in forward_perm]

    # One constant row per index entry, matching the trailing dimensions.
    updates_shape = list(index.shape) + x_shape[1:]
    updates = paddle.full(updates_shape, fill_value=val, dtype=self.dtype)
    out = paddle.scatter(source, index, updates)

    if dim != 0:
        # Inverse of forward_perm: put the leading axis back at `dim`.
        inverse_perm = list(range(1, ndim))
        inverse_perm.insert(dim, 0)
        out = paddle.transpose(out, perm=inverse_perm)

    paddle.assign(out, output=self)
    return self
def forward(self, prev_hidden, batch_H, char_onehots):
    """Compute one attention step and the next recurrent hidden state.

    Args:
        prev_hidden: Previous hidden state, fed to ``self.h2h`` and
            ``self.rnn``.
        batch_H: Encoder features, projected by ``self.i2h`` and used to
            build the context vector.
        char_onehots: Concatenated with the context along axis 1.

    Returns:
        tuple: ``(cur_hidden, alpha)``, where ``alpha`` is the attention
        weights after the ``[0, 2, 1]`` transpose.
    """
    projected_H = self.i2h(batch_H)
    projected_hidden = paddle.unsqueeze(self.h2h(prev_hidden), axis=1)

    energy = self.score(
        paddle.tanh(paddle.add(projected_H, projected_hidden)))
    alpha = paddle.transpose(F.softmax(energy, axis=1), [0, 2, 1])

    context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1)
    rnn_input = paddle.concat([context, char_onehots], 1)
    cur_hidden = self.rnn(rnn_input, prev_hidden)
    return cur_hidden, alpha
def infer(path):
    """Recognise the text in the image at ``path`` and print the result.

    Uses the module-level ``model`` and ``vocabulary``.

    Args:
        path: Image path handed to ``process``.
    """
    image = process(path, img_height=32)
    # Add a leading batch axis before converting to a tensor.
    batch = paddle.to_tensor(image[np.newaxis, :], dtype='float32')

    # Run recognition.
    logits = paddle.transpose(model(batch), perm=[1, 0, 2])
    probabilities = paddle.nn.functional.softmax(logits)[0]

    # Decode to obtain the recognition result.
    out_string = ctc_greedy_decoder(probabilities, vocabulary)
    print('预测结果:%s' % out_string)
def forward(self, inputs):
    """Store ``inputs``, run the architecture and return loss or prediction.

    Args:
        inputs: Dict of inputs. When ``self.data_format`` is ``'NHWC'`` its
            ``'image'`` entry is replaced in place by a transposed copy.

    Returns:
        ``self.get_loss()`` in training mode, otherwise ``self.get_pred()``.
    """
    if self.data_format == 'NHWC':
        inputs['image'] = paddle.transpose(inputs['image'], [0, 2, 3, 1])

    self.inputs = inputs
    self.model_arch()

    return self.get_loss() if self.training else self.get_pred()
def encoder_forward(self, src, src_mask=None, cache=None):
    """
    Replacement `forward` for `paddle.nn.TransformerEncoder`.

    Per the original documentation, this is installed by
    `enable_faster_encoder` for FasterTransformer-based inference and can be
    reverted with `disable_faster_encoder`; neither helper is visible here.

    Args:
        src (Tensor):
            The input of Transformer encoder. Documented as a tensor with
            shape `[batch_size, sequence_length, d_model]` and data type
            float32 or float16.
        src_mask (Tensor, optional):
            Attention mask, documented with shape
            `[batch_size, 1, 1, sequence_length]`. Positions equal to 0.0 are
            kept; any non-zero value (e.g. `-INF`) marks an unwanted
            position. NOTE(review): although the default is None, the body
            reads `src_mask.dtype` unconditionally, so a mask must be given.
        cache (optional):
            Accepted for signature compatibility; not used by this
            implementation.

    Returns:
        Tensor: The output of the last layer, passed through `self.norm`
        when that is not None.
    """
    if src_mask.dtype == paddle.float16:
        src_mask = paddle.cast(src_mask, "float32")

    # 1 where the original mask was 0.0 (wanted), 0 elsewhere, in src's dtype.
    keep = paddle.cast(src_mask == 0.0, src.dtype)
    # keep_t: [batch_size, 1, sequence_length, 1]
    keep_t = paddle.transpose(keep, perm=[0, 1, 3, 2])
    # pair_mask: [batch_size, 1, sequence_length, sequence_length]
    pair_mask = keep * keep_t

    output = src
    for layer in self.layers:
        output = layer(output, pair_mask)

    if self.norm is not None:
        output = self.norm(output)
    return output
def build_P_paddle(self, I_r_size):
    """Build the flattened grid of normalised sampling coordinates.

    Args:
        I_r_size: Pair unpacked as ``(height, width)``.

    Returns:
        Tensor reshaped to ``[-1, 2]``.
    """
    height, width = I_r_size

    # Odd integers from -size+1 up to size-1, divided by size.
    grid_x = (paddle.arange(-width, width, 2).astype('float32') + 1.0) / width
    grid_y = (
        paddle.arange(-height, height, 2).astype('float32') + 1.0) / height

    # Stack the meshgrid along a new last axis, then swap the first two axes.
    grid = paddle.stack(paddle.meshgrid(grid_x, grid_y), axis=2)
    grid = paddle.transpose(grid, perm=[1, 0, 2])
    # Flatten to one coordinate pair per row.
    return grid.reshape([-1, 2])
def test_transpose_by_complex_api(self):
    """Check ``paddle.transpose`` on complex data against ``np.transpose``."""
    shape = (2, 3, 4, 5)
    perm = [3, 2, 0, 1]
    for dtype in self._dtypes:
        # Real part is drawn first, then the imaginary part.
        real_part = np.random.random(shape).astype(dtype)
        imag_part = np.random.random(shape).astype(dtype)
        data = real_part + 1J * imag_part
        expected = np.transpose(data, perm)

        for place in self._places:
            with dg.guard(place):
                actual = paddle.transpose(dg.to_variable(data), perm=perm)
                self.assertTrue(np.allclose(actual.numpy(), expected))
def build_P_paddle(self, I_r_size):
    """Build the flattened grid of normalised sampling coordinates.

    Args:
        I_r_size: Pair unpacked as ``(width, height)``.

    Returns:
        Tensor reshaped to ``[-1, 2]``.
    """
    width, height = I_r_size

    def _axis(size):
        # Odd integers from -size+1 up to size-1, divided by size.
        steps = paddle.arange(-size, size, 2).astype('float32') + 1.0
        return paddle.divide(steps, paddle.to_tensor(size).astype('float32'))

    grid_x = _axis(width)
    grid_y = _axis(height)

    grid = paddle.stack(paddle.meshgrid(grid_x, grid_y), axis=2)
    grid = paddle.transpose(grid, perm=[1, 0, 2])
    return grid.reshape([-1, 2])
def news_encode(self, category, sub_category, title, content):
    """Encode a news item into a single vector.

    Category, sub-category, title and content are encoded separately and
    then combined by ``self.mix_attention``. Shape notes in the comments are
    carried over from the original annotations.

    Args:
        category: Category ids.
        sub_category: Sub-category ids.
        title: Title token ids, [batch, title_size].
        content: Content token ids.

    Returns:
        The output of ``self.mix_attention``.
    """
    relu = paddle.nn.ReLU()

    # [b, cate_d]
    cate_emb = self.cate_embedding(category)
    sub_cate_emb = self.sub_cate_embedding(sub_category)
    # [b, conv_out]
    category = relu(self.category_linear(cate_emb))
    sub_category = relu(self.sub_category_linear(sub_cate_emb))

    # title_emb: [batch, title_size, word_emb_d] -> [batch, word_emb_d, title_size]
    title_emb = paddle.transpose(
        self.word2vec_embedding(title), perm=[0, 2, 1])
    # [batch, conv_out, title_size]
    title_emb = self.conv_title(title_emb)

    # content_emb: [batch, content_size, word_emb_d] -> [batch, word_emb_d, content_size]
    content_emb = paddle.transpose(
        self.word2vec_embedding(content), perm=[0, 2, 1])
    # [batch, conv_out, content_size]
    # NOTE(review): the title convolution is applied to the content as well,
    # while a separate content bias is added below - confirm this is intended.
    content_emb = self.conv_title(content_emb)

    # Back to [batch, size, conv_out] for both branches.
    title_emb = paddle.transpose(title_emb, perm=[0, 2, 1])
    content_emb = paddle.transpose(content_emb, perm=[0, 2, 1])

    title_emb = relu(paddle.add(title_emb, self.conv_title_bias))
    content_emb = relu(paddle.add(content_emb, self.conv_content_bias))

    # [b, conv_out]
    title_emb = self.title_attention(title_emb)
    content_emb = self.content_attention(content_emb)

    # [b, conv_out * 4]
    combined = paddle.concat(
        [title_emb, content_emb, category, sub_category], axis=-1)
    # [b, 4, conv_out]
    grouped = paddle.reshape(combined, [-1, 4, self.conv_out_channel_size])
    # [b, conv_out]
    return self.mix_attention(grouped)
def forward(self, all_emb, q_emb=None, return_adj=False, return_emb=False):
    """Run the stacked edge/node layers and classify the resulting nodes.

    Args:
        all_emb: Node embeddings, indexed as [batch, node, feature]. The
            last node along axis 1 is treated as the query and the others
            as support.
        q_emb: Unused by this method.
        return_adj: When True, an initial adjacency built from pairwise
            feature differences is prepended to the edge-feature list.
        return_emb: When True, the support/query features are returned too.

    Returns:
        tuple: ``(s_logits, q_logits, edge_feat_list)`` and, when
        ``return_emb`` is True, additionally ``s_feat`` and ``q_feat``.
    """
    node_feat = all_emb
    if self.pre_dropout > 0:
        node_feat = self.predrop1(node_feat)

    edge_feat_list = []
    if return_adj:
        n_batch = node_feat.shape[0]
        n_nodes = node_feat.shape[1]

        x_i = node_feat.unsqueeze(2)
        # paddle.transpose needs a full permutation; the earlier two-element
        # perms (1, 2) / (1, 3) were axis pairs in the torch calling style
        # and are invalid for a 4-D tensor.
        x_j = paddle.transpose(x_i, (0, 2, 1, 3))
        init_adj = paddle.abs(x_i - x_j)
        # size: bs x fs X N x N (2,128,11,11)
        init_adj = paddle.transpose(init_adj, (0, 3, 2, 1))
        if self.adj_type == 'sim':
            init_adj = paddle.exp(-init_adj)

        # Zero on the diagonal, one elsewhere. The target shape must keep
        # the N x N extent of the eye matrix; [bs, 1, 1, 1] cannot be
        # expanded to from an [N, N] tensor.
        diag_mask = 1.0 - paddle.expand(
            paddle.eye(n_nodes), [n_batch, 1, n_nodes, n_nodes])
        init_adj = init_adj * diag_mask
        edge_feat_list.append(init_adj)

    for i in range(self.num_layers):
        adj, _ = self.layer_edge[i](node_feat)
        node_feat_new = self.layer_node[i](node_feat, adj)
        if self.node_concat:
            node_feat = paddle.concat([node_feat, node_feat_new], 2)
        else:
            node_feat = node_feat_new
        edge_feat_list.append(adj)

    if self.pre_dropout > 0:
        node_feat = self.predrop2(node_feat)
    node_feat = self.fc1(node_feat)
    # Residual connection back to the input embeddings.
    node_feat = self.res_alpha * all_emb + node_feat

    s_feat = node_feat[:, :-1, :]
    q_feat = node_feat[:, -1, :]
    s_logits = self.fc2(s_feat)
    q_logits = self.fc2(q_feat)

    if return_emb:
        return s_logits, q_logits, edge_feat_list, s_feat, q_feat
    return s_logits, q_logits, edge_feat_list
def forward(self, logits, label):
    """
    Forward computation.

    Pixels whose edge logit does not exceed ``self.edge_threshold`` have
    their label replaced by ``self.ignore_index``; cross entropy is then
    averaged over the remaining pixels.

    Args:
        logits (tuple|list): (seg_logit, edge_logit) Tensor, the data type is
            float32, float64. Shape is (N, C), where C is number of classes,
            and if shape is more than 2D, this is (N, C, D1, D2,..., Dk),
            k >= 1. C = 1 for edge_logit.
        label (Tensor): Label tensor, the data type is int64. Shape is
            (N, C), where each value is 0 <= label[i] <= C-1, and if shape
            is more than 2D, this is (N, C, D1, D2,..., Dk), k >= 1.

    Returns:
        Tensor: The masked mean loss.

    Raises:
        ValueError: If ``edge_logit`` and ``label`` differ in shape after the
            optional channel axis has been added to ``label``.
    """
    seg_logit, edge_logit = logits[0], logits[1]
    if len(label.shape) != len(seg_logit.shape):
        label = paddle.unsqueeze(label, 1)
    if edge_logit.shape != label.shape:
        raise ValueError(
            'The shape of edge_logit should equal to the label, but they are {} != {}'
            .format(edge_logit.shape, label.shape))

    # Keep labels only where the edge logit exceeds the threshold.
    filler = paddle.ones_like(label) * self.ignore_index
    label = paddle.where(edge_logit > self.edge_threshold, label, filler)

    # Channel-last layout for the cross entropy along the last axis.
    seg_logit = paddle.transpose(seg_logit, [0, 2, 3, 1])
    label = paddle.transpose(label, [0, 2, 3, 1])
    loss = F.softmax_with_cross_entropy(
        seg_logit, label, ignore_index=self.ignore_index, axis=-1)

    mask = paddle.cast(label != self.ignore_index, 'float32')
    loss = loss * mask

    # The mask mean is needed for both the normalisation and the running
    # minimum, so compute it once instead of three times.
    mask_mean = paddle.mean(mask)
    avg_loss = paddle.mean(loss) / (mask_mean + self.EPS)
    if mask_mean < self.mean_mask:
        self.mean_mask = mask_mean

    label.stop_gradient = True
    mask.stop_gradient = True
    return avg_loss
def get_mc_loss(self, feat, inputs):
    """Accumulate the re-identification loss over all classes.

    Args:
        feat: Embedding map; per the original note its shape is
            [bs, ch_emb, h, w].
        inputs: Dict that must contain ``'cls_id_map'`` and ``'cls_tr_ids'``
            and is also read for ``'index'`` and ``'index_mask'``.

    Returns:
        The sum of the per-class losses, each divided by its number of valid
        targets when that number is positive.
    """
    assert 'cls_id_map' in inputs and 'cls_tr_ids' in inputs
    index = inputs['index']
    cls_tr_ids = inputs['cls_tr_ids']  # [bs, num_classes, h, w]

    # Channel-last, then flatten the spatial axes.
    feat = paddle.transpose(feat, perm=[0, 2, 3, 1])
    feat_n, _, _, feat_c = feat.shape
    feat = paddle.reshape(feat, shape=[feat_n, -1, feat_c])

    # Prefix every gather index with the number of its batch item.
    index = paddle.unsqueeze(index, 2)
    batch_inds = paddle.concat(
        [
            paddle.full(
                shape=[1, index.shape[1], 1], fill_value=i, dtype='int64')
            for i in range(feat_n)
        ],
        axis=0)
    index = paddle.concat(x=[batch_inds, index], axis=2)
    feat = paddle.gather_nd(feat, index=index)

    # Keep only the embeddings whose mask entry is positive.
    feat_mask = paddle.unsqueeze(inputs['index_mask'], axis=2)
    feat_mask = paddle.expand_as(feat_mask, feat)
    feat_mask.stop_gradient = True
    feat = paddle.masked_select(feat, feat_mask > 0)
    feat = paddle.reshape(feat, shape=[-1, feat_c])

    reid_losses = 0
    for cls_id in self.num_identities_dict:
        key = str(cls_id)

        # target
        cur_cls_tr_ids = paddle.reshape(
            cls_tr_ids[:, cls_id, :, :], shape=[feat_n, -1])  # [bs, h*w]
        cls_id_target = paddle.gather_nd(cur_cls_tr_ids, index=index)
        cls_id_target = paddle.masked_select(cls_id_target,
                                             inputs['index_mask'] > 0)
        cls_id_target.stop_gradient = True

        # feat
        cls_id_feat = self.emb_scale_dict[key] * F.normalize(feat)
        cls_id_pred = self.classifiers[key](cls_id_feat)
        loss = self.reid_loss(cls_id_pred, cls_id_target)

        # Normalise by the number of targets that are not ignored.
        valid = (cls_id_target != self.reid_loss.ignore_index)
        valid.stop_gradient = True
        count = paddle.sum(paddle.cast(valid, dtype=np.int32))
        count.stop_gradient = True
        if count > 0:
            loss = loss / count
        reid_losses += loss

    return reid_losses
def forward(self, input_ids, token_type_ids=None):
    r"""
    The BertForQuestionAnswering forward method, overrides the __call__()
    special method.

    Args:
        input_ids (Tensor):
            See :class:`BertModel`.
        token_type_ids (Tensor, optional):
            See :class:`BertModel`.

    Returns:
        tuple: Returns tuple (`start_logits`, `end_logits`).

        With the fields:

        - `start_logits` (Tensor):
            A tensor of the input token classification logits, indicates the
            start position of the labelled span. Its data type should be
            float32 and its shape is [batch_size, sequence_length].

        - `end_logits` (Tensor):
            A tensor of the input token classification logits, indicates the
            end position of the labelled span. Its data type should be
            float32 and its shape is [batch_size, sequence_length].

    Example:
        .. code-block::

            import paddle
            from paddlenlp.transformers.bert.modeling import BertForQuestionAnswering
            from paddlenlp.transformers.bert.tokenizer import BertTokenizer

            tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
            model = BertForQuestionAnswering.from_pretrained('bert-base-cased')

            inputs = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
            inputs = {k:paddle.to_tensor([v]) for (k, v) in inputs.items()}
            outputs = model(**inputs)

            start_logits = outputs[0]
            end_logits = outputs[1]
    """
    # Position ids and attention mask are left to the encoder.
    sequence_output, _pooled = self.bert(
        input_ids,
        token_type_ids=token_type_ids,
        position_ids=None,
        attention_mask=None)

    # Bring the last (logit) axis to the front so it can be unstacked.
    span_logits = paddle.transpose(
        self.classifier(sequence_output), perm=[2, 0, 1])
    start_logits, end_logits = paddle.unstack(x=span_logits, axis=0)
    return start_logits, end_logits
def __combine_heads(x):
    """
    Swap axes 1 and 2 of a 4-D tensor and merge its last two dimensions
    into one; the reverse of __split_heads.

    Raises:
        ValueError: If ``x`` is not 4-D.
    """
    if len(x.shape) != 4:
        raise ValueError("Input(x) should be a 4-D Tensor.")

    swapped = paddle.transpose(x, perm=[0, 2, 1, 3])
    merged_width = swapped.shape[2] * swapped.shape[3]
    # A 0 in the target shape copies the corresponding input dimension.
    return paddle.reshape(x=swapped, shape=[0, 0, merged_width])
def __split_heads_qkv(queries, keys, values, n_head, d_key, d_value):
    """
    Split the last dimension of each input into heads and move the head
    axis forward.

    Specifically, transform a tensor with shape
    [bs, max_sequence_length, n_head * hidden_dim] into one with shape
    [bs, n_head, max_sequence_length, hidden_dim].

    Returns:
        tuple: ``(q, k, v)``.
    """

    def _split(tensor, head_dim):
        # A 0 in the target shape copies the corresponding input dimension.
        per_head = paddle.reshape(x=tensor, shape=[0, 0, n_head, head_dim])
        # -> [batch_size, n_head, max_sequence_len, hidden_size_per_head]
        return paddle.transpose(x=per_head, perm=[0, 2, 1, 3])

    q = _split(queries, d_key)
    # Original note: for encoder-decoder attention in inference, the key and
    # value ops/vars are inserted into the global block to serve as cache
    # during beam search.
    k = _split(keys, d_key)
    v = _split(values, d_value)
    return q, k, v
def local_pairwise_distances2(x, y, max_distance=9):
    """Computes pairwise squared l2 distances using a local search window.

    Both inputs are average-pooled by 2, distances are computed against
    shifted windows of the padded ``y``, squashed with a sigmoid, and
    bilinearly resized back to the input height and width.

    Args:
        x: Float32 tensor of shape [height, width, feature_dim].
        y: Float32 tensor of shape [height, width, feature_dim].
        max_distance: Integer, the amount of padding applied on every side
            of the pooled ``y``, which bounds the search window.

    Returns:
        Distances tensor with the input height and width on the first two
        axes and one channel per window offset on the last axis.
    """
    ori_h, ori_w, _ = x.shape

    # HWC -> 1CHW, then halve the resolution.
    x = F.avg_pool2d(
        paddle.transpose(x, [2, 0, 1]).unsqueeze(0), (2, 2), (2, 2))
    y = F.avg_pool2d(
        paddle.transpose(y, [2, 0, 1]).unsqueeze(0), (2, 2), (2, 2))
    _, channels, height, width = x.shape

    # Pad with a very large value so out-of-image offsets get huge distances.
    padding_val = 1e20
    pad_widths = (max_distance, ) * 4
    padded_y = F.pad(y, pad_widths, mode='constant', value=padding_val)

    # One shifted copy of y per offset, stacked on the last axis.
    offset_y = F.unfold(padded_y, kernel_sizes=[height, width])
    offset_y = offset_y.reshape([1, channels, height, width, -1])
    x = x.reshape([1, channels, height, width, 1])

    diff = x - offset_y
    squared = paddle.sum(paddle.multiply(diff, diff), axis=1)
    dists = squared.reshape([1, height, width, -1]).transpose([0, 3, 1, 2])

    # Map [0, inf) to [0, 1).
    dists = (paddle.nn.functional.sigmoid(dists) - 0.5) * 2
    dists = F.interpolate(
        dists, size=[ori_h, ori_w], mode='bilinear', align_corners=True)
    return dists.squeeze(0).transpose([1, 2, 0])
def forward(self, feats, image):
    """Predict boxes and class scores per feature level.

    Args:
        feats: Feature maps, one per level; paired with ``self.box_convs``
            and ``self.score_convs``.
        image: Forwarded to ``self.anchor_generator``.

    Returns:
        tuple: ``(outputs, prior_boxes)`` where ``outputs`` has the keys
        ``'boxes'`` and ``'scores'``, each a list with one tensor per level.
    """
    box_preds = []
    cls_scores = []
    heads = zip(feats, self.box_convs, self.score_convs)
    for feat, box_conv, score_conv in heads:
        # Channel-last, then one row of 4 values per box.
        box_pred = paddle.transpose(box_conv(feat), [0, 2, 3, 1])
        box_preds.append(paddle.reshape(box_pred, [0, -1, 4]))

        # Channel-last, then one row of class scores per box.
        cls_score = paddle.transpose(score_conv(feat), [0, 2, 3, 1])
        cls_scores.append(
            paddle.reshape(cls_score, [0, -1, self.num_classes]))

    prior_boxes = self.anchor_generator(feats, image)

    outputs = {'boxes': box_preds, 'scores': cls_scores}
    return outputs, prior_boxes