def filter_box_by_weight(self, pred, target, weight):
    """Keep only the entries whose weight is strictly positive.

    The positions where ``weight > 0`` are located once and the same
    positions are gathered from ``pred``, ``target`` and ``weight``.

    Returns:
        tuple: ``(pred, target, weight)`` restricted to those positions.
    """
    positive_idx = paddle.nonzero(weight > 0)
    # The index tensor is pure bookkeeping; no gradient should reach it.
    positive_idx.stop_gradient = True

    kept_pred = paddle.gather_nd(pred, positive_idx)
    kept_target = paddle.gather_nd(target, positive_idx)
    kept_weight = paddle.gather_nd(weight, positive_idx)
    return kept_pred, kept_target, kept_weight
def beam_search_step(state, logits, eos_id, beam_width, is_first_step,
                     length_penalty):
    """Advance a beam search by one decoding step.

    Args:
        state: Current beam state. ``log_probs``, ``lengths`` and
            ``finished`` are read from it; ``log_probs`` is unpacked as a
            2-D ``[B, W]`` tensor.
        logits: Next-token logits with shape ``[B*W, V]``.
        eos_id: Id of the end-of-sequence token.
        beam_width: Kept for interface compatibility. The value actually
            used is the second dimension of ``state.log_probs``.
        is_first_step: When true, only beam 0 of every batch entry is
            scored.
        length_penalty: Forwarded unchanged to ``hyp_score``.

    Returns:
        tuple: ``(output, next_state)`` where ``output`` is a
        ``BeamSearchOutput`` (scores, predicted ids, parent beam ids) and
        ``next_state`` is the updated ``BeamSearchState``.
    """
    _, vocab_size = logits.shape
    # NOTE: this deliberately overrides the ``beam_width`` argument.
    bsz, beam_width = state.log_probs.shape
    onehot_eos = P.cast(
        F.one_hot(P.ones([1], 'int64') * eos_id, vocab_size),
        'int64')  # [1, V]

    # log_softmax is the numerically stable form of log(softmax(x)): it
    # cannot underflow to log(0) = -inf for very unlikely tokens.
    probs = F.log_softmax(logits)  # [B*W, V]
    probs = mask_prob(probs, onehot_eos, state.finished)  # [B*W, V]
    allprobs = P.reshape(state.log_probs, [-1, 1]) + probs  # [B*W, V]

    # A hypothesis grows by one token unless it is already finished or the
    # candidate token is EOS.
    not_finished = 1 - P.reshape(state.finished, [-1, 1])  # [B*W, 1]
    not_eos = 1 - onehot_eos
    length_to_add = not_finished * not_eos  # [B*W, V]
    alllen = P.reshape(state.lengths, [-1, 1]) + length_to_add

    # Flatten the (beam, vocab) axes so top-k ranks all candidates of a
    # batch entry together.
    allprobs = P.reshape(allprobs, [-1, beam_width * vocab_size])
    alllen = P.reshape(alllen, [-1, beam_width * vocab_size])
    allscore = hyp_score(allprobs, alllen, length_penalty)
    if is_first_step:
        # On the first step only beam 0 is considered.
        allscore = P.reshape(allscore, [bsz, beam_width, -1])[:, 0, :]
    scores, idx = P.topk(allscore, k=beam_width)  # [B, W]
    next_beam_id = idx // vocab_size  # [B, W]
    next_word_id = idx % vocab_size

    # The first column of nonzero(idx != -1) supplies a row index for each
    # entry of ``idx``; paired with the flat candidate index it forms 2-D
    # gather coordinates.
    gather_idx = P.concat(
        [P.nonzero(idx != -1)[:, :1], P.reshape(idx, [-1, 1])], 1)
    next_probs = P.reshape(P.gather_nd(allprobs, gather_idx), idx.shape)
    next_len = P.reshape(P.gather_nd(alllen, gather_idx), idx.shape)

    # Carry the "finished" flag over from the parent beam of each survivor.
    gather_idx = P.concat([
        P.nonzero(next_beam_id != -1)[:, :1],
        P.reshape(next_beam_id, [-1, 1])
    ], 1)
    next_finished = P.reshape(
        P.gather_nd(state.finished, gather_idx), state.finished.shape)
    # A beam is finished if its parent was, or if it has just emitted EOS.
    next_finished += P.cast(next_word_id == eos_id, 'int64')
    next_finished = P.cast(next_finished > 0, 'int64')

    next_state = BeamSearchState(
        log_probs=next_probs, lengths=next_len, finished=next_finished)
    output = BeamSearchOutput(
        scores=scores,
        predicted_ids=next_word_id,
        beam_parent_ids=next_beam_id)
    return output, next_state
def update(self, arc_preds, rel_preds, arcs, rels, mask):
    """Accumulate arc and relation hit counts over the masked positions.

    Adds to ``self.total``, ``self.correct_arcs`` and
    ``self.correct_rels``; nothing is returned.
    """
    positions = paddle.nonzero(mask)
    arc_hits = paddle.gather_nd(arc_preds == arcs, positions)
    rel_hits = paddle.gather_nd(rel_preds == rels, positions)
    # A relation only counts as correct when its arc is correct as well.
    rel_hits = paddle.logical_and(rel_hits, arc_hits)

    self.total += len(arc_hits)
    self.correct_arcs += np.sum(arc_hits.numpy()).item()
    self.correct_rels += np.sum(rel_hits.numpy()).item()
def get_loss(self, heatmap, size, offset, weights, inputs):
    """Compute the heatmap, size and offset losses and their weighted sum.

    Args:
        heatmap: Raw heatmap prediction; sigmoid and clipping are applied
            here.
        size: Size prediction, transposed from channel-first to
            channel-last before gathering.
        offset: Offset prediction, handled the same way as ``size``.
        weights: Mapping with keys ``'heatmap'``, ``'size'`` and
            ``'offset'`` giving the weight of each loss term.
        inputs: Mapping providing ``'heatmap'``, ``'size'``, ``'offset'``,
            ``'index'`` and ``'index_mask'`` targets.

    Returns:
        dict: ``det_loss`` (weighted sum) plus the three individual
        losses.
    """
    target_heatmap = inputs['heatmap']
    target_size = inputs['size']
    target_offset = inputs['offset']
    index = inputs['index']
    mask = inputs['index_mask']

    # Clip the probabilities away from 0 and 1 before the focal loss.
    heatmap = paddle.clip(F.sigmoid(heatmap), 1e-4, 1 - 1e-4)
    heatmap_loss = self.focal_loss(heatmap, target_heatmap)

    # NCHW -> [N, H*W, C] so a (batch, flat position) pair selects a vector.
    size = paddle.transpose(size, perm=[0, 2, 3, 1])
    size_n, _, _, size_c = size.shape
    size = paddle.reshape(size, shape=[size_n, -1, size_c])

    # Prefix every flat index with its batch number for gather_nd.
    index = paddle.unsqueeze(index, 2)
    batch_inds = paddle.concat(
        [
            paddle.full(
                shape=[1, index.shape[1], 1], fill_value=i, dtype='int64')
            for i in range(size_n)
        ],
        axis=0)
    index = paddle.concat(x=[batch_inds, index], axis=2)

    pos_size = paddle.gather_nd(size, index=index)
    mask = paddle.unsqueeze(mask, axis=2)
    size_mask = paddle.cast(
        paddle.expand_as(mask, pos_size), dtype=pos_size.dtype)
    pos_num = size_mask.sum()
    size_mask.stop_gradient = True
    target_size.stop_gradient = True
    size_loss = F.l1_loss(
        pos_size * size_mask, target_size * size_mask, reduction='sum')
    # The epsilon guards against division by zero when nothing is masked in.
    size_loss = size_loss / (pos_num + 1e-4)

    offset = paddle.transpose(offset, perm=[0, 2, 3, 1])
    offset_n, _, _, offset_c = offset.shape
    offset = paddle.reshape(offset, shape=[offset_n, -1, offset_c])
    pos_offset = paddle.gather_nd(offset, index=index)
    offset_mask = paddle.cast(
        paddle.expand_as(mask, pos_offset), dtype=pos_offset.dtype)
    pos_num = offset_mask.sum()
    offset_mask.stop_gradient = True
    target_offset.stop_gradient = True
    offset_loss = F.l1_loss(
        pos_offset * offset_mask,
        target_offset * offset_mask,
        reduction='sum')
    offset_loss = offset_loss / (pos_num + 1e-4)

    det_loss = (weights['heatmap'] * heatmap_loss +
                weights['size'] * size_loss +
                weights['offset'] * offset_loss)

    return {
        'det_loss': det_loss,
        'heatmap_loss': heatmap_loss,
        'size_loss': size_loss,
        'offset_loss': offset_loss
    }
def filter_box_by_weight(self, pred, target, weight):
    """Drop boxes whose regression weight is 0 and keep the positive ones.

    Returns:
        tuple: ``(pred, target, weight)`` gathered at the positions where
        ``weight > 0``.
    """
    keep = paddle.nonzero(weight > 0)
    keep.stop_gradient = True  # indices carry no gradient
    return (paddle.gather_nd(pred, keep),
            paddle.gather_nd(target, keep),
            paddle.gather_nd(weight, keep))
def get_mc_loss(self, feat, inputs):
    """Sum the re-identification losses over all configured classes.

    Args:
        feat: Embedding map; the original comment gives its shape as
            ``[bs, ch_emb, h, w]``.
        inputs: Mapping that must contain ``'cls_id_map'`` and
            ``'cls_tr_ids'`` and also provides ``'index'`` and
            ``'index_mask'``.

    Returns:
        The accumulated loss (the int ``0`` when
        ``self.num_identities_dict`` is empty).
    """
    assert 'cls_id_map' in inputs and 'cls_tr_ids' in inputs
    index = inputs['index']
    mask = inputs['index_mask']
    cls_id_map = inputs['cls_id_map']  # [bs, h, w]; read but not used below
    cls_tr_ids = inputs['cls_tr_ids']  # [bs, num_classes, h, w]

    # NCHW -> [N, H*W, C] so gather_nd can pick one embedding per index.
    feat = paddle.transpose(feat, perm=[0, 2, 3, 1])
    feat_n, _, _, feat_c = feat.shape
    feat = paddle.reshape(feat, shape=[feat_n, -1, feat_c])

    # Prefix each flat index with its batch number.
    index = paddle.unsqueeze(index, 2)
    batch_inds = paddle.concat(
        [
            paddle.full(
                shape=[1, index.shape[1], 1], fill_value=i, dtype='int64')
            for i in range(feat_n)
        ],
        axis=0)
    index = paddle.concat(x=[batch_inds, index], axis=2)
    feat = paddle.gather_nd(feat, index=index)

    # Keep only the embeddings at valid (mask > 0) positions.
    mask = paddle.unsqueeze(mask, axis=2)
    mask = paddle.expand_as(mask, feat)
    mask.stop_gradient = True
    feat = paddle.masked_select(feat, mask > 0)
    feat = paddle.reshape(feat, shape=[-1, feat_c])

    index_mask = inputs['index_mask']
    reid_losses = 0
    for cls_id in self.num_identities_dict:
        key = str(cls_id)

        # Target ids of this class at the gathered positions.
        flat_tr_ids = paddle.reshape(
            cls_tr_ids[:, cls_id, :, :], shape=[feat_n, -1])  # [bs, h*w]
        cls_id_target = paddle.gather_nd(flat_tr_ids, index=index)
        cls_id_target = paddle.masked_select(cls_id_target, index_mask > 0)
        cls_id_target.stop_gradient = True

        # Scaled, normalised embedding -> per-class classifier.
        cls_id_feat = self.emb_scale_dict[key] * F.normalize(feat)
        cls_id_pred = self.classifiers[key](cls_id_feat)

        loss = self.reid_loss(cls_id_pred, cls_id_target)
        valid = (cls_id_target != self.reid_loss.ignore_index)
        valid.stop_gradient = True
        count = paddle.sum((paddle.cast(valid, dtype=np.int32)))
        count.stop_gradient = True
        # Average over the non-ignored targets when there are any.
        if count > 0:
            loss = loss / count
        reid_losses += loss

    return reid_losses
def _bipartite_match_for_batch(self, gt_bbox, gt_label, prior_boxes,
                               bg_index):
    """Assign a ground-truth box and label to every prior box.

    Args:
        gt_bbox (Tensor): [B, N, 4]
        gt_label (Tensor): [B, N, 1]
        prior_boxes (Tensor): [A, 4]
        bg_index (int): Background class index

    Returns:
        tuple: ``(targets_bbox, targets_label)``; the boxes are encoded as
        deltas relative to the priors and reshaped to ``[B, -1, 4]``.
    """
    batch_size = gt_bbox.shape[0]
    num_priors = prior_boxes.shape[0]

    flat_ious = iou_similarity(gt_bbox.reshape((-1, 4)), prior_boxes)
    ious = flat_ious.reshape((batch_size, -1, num_priors))

    # A ground truth counts as an object when it overlaps any prior.
    num_object = (ious.sum(axis=-1) > 0).astype('int64').sum(axis=-1)

    # Best ground truth for every prior ...
    prior_max_iou = ious.max(axis=1)
    prior_argmax_iou = ious.argmax(axis=1)
    # ... and best prior for every ground truth.
    gt_max_iou = ious.max(axis=2)
    gt_argmax_iou = ious.argmax(axis=2)

    # Pair each prior's best-GT index with its batch index for gather_nd.
    batch_ind = paddle.arange(0, batch_size, dtype='int64')
    batch_ind = batch_ind.unsqueeze(-1).tile([1, num_priors])
    prior_argmax_iou = paddle.stack([batch_ind, prior_argmax_iou], axis=-1)
    targets_bbox = paddle.gather_nd(gt_bbox, prior_argmax_iou)
    targets_label = paddle.gather_nd(gt_label, prior_argmax_iou)

    # Priors whose best overlap is below the threshold become background.
    bg_index_tensor = paddle.full([batch_size, num_priors, 1], bg_index,
                                  'int64')
    is_background = prior_max_iou.unsqueeze(-1) < self.overlap_threshold
    targets_label = paddle.where(is_background, bg_index_tensor,
                                 targets_label)

    # Force every real ground truth onto its best-matching prior.
    for i in range(batch_size):
        if num_object[i] > 0:
            n_obj = int(num_object[i])
            best_priors = gt_argmax_iou[i, :n_obj]
            targets_bbox[i] = paddle.scatter(targets_bbox[i], best_priors,
                                             gt_bbox[i, :n_obj])
            targets_label[i] = paddle.scatter(targets_label[i], best_priors,
                                              gt_label[i, :n_obj])

    # Encode the matched boxes relative to the (batch-tiled) priors.
    prior_boxes = prior_boxes.unsqueeze(0).tile([batch_size, 1, 1])
    targets_bbox = bbox2delta(
        prior_boxes.reshape([-1, 4]),
        targets_bbox.reshape([-1, 4]), self.prior_box_var)
    targets_bbox = targets_bbox.reshape([batch_size, -1, 4])

    return targets_bbox, targets_label
def _forward(self):
    """Run the detector and dispatch on training mode and ``self.metric``.

    Returns:
        The losses from ``self.reid`` when training. Otherwise the
        detection dict (``'MOTDet'``), the output of ``self.reid`` with
        ``test_emb=True`` (``'ReID'``) or the tracker output (``'MOT'``).

    Raises:
        ValueError: If ``self.metric`` is none of the three known values
            in evaluation mode.
    """
    det_outs = self.detector(self.inputs)

    if self.training:
        emb_feats = det_outs['emb_feats']
        det_losses = det_outs['det_losses']
        return self.reid(emb_feats, self.inputs, det_losses['loss_confs'],
                         det_losses['loss_boxes'])

    if self.metric == 'MOTDet':
        return {
            'bbox': det_outs['bbox'],
            'bbox_num': det_outs['bbox_num'],
        }

    if self.metric == 'ReID':
        return self.reid(det_outs['emb_feats'], self.inputs, test_emb=True)

    if self.metric == 'MOT':
        emb_outs = self.reid(det_outs['emb_feats'], self.inputs)
        boxes_idx = det_outs['boxes_idx']
        bbox = det_outs['bbox']

        input_shape = self.inputs['image'].shape[2:]
        im_shape = self.inputs['im_shape']
        scale_factor = self.inputs['scale_factor']
        # Rescale the box columns (2 onward) in place.
        bbox[:, 2:] = scale_coords(bbox[:, 2:], input_shape, im_shape,
                                   scale_factor)

        nms_keep_idx = det_outs['nms_keep_idx']
        # Detections are laid out as columns 2.. followed by column 1.
        pred_dets = paddle.concat((bbox[:, 2:], bbox[:, 1:2]), axis=1)

        # Embeddings of the decoded boxes, then of those surviving NMS.
        emb_valid = paddle.gather_nd(emb_outs, boxes_idx)
        pred_embs = paddle.gather_nd(emb_valid, nms_keep_idx)

        return self.tracker.update(pred_dets, pred_embs)

    raise ValueError("Unknown metric {} for multi object tracking.".format(
        self.metric))
def ctcloss(self, f_char, tcl_pos, tcl_mask, tcl_label, label_t):
    """Mean CTC loss over character features sampled along text lines.

    Features are gathered at ``tcl_pos``, masked with ``tcl_mask`` and
    passed to the legacy ``paddle.fluid.layers.warpctc`` operator.

    Returns:
        The mean of the per-sample CTC costs.
    """
    # Channel-last layout so each (n, y, x) triple selects a channel vector.
    f_char = paddle.transpose(f_char, [0, 2, 3, 1])
    positions = paddle.cast(paddle.reshape(tcl_pos, [-1, 3]), dtype=int)
    sampled = paddle.gather_nd(f_char, positions)
    sampled = paddle.reshape(sampled, [-1, 64, 37])  # len(Lexicon_Table)+1

    # The first 36 channels are characters, the last one is background.
    fg, bg = paddle.split(sampled, [36, 1], axis=2)

    # Outside the mask, push the background logit up and the character
    # logits down.
    bg = bg * tcl_mask + (1.0 - tcl_mask) * 20.0
    dim_b, dim_c, dim_l = tcl_mask.shape
    fg_mask = paddle.expand(x=tcl_mask, shape=[dim_b, dim_c, 36 * dim_l])
    fg_mask.stop_gradient = True
    fg = fg * fg_mask + (1.0 - fg_mask) * (-20.0)

    masked = paddle.concat([fg, bg], axis=2)
    # Swap the first two axes; the leading axis is then used as the
    # sequence length for every sample.
    seq_first = paddle.transpose(masked, (1, 0, 2))
    N, B, _ = seq_first.shape
    input_lengths = paddle.to_tensor([N] * B, dtype='int64')

    loss_out = paddle.fluid.layers.warpctc(seq_first, tcl_label,
                                           self.pad_num, True,
                                           input_lengths, label_t)
    cost = paddle.fluid.layers.squeeze(loss_out, [-1])
    return cost.mean()
def __call__(self, s_arc, s_rel, arcs, rels, mask):
    """Average cross-entropy of arc and relation scores at masked positions.

    Returns:
        The mean of ``arc_cost + rel_cost``.
    """
    positions = paddle.nonzero(mask)

    gold_arcs = paddle.masked_select(arcs, mask)
    gold_rels = paddle.masked_select(rels, mask)

    arc_scores = paddle.gather_nd(s_arc, positions)
    rel_scores = paddle.gather_nd(s_rel, positions)
    # Keep only the relation scores that belong to each gold arc.
    rel_scores = index_sample(rel_scores,
                              paddle.unsqueeze(gold_arcs, axis=1))

    arc_cost = F.cross_entropy(arc_scores, gold_arcs)
    rel_cost = F.cross_entropy(rel_scores, gold_rels)
    return paddle.mean(arc_cost + rel_cost)
def ctcloss(self, f_char, tcl_pos, tcl_mask, tcl_label, label_t):
    """Mean CTC loss over character features sampled along text lines.

    Same pipeline as the ``warpctc`` based variant, but the loss itself is
    computed with ``paddle.nn.functional.ctc_loss`` (``reduction='none'``)
    and averaged afterwards.

    Returns:
        The mean of the per-sample CTC costs.
    """
    # Move channels last, then pick one channel vector per (n, y, x) triple.
    feats = paddle.transpose(f_char, [0, 2, 3, 1])
    coords = paddle.reshape(tcl_pos, [-1, 3])
    coords = paddle.cast(coords, dtype=int)
    gathered = paddle.gather_nd(feats, coords)
    gathered = paddle.reshape(gathered,
                              [-1, 64, 37])  # len(Lexicon_Table)+1

    # Split into the 36 character channels and the background channel.
    char_part, blank_part = paddle.split(gathered, [36, 1], axis=2)

    # Masked-out steps get a high background logit ...
    blank_part = blank_part * tcl_mask + (1.0 - tcl_mask) * 20.0

    # ... and low character logits.
    b, c, l = tcl_mask.shape
    char_mask = paddle.expand(x=tcl_mask, shape=[b, c, 36 * l])
    char_mask.stop_gradient = True
    char_part = char_part * char_mask + (1.0 - char_mask) * (-20.0)

    logits = paddle.concat([char_part, blank_part], axis=2)
    logits = paddle.transpose(logits, (1, 0, 2))
    N, B, _ = logits.shape
    input_lengths = paddle.to_tensor([N] * B, dtype='int64')

    cost = paddle.nn.functional.ctc_loss(
        log_probs=logits,
        labels=tcl_label,
        input_lengths=input_lengths,
        label_lengths=label_t,
        blank=self.pad_num,
        reduction='none')
    return cost.mean()
def _get_rand_mask(self, blocked_query_mask, blocked_key_mask, rand_mask_idx,
                   batch_size, sequence_length):
    """Build the attention mask for the randomly selected key blocks.

    Args:
        blocked_query_mask: Query mask, documented as ``[B, L, bs]``.
        blocked_key_mask: Key mask, documented as ``[B, L, bs]``.
        rand_mask_idx: Gather indices of the random blocks, documented as
            ``[H, T]``.
        batch_size: Batch size ``B``.
        sequence_length: Sequence length; ``L = sequence_length // bs``.

    Returns:
        Random mask of shape ``[B, H, L-G, bs, R * bs]``.
    """
    bs = self.block_size
    B = batch_size
    L = sequence_length // bs
    H = self.num_heads
    G = self.num_global_blocks
    GB = self.num_global_blocks_back
    GF = self.num_global_blocks_front
    R = self.num_rand_blocks

    # Broadcast the key mask over heads, then gather the random blocks of
    # every batch entry separately.
    key_mask = paddle.unsqueeze(blocked_key_mask, 1)
    key_mask = paddle.expand(key_mask, [B, H, L, -1])
    key_mask = paddle.concat(
        [paddle.gather_nd(key_mask[b], rand_mask_idx) for b in range(B)], 0)
    key_mask = paddle.reshape(key_mask, [B, H, L - G, 1, R * bs])

    # Drop the global blocks at the front and back of the query mask. An
    # explicit end index is used instead of ``-GB`` because ``GF:-0`` would
    # be the empty slice ``GF:0`` when there are no global blocks at the
    # back.
    query_mask = paddle.unsqueeze(blocked_query_mask[:, GF:L - GB], 1)
    query_mask = paddle.expand(query_mask, [B, H, L - G, -1])
    query_mask = paddle.reshape(query_mask, [B, H, L - G, bs, 1])

    # Outer product: [.., bs, 1] x [.., 1, R*bs] -> [.., bs, R*bs].
    rand_mask = paddle.matmul(query_mask, key_mask)
    return rand_mask
def get_loss(self, scores, deltas, targets, rois, bbox_weight):
    """Compute the classification and box-regression losses of the head.

    Args:
        scores (Tensor): scores from bbox head outputs
        deltas (Tensor): deltas from bbox head outputs
        targets (list[List[Tensor]]): bbox targets containing tgt_labels,
            tgt_bboxes and tgt_gt_inds
        rois (List[Tensor]): RoIs generated in each batch
        bbox_weight: Passed through to ``bbox2delta`` when encoding the
            regression targets.

    Returns:
        dict: ``{'loss_bbox_cls': ..., 'loss_bbox_reg': ...}``.
    """

    def _merge(tensors):
        # Concatenate per-image tensors; a single one is used as-is.
        return paddle.concat(tensors) if len(tensors) > 1 else tensors[0]

    # TODO: better pass args
    tgt_labels, tgt_bboxes, tgt_gt_inds = targets

    tgt_labels = _merge(tgt_labels).cast('int64')
    tgt_labels.stop_gradient = True
    loss_bbox_cls = F.cross_entropy(
        input=scores, label=tgt_labels, reduction='mean')

    # Foreground samples are those labelled with a real class id.
    is_fg = paddle.logical_and(tgt_labels >= 0,
                               tgt_labels < self.num_classes)
    fg_inds = paddle.nonzero(is_fg).flatten()

    reg_delta = paddle.gather(deltas, fg_inds)
    # With exactly four delta columns the regression is class agnostic;
    # otherwise pick the four columns that belong to each sample's class.
    if deltas.shape[1] != 4:
        fg_gt_classes = paddle.gather(tgt_labels, fg_inds)

        row_inds = paddle.arange(fg_gt_classes.shape[0]).unsqueeze(1)
        row_inds = paddle.tile(row_inds, [1, 4]).reshape([-1, 1])

        col_inds = 4 * fg_gt_classes.unsqueeze(1) + paddle.arange(4)
        col_inds = col_inds.reshape([-1, 1])

        reg_inds = paddle.concat([row_inds, col_inds], axis=1)
        reg_delta = paddle.gather_nd(reg_delta, reg_inds).reshape([-1, 4])

    rois = _merge(rois)
    tgt_bboxes = _merge(tgt_bboxes)

    reg_target = bbox2delta(rois, tgt_bboxes, bbox_weight)
    reg_target = paddle.gather(reg_target, fg_inds)
    reg_target.stop_gradient = True

    # L1 distance, normalised by the total number of samples.
    abs_err = paddle.abs(reg_delta - reg_target).sum()
    loss_bbox_reg = abs_err / tgt_labels.shape[0]

    return {'loss_bbox_cls': loss_bbox_cls, 'loss_bbox_reg': loss_bbox_reg}
def forward(self,
            identify_feats,
            targets,
            loss_confs=None,
            loss_boxes=None,
            bboxes=None,
            boxes_idx=None,
            nms_keep_idx=None):
    """Run the identity heads and return losses or detections + embeddings.

    In training mode the embedding loss is combined with ``loss_confs`` and
    ``loss_boxes`` through ``self.jde_loss``. In evaluation mode
    ``bboxes``, ``boxes_idx`` and ``nms_keep_idx`` are required.

    Returns:
        The JDE losses when training, otherwise
        ``(pred_dets, pred_embs)``.
    """
    assert self.num_classes == 1, 'JDE only support sindle class MOT.'
    assert len(identify_feats) == self.anchor_levels

    # One identity head per feature level.
    ide_outs = [
        ide_head(feat)
        for feat, ide_head in zip(identify_feats, self.identify_outputs)
    ]

    if self.training:
        assert len(loss_confs) == len(loss_boxes) == self.anchor_levels
        loss_ides = self.emb_loss(ide_outs, targets, self.emb_scale,
                                  self.classifier)
        return self.jde_loss(loss_confs, loss_boxes, loss_ides,
                             self.loss_params_cls, self.loss_params_reg,
                             self.loss_params_ide, targets)

    assert bboxes is not None
    assert boxes_idx is not None
    assert nms_keep_idx is not None

    # Embeddings of the decoded boxes, then of those that survived NMS.
    emb_outs = self.get_emb_outs(ide_outs)
    emb_valid = paddle.gather_nd(emb_outs, boxes_idx)
    pred_embs = paddle.gather_nd(emb_valid, nms_keep_idx)

    # input_shape: [h, w], before data transforms, set in model config
    input_shape = targets['image'].shape[2:]
    # im_shape: [new_h, new_w], after data transforms
    im_shape = targets['im_shape'][0].numpy()
    scale_factor = targets['scale_factor'][0].numpy()
    # Rescale the box columns (2 onward) in place.
    bboxes[:, 2:] = self.scale_coords(bboxes[:, 2:], input_shape, im_shape,
                                      scale_factor)

    # tlwhs, scores, cls_ids
    pred_dets = paddle.concat(
        (bboxes[:, 2:], bboxes[:, 1:2], bboxes[:, 0:1]), axis=1)
    return pred_dets, pred_embs
def forward(self, inputs, lengths):
    """
    Computes the normalization term of a linear-chain CRF.
    See http://www.cs.columbia.edu/~mcollins/fb.pdf for reference.

    $$ F = logZ(x) = log\\sum_y exp(score(x,y)) $$
    $$ score(x,y) = \\sum_i Emit(x_i,y_i) + Trans(y_{i-1}, y_i) $$

    With $$ p(y_i) = Emit(x_i,y_i), T(y_{i-1}, y_i)=Trans(y_{i-1}, y_i) $$
    this gives
    $$ F(1) = log\\sum_{y1} exp(p(y_1) + T([START], y1)) $$
    $$ F(2) = log\\sum_{y1}\\sum_{y2} exp(p(y_1) + T([START], y1) + p(y_2) + T(y_1,y_2)) = log\\sum_{y2} exp(F(1) + p(y_2) + T(y_1,y_2)) $$
    $$ F(...) = ... $$
    i.e. a recursive formula.

    Args:
        inputs (Tensor): The input tensor with shape
            `[batch_size, sequence_length, num_tags]`.
        lengths (Tensor): The input length with shape `[batch_size]`.

    Returns:
        Tensor: The normalizers tensor, with shape `[batch_size]`.
    """
    batch_size, seq_len, n_labels = inputs.shape

    # Time-major emissions, each step broadcast to
    # [batch_size, num_tags, num_tags].
    emissions = inputs.transpose([1, 0, 2]).unsqueeze(-1)
    emissions = emissions.expand([seq_len, batch_size, n_labels, n_labels])
    # trans_exp: batch_size, num_tags, num_tags
    trans_exp = self.transitions.unsqueeze(0)
    trans_exp = trans_exp.expand([batch_size, n_labels, n_labels])

    if self.with_start_stop_tag:
        alpha = self._initialize_alpha(batch_size)

    alpha_per_step = []
    for step, emit_exp in enumerate(emissions):
        if step == 0 and not self.with_start_stop_tag:
            # Without a start tag there is nothing to carry into step one.
            mat = emit_exp
        else:
            alpha_exp = alpha.unsqueeze(1).expand(
                [batch_size, n_labels, n_labels])
            # F(n) = logsumexp(F(n-1) + p(y_n) + T(y_{n-1}, y_n))
            mat = emit_exp + trans_exp + alpha_exp
        alpha = paddle.logsumexp(mat, 2)
        alpha_per_step.append(alpha)

    # Pick the alpha at the last valid step of every sequence.
    stacked = paddle.stack(alpha_per_step).transpose([1, 0, 2])
    batch_index = self._get_batch_index(batch_size)
    idxs = paddle.stack([batch_index, lengths - 1], axis=1)
    alpha = paddle.gather_nd(stacked, idxs)

    if self.with_start_stop_tag:
        # One more transition into the stop tag.
        alpha += self.transitions[self.stop_idx].unsqueeze(0)

    return paddle.logsumexp(alpha, 1)
def sample_from_mog(self, y):
    """Sample from an output distribution that is a mixture of Gaussians.

    Parameters
    ------------
    y : Tensor [shape=(B, T, C_output)]
        The parameters of the output distribution. It is the concatenation
        of 3 parts: the logits of every component, the mean of each
        component and the log standard deviation of each component. Each
        part has shape (B, T, n_mixture), where ``n_mixture`` is the number
        of Gaussians in the mixture.

    Returns
    --------
    Tensor: [shape=(B, T)]
        Waveform sampled from the output distribution, clipped to [-1, 1].
    """
    batch_size, time_steps, output_dim = y.shape
    n_mixture = output_dim // 3

    w, mu, log_std = paddle.split(y, 3, -1)

    # Draw one mixture component per (batch, time) position.
    reshaped_w = paddle.reshape(w, (batch_size * time_steps, n_mixture))
    prob_ids = paddle.fluid.layers.sampling_id(F.softmax(reshaped_w))
    prob_ids = paddle.reshape(prob_ids, (batch_size, time_steps))
    prob_ids = prob_ids.numpy()

    # Build the (b, t, component) gather coordinates in one vectorised step
    # instead of a Python-level loop over every position.
    batch_grid, time_grid = np.meshgrid(
        np.arange(batch_size), np.arange(time_steps), indexing="ij")
    index = np.stack([batch_grid, time_grid, prob_ids],
                     axis=-1).astype("int32")
    index_var = paddle.to_tensor(index)

    # Select the parameters of the chosen component and sample from it.
    mu_ = paddle.gather_nd(mu, index_var)
    log_std_ = paddle.gather_nd(log_std, index_var)

    dist = D.Normal(mu_, paddle.exp(log_std_))
    samples = dist.sample(shape=[])
    samples = paddle.clip(samples, min=-1., max=1.)
    return samples
def flat_words(words, pad_index=0):
    """Flatten the non-padding entries of ``words`` and locate each group.

    Args:
        words: Id tensor. The cumulative sum over axis 1 of the per-group
            lengths implies at least three dimensions, presumably
            (batch, word, piece) -- TODO confirm.
        pad_index: Id treated as padding.

    Returns:
        tuple: ``(words, position)`` -- the re-padded flat sequences from
        ``pad_sequence_paddle`` and, for every group, the index of its last
        element in the flat sequence (clamped to the padded length).
    """
    is_real = words != pad_index
    group_lens = paddle.sum(paddle.cast(is_real, "int64"), axis=-1)

    # Empty groups are counted as length 1 so the running position still
    # advances past them.
    padded_lens = group_lens + paddle.cast((group_lens == 0), "int64")
    position = paddle.cumsum(padded_lens, axis=1) - 1

    # Collect the real entries and re-pad them to the total length per row.
    flat = paddle.gather_nd(words, paddle.nonzero(is_real))
    row_lens = paddle.sum(group_lens, axis=-1)
    flat = pad_sequence_paddle(flat, row_lens, pad_index)

    # Clamp positions that would point past the padded length.
    max_len = flat.shape[1]
    position = mask_fill(position, position >= max_len, max_len - 1)
    return flat, position
def process_by_class(self, bboxes, embedding, bbox_inds, topk_clses):
    """Group detections and their embeddings class by class.

    Classes with no detection are skipped; the per-class results are
    concatenated in ascending class-id order.

    Returns:
        tuple: ``(pred_dets, pred_embs)``.
    """
    dets_per_cls, embs_per_cls = [], []
    for cls_id in range(self.num_classes):
        cls_hits = paddle.cast(topk_clses == cls_id, 'float32')
        if cls_hits.sum().numpy() == 0:
            continue  # nothing detected for this class

        keep = cls_hits > 0
        dets_per_cls.append(paddle.gather_nd(bboxes, paddle.nonzero(keep)))

        # The box indices of this class select its embedding rows.
        picked = paddle.masked_select(bbox_inds, keep).unsqueeze(-1)
        embs_per_cls.append(paddle.gather_nd(embedding, picked))

    return paddle.concat(dets_per_cls), paddle.concat(embs_per_cls)
def get_loss(self, mask_logits, mask_label, mask_target, mask_weight):
    """Binary cross-entropy between the labelled-class mask and its target.

    For every sample only the logit map of its labelled class is kept; it
    is then compared against ``mask_target`` with per-sample weights.

    Returns:
        The mean weighted BCE-with-logits loss.
    """
    # A one-hot class selector broadcast to the shape of the logits.
    selector = F.one_hot(mask_label, self.num_classes).unsqueeze([2, 3])
    selector = paddle.expand_as(selector, mask_logits)
    selector.stop_gradient = True

    picked = paddle.gather_nd(mask_logits, paddle.nonzero(selector))
    logits_shape = mask_logits.shape
    mask_pred = paddle.reshape(
        picked, [logits_shape[0], logits_shape[2], logits_shape[3]])

    target = mask_target.cast('float32')
    weight = mask_weight.unsqueeze([1, 2])
    return F.binary_cross_entropy_with_logits(
        mask_pred, target, weight=weight, reduction="mean")
def __getitem__(self, idx):
    """Index a tensor, adding tensor, boolean-mask and tuple index support.

    Boolean tensors are indexed through an ``int32`` copy and, in most
    branches, cast back to ``bool`` afterwards. Anything not handled here
    is delegated to ``self.tmp``.
    """
    is_bool = self.dtype == paddle_dtypes.t_bool
    tensor = self.cast("int32") if is_bool else self

    def _restore(result):
        # Undo the temporary int32 cast for boolean tensors.
        return result.cast("bool") if is_bool else result

    if isinstance(idx, paddle.Tensor):
        if len(idx.shape) == 1:
            return _restore(paddle.gather(tensor, idx))
        if idx.dtype == paddle_dtypes.t_bool:
            # Boolean mask -> coordinates of its true entries.
            positions = paddle.nonzero(paddle.cast(idx, "int32"))
            return _restore(paddle.gather_nd(tensor, positions))

    if isinstance(idx, tuple):
        if is_condition_one(idx):
            # Only the first element of the tuple is used, as a mask.
            positions = paddle.nonzero(paddle.cast(idx[0], "int32"))
            return _restore(paddle.gather_nd(tensor, positions))
        if is_condition_two(idx):
            # Full slices on every leading axis, last index on the last.
            leading = [slice(None, None, None)] * (len(tensor.shape) - 1)
            return _restore(tensor.tmp(tuple(leading + [idx[-1]])))
        # TODO(syf): comes out as
        # (slice(None, None, None), slice(None, None, None), 0)
        return _restore(tensor.tmp(idx))

    # NOTE(review): unlike the branches above, this fallback does not cast
    # boolean tensors back to bool -- confirm that this is intended.
    out = tensor.tmp(idx)
    if out.shape == [1]:
        return out.numpy()[0]
    return out
def index_sample(x, index):
    """
    Select values from the last axis of ``x`` according to ``index``.

    Args:
        x: input matrix
        index: index matrix; its leading dimensions must match those of
            ``x``

    Returns:
        The selected values, reshaped to ``[x.shape[0], -1]`` (or
        ``[x.shape[0], x.shape[1], -1]`` for a 3-D ``x`` with a 3-D
        ``index``).

    >>> input
    [
        [1, 2, 3],
        [4, 5, 6]
    ]
    >>> index
    [
        [1, 2],
        [0, 1]
    ]
    >>> index_sample(input, index)
    [
        [2, 3],
        [4, 5]
    ]
    """
    x_shape = x.shape
    lead_dims = len(index.shape) - 1
    assert x_shape[:lead_dims] == index.shape[:lead_dims]
    is_3d = len(x_shape) == 3

    if is_3d and lead_dims == 1:
        rows = paddle.reshape(x, shape=[-1, x_shape[1], x_shape[-1]])
    else:
        rows = paddle.reshape(x, shape=[-1, x_shape[-1]])

    index = paddle.reshape(index, shape=[len(rows), -1, 1])

    # A row number for every entry of ``index``, shaped like ``index``.
    row_ids = paddle.arange(start=0, end=len(index), dtype=index.dtype)
    row_ids = paddle.unsqueeze(row_ids, axis=[1, 2])
    row_ids = paddle.expand(row_ids, index.shape)

    # (row, column) coordinate pairs for gather_nd.
    coords = paddle.concat((row_ids, index), -1)
    coords = paddle.reshape(coords, (-1, 2))
    out = paddle.gather_nd(rows, coords)

    if is_3d and lead_dims == 2:
        return paddle.reshape(out, shape=[x_shape[0], x_shape[1], -1])
    return paddle.reshape(out, shape=[x_shape[0], -1])
def test_static(self):
    """gather_nd in static-graph mode returns the row selected by index."""
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        data_var = fluid.layers.data('data1', shape=[-1, 2], dtype='float64')
        index_var = fluid.layers.data('index', shape=[-1, 1], dtype='int32')
        out = paddle.gather_nd(data_var, index_var)

        exe = fluid.Executor(fluid.CPUPlace())
        feed = {
            "data1": np.array([[1, 2], [3, 4], [5, 6]]),
            "index": np.array([[1]]),
        }
        (result, ) = exe.run(feed=feed, fetch_list=[out])

        # Index 1 selects the second row.
        expected_output = np.array([[3, 4]])
    self.assertTrue(np.allclose(result, expected_output))
def forward(self, x):
    """Encode every non-padding row of ``x`` with the LSTM and re-pad.

    A row counts as real when any of its entries differs from
    ``self.pad_index``. The final hidden states of the LSTM are
    concatenated along the last axis and regrouped by
    ``pad_sequence_paddle`` using the number of real rows per sample.
    """
    row_is_real = paddle.any(x != self.pad_index, axis=-1)
    rows_per_sample = paddle.sum(paddle.cast(row_is_real, 'int32'), axis=-1)

    # Flatten to the real rows only.
    rows = paddle.gather_nd(x, paddle.nonzero(row_is_real))
    row_lens = paddle.sum(
        paddle.cast(rows != self.pad_index, 'int32'), axis=-1)

    embedded = self.embed(rows)
    _, (hidden, _) = self.lstm(embedded, sequence_length=row_lens)
    # Concatenate the stacked hidden states along the feature axis.
    hidden = paddle.concat(paddle.unstack(hidden), axis=-1)

    return pad_sequence_paddle(
        hidden, rows_per_sample, pad_index=self.pad_index)
def batch_gather_2d(var, indices):
    """Gather slices from var in each batch, according to the corresponding
    index in indices.

    ``indices`` must be 2-D. ``var`` may have any rank >= 2; trailing
    dimensions beyond the first two are carried over unchanged.

    Args:
        var (Variable): with shape [batch_size, length, ...]
        indices (Variable): with shape [batch_size, max_len]

    Returns:
        Variable with shape [batch_size, max_len, ...], where ``...`` are
        the dimensions of ``var`` after the second one.

    Raises:
        ValueError: if ``indices`` is not 2-D.

    Examples:
        var
            [[1, 2, 3],
             [4, 5, 6]]
        indices
            [[2, 0], [1, 2]]

        return
            [[3, 1], [5, 6]]
    """
    if len(indices.shape) != 2:
        raise ValueError('shape of indices error. it should be a 2-D layers. '
                         'but got shape = %s' % (str(indices.shape), ))

    # The batch size is read at run time so dynamic batch dims keep working.
    batch_size = paddle.shape(indices)[0]

    zero = paddle.to_tensor([0], dtype='int64')
    one = paddle.to_tensor([1], dtype='int64')
    end = paddle.cast(batch_size, dtype='int64')
    batch_indices_1d = paddle.unsqueeze(
        paddle.arange(zero, end, one, dtype=indices.dtype), [1])

    # Pair every index with its batch number -> (batch, position) coords.
    seq_len = indices.shape[1]
    batch_indices = paddle.expand(batch_indices_1d, [batch_size, seq_len])
    coord_2d = paddle.concat(
        [paddle.unsqueeze(batch_indices, [2]),
         paddle.unsqueeze(indices, [2])],
        axis=2)
    coord_2d.stop_gradient = True
    coord_1d = paddle.reshape(coord_2d, shape=[-1, 2])

    output_1d = paddle.gather_nd(var, coord_1d)
    # Restore the [batch, seq] layout and keep whatever trailing dimensions
    # ``var`` has. For a 2-D ``var`` there are none, so the result is 2-D as
    # in the example above.
    output_shape = [batch_size, seq_len] + list(var.shape[2:])
    return paddle.reshape(output_1d, output_shape)
def __getitem__(self, idx):
    """Index with a 1-D tensor or a boolean mask tensor.

    A 1-D tensor index is served by ``paddle.gather`` and a boolean tensor
    by ``paddle.gather_nd`` on the coordinates of its true entries.
    Everything else (tuples included) is delegated to ``self.tmp``.
    """
    if isinstance(idx, paddle.Tensor):
        if len(idx.shape) == 1:
            return paddle.gather(self, idx)
        if str(idx.dtype) == "VarType.BOOL":
            positions = paddle.nonzero(paddle.cast(idx, "int32"))
            return paddle.gather_nd(self, positions)

    # TODO(syf): a tuple index comes out as
    # (slice(None, None, None), slice(None, None, None), 0)
    return self.tmp(idx)
def forward(self, head_out, anchors):
    """
    Decode the bbox and do NMS for JDE model.

    Args:
        head_out (list): Bbox_pred and cls_prob of bbox_head output.
        anchors (list): Anchors of JDE model.

    Returns:
        boxes_idx (Tensor): The index of kept bboxes after decode 'JDEBox'.
        bbox_pred (Tensor): The output is the prediction with shape [N, 6]
            including labels, scores and bboxes.
        bbox_num (Tensor): The number of prediction of each batch with
            shape [N].
        nms_keep_idx (Tensor): The index of kept bboxes after NMS.

        When ``self.return_idx`` is false, the first and last elements are
        both the third value returned by ``self.nms``.
    """
    boxes_idx, yolo_boxes_scores = self.decode(head_out, anchors)

    if len(boxes_idx) == 0:
        # Nothing decoded: substitute the prepared placeholder tensors.
        boxes_idx = self.fake_boxes_idx
        yolo_boxes_out = self.fake_yolo_boxes_out
        yolo_scores_out = self.fake_yolo_scores_out
    else:
        yolo_boxes = paddle.gather_nd(yolo_boxes_scores, boxes_idx)
        num_boxes = len(boxes_idx)
        # TODO: only support bs=1 now
        yolo_boxes_out = paddle.reshape(
            yolo_boxes[:, :4], shape=[1, num_boxes, 4])
        yolo_scores_out = paddle.reshape(
            yolo_boxes[:, 4:5], shape=[1, 1, num_boxes])
        # Drop the leading (batch) column of the indices.
        boxes_idx = boxes_idx[:, 1:]

    bbox_pred, bbox_num, keep_idx = self.nms(yolo_boxes_out, yolo_scores_out,
                                             self.num_classes)
    nothing_kept = bbox_pred.shape[0] == 0
    if nothing_kept:
        bbox_pred = self.fake_bbox_pred
        bbox_num = self.fake_bbox_num

    if self.return_idx:
        if nothing_kept:
            keep_idx = self.fake_nms_keep_idx
        return boxes_idx, bbox_pred, bbox_num, keep_idx

    return keep_idx, bbox_pred, bbox_num, keep_idx
def train_one_batch(self, batch, iter):
    """Run one teacher-forced training step and return the scalar loss.

    Args:
        batch: Batch object understood by ``get_input_from_batch`` and
            ``get_output_from_batch``.
        iter: Not used in this method.

    Returns:
        The first element of ``loss.numpy()``.
    """
    (enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab,
     extra_zeros, c_t_1, coverage) = get_input_from_batch(batch)
    (dec_batch, dec_padding_mask, max_dec_len, dec_lens_var,
     target_batch) = get_output_from_batch(batch)

    self.optimizer.clear_gradients()

    encoder_outputs, encoder_feature, encoder_hidden = self.model.encoder(
        enc_batch, enc_lens)
    s_t_1 = self.model.reduce_state(encoder_hidden)

    step_losses = []
    num_steps = min(max_dec_len, config.max_dec_steps)
    for di in range(num_steps):
        y_t_1 = dec_batch[:, di]  # Teacher forcing
        (final_dist, s_t_1, c_t_1, attn_dist, p_gen,
         next_coverage) = self.model.decoder(
             y_t_1, s_t_1, encoder_outputs, encoder_feature,
             enc_padding_mask, c_t_1, extra_zeros, enc_batch_extend_vocab,
             coverage, di)

        # Probability assigned to the gold token of every sample.
        target = target_batch[:, di]
        row_ids = paddle.arange(0, target.shape[0])
        gold_coords = paddle.stack([row_ids, target], axis=1)
        gold_probs = paddle.gather_nd(final_dist, gold_coords).squeeze()
        step_loss = -paddle.log(gold_probs + config.eps)

        if config.is_coverage:
            step_coverage_loss = paddle.sum(
                paddle.minimum(attn_dist, coverage), 1)
            step_loss = step_loss + config.cov_loss_wt * step_coverage_loss
            coverage = next_coverage

        # Padding steps do not contribute to the loss.
        step_losses.append(step_loss * dec_padding_mask[:, di])

    # Per-sample loss normalised by target length, then the batch mean.
    sum_losses = paddle.sum(paddle.stack(step_losses, 1), 1)
    loss = paddle.mean(sum_losses / dec_lens_var)

    loss.backward()
    self.optimizer.minimize(loss)

    return loss.numpy()[0]
def forward(self, logit, label):
    """Focal loss: cross-entropy scaled by ``(1 - pt) ** self.gamma``.

    Returns:
        The mean loss over all positions.
    """
    # N,C,H,W => N,C,H*W => N,H*W,C => N*H*W,C
    n, c = logit.shape[0], logit.shape[1]
    flat_logit = paddle.reshape(logit, [n, c, -1])
    flat_logit = paddle.transpose(flat_logit, [0, 2, 1])
    flat_logit = paddle.reshape(flat_logit, [-1, flat_logit.shape[2]])

    # (row, class) coordinates of every target entry.
    flat_label = paddle.reshape(label, [-1, 1])
    row_ids = paddle.unsqueeze(
        paddle.arange(0, flat_label.shape[0]), axis=-1)
    flat_label = paddle.cast(flat_label, dtype='int64')
    coords = paddle.concat([row_ids, flat_label], axis=-1)

    logpt = paddle.gather_nd(F.log_softmax(flat_logit), coords)
    # The modulating factor is detached, so no gradient flows through it.
    pt = paddle.exp(logpt.detach())
    loss = -1 * (1 - pt)**self.gamma * logpt
    return paddle.mean(loss)
def __call__(self, yolo_head_out, anchors):
    """Decode the head outputs and keep boxes above the confidence threshold.

    Returns:
        tuple: ``(boxes_idx, yolo_boxes_out, yolo_scores_out)``. When no box
        passes ``self.conf_thresh``, fixed single-entry placeholder tensors
        are returned instead.
    """
    per_level_preds = []
    for level, head_out in enumerate(yolo_head_out):
        stride = self.downsample_ratio // 2**level
        anc_w, anc_h = anchors[level][0::2], anchors[level][1::2]
        # Anchor sizes expressed in units of this level's stride.
        anchor_vec = np.stack((anc_w, anc_h), axis=1) / stride
        num_anchors = len(anc_w)

        head_shape = paddle.shape(head_out)
        head_shape.stop_gradient = True
        batch, grid_h, grid_w = head_shape[0], head_shape[-2], head_shape[-1]

        p = head_out.reshape(
            (batch, num_anchors, self.num_classes + 5, grid_h, grid_w))
        p = paddle.transpose(p, perm=[0, 1, 3, 4, 2])  # e.g. [nB, 4, 19, 34, 6]

        p_box = p[:, :, :, :, :4]  # e.g. [nB, 4, 19, 34, 4]
        boxes = self.decode_delta_map(p_box, anchor_vec)  # [nB, 4*19*34, 4]
        boxes = boxes * stride

        # Softmax over the two confidence channels; keep channel 1.
        p_conf = paddle.transpose(
            p[:, :, :, :, 4:6], perm=[0, 4, 1, 2, 3])  # e.g. [nB, 2, 4, 19, 34]
        p_conf = F.softmax(p_conf, axis=1)[:, 1, :, :, :]
        p_conf = p_conf.unsqueeze(-1)  # e.g. [nB, 4, 19, 34, 1]
        scores = paddle.reshape(p_conf, shape=[batch, -1, 1])

        per_level_preds.append(paddle.concat([boxes, scores], axis=-1))

    yolo_boxes_pred = paddle.concat(per_level_preds, axis=1)

    boxes_idx = paddle.nonzero(yolo_boxes_pred[:, :, -1] > self.conf_thresh)
    boxes_idx.stop_gradient = True

    if boxes_idx.shape[0] == 0:  # TODO: deploy
        boxes_idx = paddle.to_tensor(np.array([[0]], dtype='int64'))
        yolo_boxes_out = paddle.to_tensor(
            np.array([[[0.0, 0.0, 0.0, 0.0]]], dtype='float32'))
        yolo_scores_out = paddle.to_tensor(
            np.array([[[0.0]]], dtype='float32'))
        return boxes_idx, yolo_boxes_out, yolo_scores_out

    yolo_boxes = paddle.gather_nd(yolo_boxes_pred, boxes_idx)
    # ``batch`` still holds the value from the last head level.
    yolo_boxes_out = paddle.reshape(yolo_boxes[:, :4], shape=[batch, -1, 4])
    yolo_scores_out = paddle.reshape(yolo_boxes[:, 4:5], shape=[batch, 1, -1])
    # Drop the leading (batch) column of the indices.
    boxes_idx = boxes_idx[:, 1:]
    # e.g. [163], [1, 163, 4], [1, 1, 163]
    return boxes_idx, yolo_boxes_out, yolo_scores_out
def _gather_random_key_value(self, blocked_matrix, rand_mask_idx, B, T):
    """Gather the randomly selected key/value blocks for every batch entry.

    Args:
        blocked_matrix: Blocked keys or values, documented as
            ``[B, H, L, bs, -1]``.
        rand_mask_idx: Gather indices, documented as ``[H, T]``.
        B: Batch size.
        T: Sequence length; ``L = T // bs``.

    Returns:
        Random key matrix of shape ``[B, H, L-G, R * bs, -1]``.
    """
    num_blocks = T // self.block_size
    non_global_blocks = num_blocks - self.num_global_blocks
    rand_width = self.num_rand_blocks * self.block_size

    # gather_nd is applied per batch entry and the results are stacked
    # back along axis 0.
    per_batch = []
    for b in range(B):
        per_batch.append(paddle.gather_nd(blocked_matrix[b, :], rand_mask_idx))
    gathered = paddle.concat(per_batch, axis=0)

    return paddle.reshape(
        gathered, [B, self.num_heads, non_global_blocks, rand_width, -1])