def label_box(anchors, gt_boxes, positive_overlap, negative_overlap,
              allow_low_quality):
    iou = bbox_overlaps(gt_boxes, anchors)
    if iou.numel() == 0:
        default_matches = paddle.full((iou.shape[1], ), 0, dtype='int64')
        default_match_labels = paddle.full((iou.shape[1], ), -1, dtype='int32')
        return default_matches, default_match_labels

    matched_vals, matches = paddle.topk(iou, k=1, axis=0)
    match_labels = paddle.full(matches.shape, -1, dtype='int32')
    match_labels = paddle.where(matched_vals < negative_overlap,
                                paddle.zeros_like(match_labels), match_labels)
    match_labels = paddle.where(matched_vals >= positive_overlap,
                                paddle.ones_like(match_labels), match_labels)
    if allow_low_quality:
        highest_quality_foreach_gt = iou.max(axis=1, keepdim=True)
        pred_inds_with_highest_quality = paddle.logical_and(
            iou > 0, iou == highest_quality_foreach_gt).cast('int32').sum(
                0, keepdim=True)
        match_labels = paddle.where(pred_inds_with_highest_quality > 0,
                                    paddle.ones_like(match_labels),
                                    match_labels)

    matches = matches.flatten()
    match_labels = match_labels.flatten()
    return matches, match_labels
def forward(self,
            input_ids,
            token_type_ids=None,
            position_ids=None,
            task_type_ids=None):
    if position_ids is None:
        # Derive position ids from a cumsum of ones so the same code works in
        # both static and dynamic graphs (avoids relying on input_ids.shape[1]).
        ones = paddle.ones_like(input_ids, dtype="int64")
        seq_length = paddle.cumsum(ones, axis=1)
        position_ids = seq_length - ones
        position_ids.stop_gradient = True
    if token_type_ids is None:
        token_type_ids = paddle.zeros_like(input_ids, dtype="int64")

    input_embeddings = self.word_embeddings(input_ids)
    position_embeddings = self.position_embeddings(position_ids)
    token_type_embeddings = self.token_type_embeddings(token_type_ids)
    embeddings = input_embeddings + position_embeddings + token_type_embeddings

    if self.use_task_id:
        if task_type_ids is None:
            task_type_ids = paddle.ones_like(
                input_ids, dtype="int64") * self.task_id
        task_type_embeddings = self.task_type_embeddings(task_type_ids)
        embeddings = embeddings + task_type_embeddings

    embeddings = self.layer_norm(embeddings)
    embeddings = self.dropout(embeddings)
    return embeddings
def test_out(self):
    with fluid.program_guard(fluid.Program()):
        data = fluid.data(shape=[10], dtype="float64", name="data")
        ones = paddle.ones_like(data, device="cpu")
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        result, = exe.run(feed={"data": np.random.rand(10)},
                          fetch_list=[ones])
        expected_result = np.ones(10, dtype="float64")
        self.assertEqual((result == expected_result).all(), True)

    with fluid.program_guard(fluid.Program()):
        data = fluid.data(shape=[10], dtype="float64", name="data")
        ones = paddle.ones_like(data, device="cpu", dtype="float32")
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        result, = exe.run(feed={"data": np.random.rand(10)},
                          fetch_list=[ones])
        expected_result = np.ones(10, dtype="float32")
        self.assertEqual((result == expected_result).all(), True)

    with fluid.program_guard(fluid.Program()):
        data = fluid.data(shape=[10], dtype="float64", name="data")
        ones = paddle.ones_like(data)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        result, = exe.run(feed={"data": np.random.rand(10)},
                          fetch_list=[ones])
        expected_result = np.ones(10, dtype="float32")
        self.assertEqual((result == expected_result).all(), True)
def forward(self, x, y):
    if self.bias_x:
        x = paddle.concat([x, paddle.ones_like(x[:, :, :1])], axis=-1)
    if self.bias_y:
        # Build the bias column from y itself (the original used x here,
        # which only works when x and y happen to share the same shape).
        y = paddle.concat([y, paddle.ones_like(y[:, :, :1])], axis=-1)
    # Shape x: (batch_size, num_tokens, input_size + bias_x)
    b = x.shape[0]
    o = self.weight.shape[0]
    # Shape x: (batch_size, output_size, num_tokens, input_size + bias_x)
    x = paddle.expand(paddle.unsqueeze(x, axis=1),
                      shape=(x.shape[0], o, x.shape[1], x.shape[2]))
    # Shape y: (batch_size, output_size, num_tokens, input_size + bias_y)
    y = paddle.expand(paddle.unsqueeze(y, axis=1),
                      shape=(y.shape[0], o, y.shape[1], y.shape[2]))
    # Shape weight: (batch_size, output_size, input_size + bias_x, input_size + bias_y)
    weight = paddle.expand(paddle.unsqueeze(self.weight, axis=0),
                           shape=(b, self.weight.shape[0],
                                  self.weight.shape[1], self.weight.shape[2]))
    # Shape: (batch_size, output_size, num_tokens, num_tokens)
    s = paddle.matmul(paddle.matmul(x, weight),
                      paddle.transpose(y, perm=[0, 1, 3, 2]))
    # Remove dim 1 if n_out == 1
    if s.shape[1] == 1:
        s = paddle.squeeze(s, axis=1)
    return s
def test_api(self):
    shape = [3, 4]
    startup_program = Program()
    train_program = Program()
    with program_guard(train_program, startup_program):
        x = paddle.fluid.data('X', shape)

        # 'bool', 'float32', 'float64', 'int32', 'int64'
        out1 = ones_like(x)
        # np.bool has been removed from recent NumPy releases; it was an alias
        # for the builtin bool, which is used here instead.
        out2 = ones_like(x, bool)
        out3 = ones_like(x, 'float64')
        out4 = ones_like(x, 'int32')
        out5 = ones_like(x, 'int64')

    place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
    ) else fluid.CPUPlace()
    exe = fluid.Executor(place)
    outs = exe.run(train_program,
                   feed={'X': np.ones(shape).astype('float32')},
                   fetch_list=[out1, out2, out3, out4, out5])

    for i, dtype in enumerate(
        [np.float32, bool, np.float64, np.int32, np.int64]):
        self.assertEqual(outs[i].dtype, dtype)
        self.assertEqual((outs[i] == np.ones(shape, dtype)).all(), True)
def label_box(anchors,
              gt_boxes,
              positive_overlap,
              negative_overlap,
              allow_low_quality,
              ignore_thresh,
              is_crowd=None):
    iou = bbox_overlaps(gt_boxes, anchors)
    n_gt = gt_boxes.shape[0]
    if n_gt == 0 or is_crowd is None:
        n_gt_crowd = 0
    else:
        n_gt_crowd = paddle.nonzero(is_crowd).shape[0]
    if iou.shape[0] == 0 or n_gt_crowd == n_gt:
        # No truth, assign everything to background
        default_matches = paddle.full((iou.shape[1], ), 0, dtype='int64')
        default_match_labels = paddle.full((iou.shape[1], ), 0, dtype='int32')
        return default_matches, default_match_labels

    # if ignore_thresh > 0, remove an anchor if it is close to
    # one of the crowded ground-truth boxes
    if n_gt_crowd > 0:
        N_a = anchors.shape[0]
        ones = paddle.ones([N_a])
        mask = is_crowd * ones

        if ignore_thresh > 0:
            crowd_iou = iou * mask
            valid = (paddle.sum((crowd_iou > ignore_thresh).cast('int32'),
                                axis=0) > 0).cast('float32')
            iou = iou * (1 - valid) - valid

        # ignore the iou between anchor and crowded ground-truth
        iou = iou * (1 - mask) - mask

    matched_vals, matches = paddle.topk(iou, k=1, axis=0)
    match_labels = paddle.full(matches.shape, -1, dtype='int32')
    # set ignored anchor with iou = -1
    neg_cond = paddle.logical_and(matched_vals > -1,
                                  matched_vals < negative_overlap)
    match_labels = paddle.where(neg_cond, paddle.zeros_like(match_labels),
                                match_labels)
    match_labels = paddle.where(matched_vals >= positive_overlap,
                                paddle.ones_like(match_labels), match_labels)
    if allow_low_quality:
        highest_quality_foreach_gt = iou.max(axis=1, keepdim=True)
        pred_inds_with_highest_quality = paddle.logical_and(
            iou > 0, iou == highest_quality_foreach_gt).cast('int32').sum(
                0, keepdim=True)
        match_labels = paddle.where(pred_inds_with_highest_quality > 0,
                                    paddle.ones_like(match_labels),
                                    match_labels)

    matches = matches.flatten()
    match_labels = match_labels.flatten()
    return matches, match_labels
def libra_sample_bbox(matches,
                      match_labels,
                      matched_vals,
                      gt_classes,
                      batch_size_per_im,
                      num_classes,
                      fg_fraction,
                      fg_thresh,
                      bg_thresh,
                      num_bins,
                      use_random=True,
                      is_cascade_rcnn=False):
    rois_per_image = int(batch_size_per_im)
    fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))
    bg_rois_per_im = rois_per_image - fg_rois_per_im

    if is_cascade_rcnn:
        fg_inds = paddle.nonzero(matched_vals >= fg_thresh)
        bg_inds = paddle.nonzero(matched_vals < bg_thresh)
    else:
        matched_vals_np = matched_vals.numpy()
        match_labels_np = match_labels.numpy()

        # sample fg
        fg_inds = paddle.nonzero(matched_vals >= fg_thresh).flatten()
        fg_nums = int(np.minimum(fg_rois_per_im, fg_inds.shape[0]))
        if (fg_inds.shape[0] > fg_nums) and use_random:
            fg_inds = libra_sample_pos(matched_vals_np, match_labels_np,
                                       fg_inds.numpy(), fg_rois_per_im)
        fg_inds = fg_inds[:fg_nums]

        # sample bg
        bg_inds = paddle.nonzero(matched_vals < bg_thresh).flatten()
        bg_nums = int(np.minimum(rois_per_image - fg_nums, bg_inds.shape[0]))
        if (bg_inds.shape[0] > bg_nums) and use_random:
            bg_inds = libra_sample_neg(matched_vals_np,
                                       match_labels_np,
                                       bg_inds.numpy(),
                                       bg_rois_per_im,
                                       num_bins=num_bins,
                                       bg_thresh=bg_thresh)
        bg_inds = bg_inds[:bg_nums]

    sampled_inds = paddle.concat([fg_inds, bg_inds])

    gt_classes = paddle.gather(gt_classes, matches)
    gt_classes = paddle.where(match_labels == 0,
                              paddle.ones_like(gt_classes) * num_classes,
                              gt_classes)
    gt_classes = paddle.where(match_labels == -1,
                              paddle.ones_like(gt_classes) * -1, gt_classes)
    sampled_gt_classes = paddle.gather(gt_classes, sampled_inds)

    return sampled_inds, sampled_gt_classes
def dmr_fcn_attention(self,
                      item_eb,
                      item_his_eb,
                      context_his_eb,
                      mask,
                      mode='SUM'):
    # The body uses self.query_layer2 etc., so this must be a method; the
    # original signature was missing `self`.
    mask = paddle.equal(mask, paddle.ones_like(mask))
    item_eb_tile = paddle.tile(item_eb,
                               [1, paddle.shape(mask)[1]])  # B, T*E
    item_eb_tile = paddle.reshape(
        item_eb_tile,
        [-1, paddle.shape(mask)[1], item_eb.shape[-1]])  # B, T, E
    if context_his_eb is None:
        query = item_eb_tile
    else:
        query = paddle.concat([item_eb_tile, context_his_eb], axis=-1)
    query = self.query_layer2(query)
    query = self.query_prelu2(query)
    dmr_all = paddle.concat(
        [query, item_his_eb, query - item_his_eb, query * item_his_eb],
        axis=-1)
    att_layer_1 = self.att_layer1_layer2(dmr_all)
    att_layer_1 = F.sigmoid(att_layer_1)
    att_layer_2 = self.att_layer2_layer2(att_layer_1)
    att_layer_2 = F.sigmoid(att_layer_2)
    att_layer_3 = self.att_layer3_layer2(att_layer_2)  # B, T, 1
    att_layer_3 = paddle.reshape(
        att_layer_3, [-1, 1, paddle.shape(item_his_eb)[1]])  # B, 1, T
    scores = att_layer_3
    scores = scores.reshape([-1, 1, self.history_length])

    # Mask
    key_masks = paddle.unsqueeze(mask, 1)  # B, 1, T
    paddings = paddle.ones_like(scores) * (-2**32 + 1)
    paddings_no_softmax = paddle.zeros_like(scores)
    scores = paddle.where(key_masks, scores, paddings)  # [B, 1, T]
    scores_no_softmax = paddle.where(key_masks, scores, paddings_no_softmax)

    scores = F.softmax(scores)

    if mode == 'SUM':
        output = paddle.matmul(scores, item_his_eb)  # [B, 1, H]
        output = paddle.sum(output, axis=1)  # B, E
    else:
        scores = paddle.reshape(scores, [-1, paddle.shape(item_his_eb)[1]])
        output = item_his_eb * paddle.unsqueeze(scores, -1)
        output = paddle.reshape(output, paddle.shape(item_his_eb))

    return output, scores, scores_no_softmax
def forward(self, inputs, labels, weights, bias):
    """forward
    """
    # weights.stop_gradient = False
    embedding_dim = paddle.shape(weights)[-1]
    true_log_probs, samp_log_probs, neg_samples = self.sample(labels)
    n_sample = neg_samples.shape[0]

    b1 = paddle.shape(labels)[0]
    b2 = paddle.shape(labels)[1]

    all_ids = paddle.concat([labels.reshape((-1, )), neg_samples])
    all_w = paddle.gather(weights, all_ids)

    true_w = all_w[:-n_sample].reshape((-1, b2, embedding_dim))
    sample_w = all_w[-n_sample:].reshape((n_sample, embedding_dim))

    all_b = paddle.gather(bias, all_ids)
    true_b = all_b[:-n_sample].reshape((-1, 1))
    sample_b = all_b[-n_sample:]

    # [B, D] * [B, 1, D]
    true_logist = paddle.matmul(
        true_w, inputs.unsqueeze(1), transpose_y=True).squeeze(1) + true_b
    sample_logist = paddle.matmul(
        inputs.unsqueeze(1), sample_w, transpose_y=True) + sample_b

    if self.subtract_log_q:
        true_logist = true_logist - true_log_probs.unsqueeze(1)
        sample_logist = sample_logist - samp_log_probs

    if self.remove_accidental_hits:
        hit = (paddle.equal(labels[:, :], neg_samples)).unsqueeze(1)
        padding = paddle.ones_like(sample_logist) * -1e30
        sample_logist = paddle.where(hit, padding, sample_logist)

    sample_logist = sample_logist.squeeze(1)
    out_logist = paddle.concat([true_logist, sample_logist], axis=1)
    out_label = paddle.concat([
        paddle.ones_like(true_logist) / self.num_true,
        paddle.zeros_like(sample_logist)
    ],
                              axis=1)

    sampled_loss = F.softmax_with_cross_entropy(logits=out_logist,
                                                label=out_label,
                                                soft_label=True)
    return sampled_loss, out_logist, out_label
def get_pooled_embedding(self,
                         input_ids,
                         token_type_ids=None,
                         position_ids=None):
    src_mask = input_ids == self.bos_id
    src_mask = paddle.cast(src_mask, "float32")
    # [bs, 1, 1, max_len]
    src_mask = paddle.unsqueeze(src_mask, axis=[1, 2])
    src_mask.stop_gradient = True

    ones = paddle.ones_like(input_ids, dtype="int64")
    seq_length = paddle.cumsum(ones, axis=1)
    position_ids = seq_length - ones
    position_ids.stop_gradient = True

    embedding_output = self.ptm.embeddings(input_ids=input_ids,
                                           position_ids=position_ids,
                                           token_type_ids=token_type_ids)

    if self.use_fp16:
        embedding_output = paddle.cast(embedding_output, 'float16')

    sequence_output = self.ptm.encoder(embedding_output, src_mask)

    if self.use_fp16:
        sequence_output = paddle.cast(sequence_output, 'float32')

    cls_embedding = self.ptm.pooler(sequence_output)

    if self.output_emb_size > 0:
        cls_embedding = self.emb_reduce_linear(cls_embedding)

    cls_embedding = self.dropout(cls_embedding)
    cls_embedding = F.normalize(cls_embedding, p=2, axis=-1)

    return cls_embedding
def gen_bias(encoder_inputs, decoder_inputs, step):
    decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2]
    encoder_bsz, encoder_seqlen = encoder_inputs.shape[:2]
    attn_bias = paddle.reshape(
        paddle.arange(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1])
    decoder_bias = paddle.cast(
        (paddle.matmul(attn_bias, 1. / attn_bias, transpose_y=True) >= 1.),
        'float32')  # [1, decoderlen, decoderlen]
    encoder_bias = paddle.unsqueeze(
        paddle.cast(paddle.ones_like(encoder_inputs), 'float32'),
        [1])  # [bsz, 1, encoderlen]
    encoder_bias = paddle.expand(
        encoder_bias,
        [encoder_bsz, decoder_seqlen, encoder_seqlen])  # [bsz, decoderlen, encoderlen]
    decoder_bias = paddle.expand(
        decoder_bias,
        [decoder_bsz, decoder_seqlen, decoder_seqlen])  # [bsz, decoderlen, decoderlen]
    if step > 0:
        bias = paddle.concat([
            encoder_bias,
            paddle.ones([decoder_bsz, decoder_seqlen, step], 'float32'),
            decoder_bias
        ], -1)
    else:
        bias = paddle.concat([encoder_bias, decoder_bias], -1)
    return bias
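# --- Hedged usage sketch (added, not from the original source). gen_bias builds
# an attention bias in which every decoder position can attend to the whole
# encoder sequence plus a causal (lower-triangular) view of the decoder prefix.
# The demo below only checks shapes with dummy token ids; all names and sizes
# are illustrative.
def _demo_gen_bias():
    import paddle
    enc = paddle.ones([2, 5], dtype='int64')  # [bsz, encoder_seqlen]
    dec = paddle.ones([2, 3], dtype='int64')  # [bsz, decoder_seqlen]
    bias = gen_bias(enc, dec, step=0)
    # Expected: [2, 3, 8] = [bsz, decoder_seqlen, encoder_seqlen + decoder_seqlen]
    print(bias.shape)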
def ernie_send(self, src_feat, dst_feat, edge_feat):
    """Apply the ernie model on the edge.

    Args:
        src_feat (Tensor Dict): src feature tensor dict.
        dst_feat (Tensor Dict): dst feature tensor dict.
        edge_feat (Tensor Dict): edge feature tensor dict.

    Returns:
        Tensor Dict: tensor dict which uses 'msg' as the key.
    """
    # input_ids
    cls = paddle.full(shape=[src_feat["term_ids"].shape[0], 1],
                      dtype="int64",
                      fill_value=self.cls_token_id)
    src_ids = paddle.concat([cls, src_feat["term_ids"]], 1)
    dst_ids = dst_feat["term_ids"]

    # sent_ids
    sent_ids = paddle.concat(
        [paddle.zeros_like(src_ids), paddle.ones_like(dst_ids)], 1)
    term_ids = paddle.concat([src_ids, dst_ids], 1)

    # build position_ids
    input_mask = paddle.cast(term_ids > 0, "int64")
    position_ids = paddle.cumsum(input_mask, axis=1) - 1

    outputs = self.ernie(term_ids, sent_ids, position_ids)
    feature = outputs[1]
    return {"msg": feature}
def forward(self, x):
    topk_val, topk_idx, gate_score = super().forward(
        x, return_all_scores=True)
    s = gate_score.shape[0]
    top1_idx = topk_idx.flatten()
    # Fraction of tokens routed to each expert (c_e) and the mean gate
    # probability per expert (m_e) form the load-balancing auxiliary loss.
    c_e = paddle.scatter(
        paddle.zeros(shape=[self.tot_expert]),
        top1_idx,
        paddle.ones_like(top1_idx, dtype="float32"),
        overwrite=False) / s
    m_e = paddle.mean(F.softmax(gate_score, axis=1), axis=0)
    loss = paddle.mean(c_e * m_e) * (self.num_expert**2)
    self.set_loss(loss)

    cap_rate = self.capacity[0 if self.training else 1]
    capacity = math.ceil(cap_rate * x.shape[0])
    _new_lec, _new_gec, topk_idx = limit_by_capacity(topk_idx,
                                                     self.num_expert,
                                                     self.world_size,
                                                     capacity,
                                                     group=self.group)
    if self.random_routing:
        rand_routing_prob = paddle.rand(shape=[gate_score.shape[0]],
                                        dtype="float32")
        topk_idx = paddle.distributed.models.moe.utils._random_routing(
            topk_idx, topk_val, rand_routing_prob)
    return topk_val, topk_idx
def rotation_3d_in_axis(points, angles, axis=0):
    # points: [N, point_size, 3]
    # angles: [N]
    rot_sin = paddle.sin(angles)
    rot_cos = paddle.cos(angles)
    ones = paddle.ones_like(rot_cos)
    zeros = paddle.zeros_like(rot_cos)
    if axis == 1:
        rot_mat_T = paddle.stack([
            paddle.stack([rot_cos, zeros, -rot_sin]),
            paddle.stack([zeros, ones, zeros]),
            paddle.stack([rot_sin, zeros, rot_cos])
        ])
    elif axis == 2 or axis == -1:
        rot_mat_T = paddle.stack([
            paddle.stack([rot_cos, -rot_sin, zeros]),
            paddle.stack([rot_sin, rot_cos, zeros]),
            paddle.stack([zeros, zeros, ones])
        ])
    elif axis == 0:
        rot_mat_T = paddle.stack([
            paddle.stack([zeros, rot_cos, -rot_sin]),
            paddle.stack([zeros, rot_sin, rot_cos]),
            paddle.stack([ones, zeros, zeros])
        ])
    else:
        raise ValueError("axis should be in range")
    # paddle.einsum takes the operands as separate arguments, not a tuple.
    return paddle.einsum('aij,jka->aik', points, rot_mat_T)
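# --- Hedged usage sketch (added, not from the original source): rotate a batch
# of point sets by 90 degrees around the z-axis (axis=2) and confirm that the
# output keeps the [N, point_size, 3] layout. Sizes are illustrative.
def _demo_rotation_3d_in_axis():
    import math
    import paddle
    points = paddle.rand([4, 16, 3])          # [N, point_size, 3]
    angles = paddle.full([4], math.pi / 2)    # [N], float32 by default
    rotated = rotation_3d_in_axis(points, angles, axis=2)
    print(rotated.shape)  # expected: [4, 16, 3]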
def step(self, time, inputs, states, **kwargs):
    # Steps for decoding.
    # Compared to RNN, Transformer has 3D data at every decoding step.
    inputs = paddle.reshape(inputs, [-1, 1])  # token
    pos = paddle.ones_like(inputs) * time  # pos

    cell_states = map_structure(self._merge_batch_beams_with_var_dim,
                                states.cell_states)
    cell_outputs, next_cell_states = self.cell((inputs, pos), cell_states,
                                               **kwargs)

    # Squeeze to adapt to BeamSearchDecoder, which uses 2D logits
    cell_outputs = map_structure(
        lambda x: paddle.squeeze(x, [1]) if len(x.shape) == 3 else x,
        cell_outputs)
    cell_outputs = map_structure(self._split_batch_beams, cell_outputs)
    next_cell_states = map_structure(self._split_batch_beams_with_var_dim,
                                     next_cell_states)

    beam_search_output, beam_search_state = self._beam_search_step(
        time=time,
        logits=cell_outputs,
        next_cell_states=next_cell_states,
        beam_state=states)
    next_inputs, finished = (beam_search_output.predicted_ids,
                             beam_search_state.finished)
    return (beam_search_output, beam_search_state, next_inputs, finished)
def forward(self, inp):
    score = self.gate(inp)

    if self.training:
        noise = paddle.rand(shape=score.shape)
        noise = noise * 2 * self.switch_eps + 1.0 - self.switch_eps
        score += noise

    score = F.softmax(score, axis=-1)
    top1_score, top1_idx = paddle.topk(score, k=1, axis=-1, largest=True)

    cap_rate = self.capacity[0 if self.training else 1]
    capacity = math.ceil(cap_rate * inp.shape[0])
    _new_lec, _new_gec, top1_idx = limit_by_capacity(top1_idx,
                                                     self.num_expert,
                                                     self.world_size,
                                                     capacity,
                                                     group=self.group)
    valid_idx = top1_idx[top1_idx > -1]
    valid_idx_tmp = paddle.reshape(valid_idx, shape=[len(valid_idx), 1])
    fraction_expert = paddle.scatter_nd_add(
        x=paddle.zeros(shape=[self.tot_expert]),
        index=valid_idx_tmp,
        updates=paddle.ones_like(valid_idx, dtype=paddle.float32).reshape(
            shape=[len(valid_idx)]),
    ) / valid_idx.numel()
    prob_expert = score.sum(axis=0) / valid_idx.numel()
    loss = (fraction_expert * prob_expert).sum() * self.tot_expert
    self.set_loss(loss)

    return top1_score, top1_idx
def cal_adj_acc(self, pred_eval, model):
    labels = pred_eval['sup_labels']
    adj_list = pred_eval['adj']

    cnt_sum, cnt_correct = 0, 0
    for ii in range(len(adj_list)):
        adj = adj_list[ii]
        s_label = labels['support']
        q_label = labels['query'][ii]
        n_support = s_label.shape[0]
        n_query = q_label.shape[0]

        s_label = paddle.expand(s_label, [n_query, s_label.shape[0]])
        q_label = q_label.unsqueeze(1)
        total_label = paddle.concat([s_label, q_label], 1)

        label_edge = model.layers.label2edge(total_label)
        pred_edge = adj  # / adj.sum(1)
        pred_edge = paddle.where(pred_edge >= 0.5,
                                 paddle.ones_like(pred_edge), pred_edge)
        pred_edge = paddle.where(pred_edge < 0.5,
                                 paddle.zeros_like(pred_edge), pred_edge)

        cor = paddle.cast(pred_edge == label_edge, dtype='float32').sum()
        incor = paddle.cast(pred_edge != label_edge, dtype='float32').sum()
        cnt_sum += (cor + incor)
        cnt_correct += cor

    acc = cnt_correct / cnt_sum
    return acc
def forward(self, input_ids, token_type_ids=None, position_ids=None):
    if position_ids is None:
        ones = paddle.ones_like(input_ids, dtype="int64")
        seq_length = paddle.cumsum(ones, axis=1)
        position_ids = seq_length - ones
        position_ids.stop_gradient = True
    if token_type_ids is None:
        token_type_ids = paddle.zeros_like(input_ids, dtype="int64")

    if self.num_partitions == 1:
        input_embeddings = self.word_embeddings(input_ids)
    else:
        input_embeddings = paddle.distributed.split(
            input_ids,
            size=(self.vocab_size, self.hidden_size),
            operation='embedding',
            weight_attr=self.weight_attr,
            num_partitions=fleet.worker_num())

    # paddle.static.Print(input_embeddings, summarize=-1)
    position_embeddings = self.position_embeddings(position_ids)
    # paddle.static.Print(position_embeddings, summarize=-1)
    token_type_embeddings = self.token_type_embeddings(token_type_ids)
    # paddle.static.Print(token_type_embeddings, summarize=-1)
    embeddings = input_embeddings + position_embeddings + token_type_embeddings
    # paddle.static.Print(embeddings, summarize=-1)
    embeddings = self.layer_norm(embeddings)
    # paddle.static.Print(embeddings, summarize=-1)
    embeddings = self.dropout(embeddings)
    # paddle.static.Print(embeddings)
    return embeddings
def forward(self, src, dsts):
    # src  [b, 1]
    # dsts [b, 1 + neg]
    src_embed = self.emb(src)
    if self.shared_embedding:
        dsts_embed = self.emb(dsts)
    else:
        dsts_embed = self.v_emb(dsts)

    pos_embed = dsts_embed[:, 0:1]
    neg_embed = dsts_embed[:, 1:]

    pos_logits = paddle.matmul(
        src_embed, pos_embed, transpose_y=True)  # [batch_size, 1, 1]
    neg_logits = paddle.matmul(
        src_embed, neg_embed, transpose_y=True)  # [batch_size, 1, neg_num]

    ones_label = paddle.ones_like(pos_logits)
    pos_loss = self.loss(pos_logits, ones_label)

    zeros_label = paddle.zeros_like(neg_logits)
    neg_loss = self.loss(neg_logits, zeros_label)

    loss = (pos_loss + neg_loss) / 2
    return loss
def forward(self, pred, label, sample_weight=None):
    one_hot = label > 0.5
    sample_weight = label != self._ignore_label

    if not self._from_logits:
        pred = F.sigmoid(pred)

    alpha = paddle.where(one_hot, self._alpha * sample_weight,
                         (1 - self._alpha) * sample_weight)
    pt = paddle.where(one_hot, 1.0 - paddle.abs(label - pred),
                      paddle.ones_like(pred))

    beta = (1 - pt)**self._gamma

    # Clamp pt + eps to at most 1 before the log. This is an element-wise
    # minimum, so paddle.minimum is the right op here (paddle.min reduces).
    loss = -alpha * beta * paddle.log(
        paddle.minimum(pt + self._eps, paddle.ones([1], dtype='float32')))
    loss = self._weight * (loss * sample_weight)

    if self._size_average:
        tsum = paddle.sum(label == 1,
                          axis=misc.get_dims_with_exclusion(
                              len(label.shape), self._batch_axis))
        loss = paddle.sum(loss,
                          axis=misc.get_dims_with_exclusion(
                              len(loss.shape),
                              self._batch_axis)) / (tsum + self._eps)
    else:
        loss = paddle.sum(loss,
                          axis=misc.get_dims_with_exclusion(
                              len(loss.shape), self._batch_axis))

    return self._scale * loss
def _init_weights(self, layer):
    # Initialize the weights.
    if isinstance(layer, nn.Linear):
        layer.weight.set_value(
            paddle.tensor.normal(
                mean=0.0,
                std=self.initializer_range if hasattr(
                    self, "initializer_range") else
                self.transformer.config["initializer_range"],
                shape=layer.weight.shape))
        if layer.bias is not None:
            layer.bias.set_value(paddle.zeros_like(layer.bias))
    elif isinstance(layer, nn.Embedding):
        layer.weight.set_value(
            paddle.tensor.normal(
                mean=0.0,
                std=self.initializer_range if hasattr(
                    self, "initializer_range") else
                self.transformer.config["initializer_range"],
                shape=layer.weight.shape))
        if layer._padding_idx is not None:
            layer.weight[layer._padding_idx].set_value(
                paddle.zeros_like(layer.weight[layer._padding_idx]))
    elif isinstance(layer, nn.LayerNorm):
        layer.bias.set_value(paddle.zeros_like(layer.bias))
        layer.weight.set_value(paddle.ones_like(layer.weight))
def libra_label_box(anchors, gt_boxes, gt_classes, positive_overlap,
                    negative_overlap, num_classes):
    # TODO: use paddle API to speed up
    gt_classes = gt_classes.numpy()
    gt_overlaps = np.zeros((anchors.shape[0], num_classes))
    matches = np.zeros((anchors.shape[0]), dtype=np.int32)
    if len(gt_boxes) > 0:
        proposal_to_gt_overlaps = bbox_overlaps(anchors, gt_boxes).numpy()
        overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1)
        overlaps_max = proposal_to_gt_overlaps.max(axis=1)
        # Boxes with non-zero overlap with gt boxes
        overlapped_boxes_ind = np.where(overlaps_max > 0)[0]
        overlapped_boxes_gt_classes = gt_classes[
            overlaps_argmax[overlapped_boxes_ind]]

        for idx in range(len(overlapped_boxes_ind)):
            gt_overlaps[overlapped_boxes_ind[idx],
                        overlapped_boxes_gt_classes[idx]] = overlaps_max[
                            overlapped_boxes_ind[idx]]
            matches[overlapped_boxes_ind[idx]] = overlaps_argmax[
                overlapped_boxes_ind[idx]]

    gt_overlaps = paddle.to_tensor(gt_overlaps)
    matches = paddle.to_tensor(matches)

    matched_vals = paddle.max(gt_overlaps, axis=1)
    match_labels = paddle.full(matches.shape, -1, dtype='int32')
    match_labels = paddle.where(matched_vals < negative_overlap,
                                paddle.zeros_like(match_labels), match_labels)
    match_labels = paddle.where(matched_vals >= positive_overlap,
                                paddle.ones_like(match_labels), match_labels)

    return matches, match_labels, matched_vals
def forward(
    self,
    inputs: Dict[str, paddle.Tensor],
) -> Dict[str, paddle.Tensor]:
    input_ids = inputs.get("input_ids")
    attention_mask = inputs.get("attention_mask")
    token_type_ids = inputs.get("token_type_ids")
    position_ids = inputs.get("position_ids")

    embeddings = self.embeddings(input_ids, token_type_ids, position_ids)
    if attention_mask is None:
        # paddle.ones_like has no `device` argument; the mask is created on the
        # same place as input_ids anyway.
        attention_mask = paddle.ones_like(input_ids)
    extended_attention_mask = self.get_extended_attention_mask(
        attention_mask, input_ids)
    encoder_output, attention_output = self.encoder(embeddings,
                                                    extended_attention_mask)
    pooled_output = self.pooler(encoder_output)
    output_dict = {
        "encoder_output": encoder_output,
        "pooled_output": pooled_output
    }
    return output_dict
def forward(self, input_ids, position_ids=None):
    if position_ids is None:
        ones = paddle.ones_like(input_ids, dtype="int64")
        seq_length = paddle.cumsum(ones, axis=-1)
        position_ids = seq_length - ones

    input_embeddings = self.word_embeddings(input_ids)

    if _global_parallel_strategy == "mp":
        auto.shard_tensor(self.word_embeddings.weight,
                          dist_attr={
                              "process_mesh": _global_process_mesh,
                              "dims_mapping": [0, -1]
                          })
    elif _global_parallel_strategy == "dp_mp":
        auto.shard_tensor(self.word_embeddings.weight,
                          dist_attr={
                              "process_mesh": _global_process_mesh,
                              "dims_mapping": [1, -1]
                          })
    elif _global_parallel_strategy == "mp_pp":
        auto.shard_tensor(self.word_embeddings.weight,
                          dist_attr={
                              "process_mesh": MPPP_MESH_LIST[0],
                              "dims_mapping": [0, -1]
                          })
    elif _global_parallel_strategy == "dp_mp_pp":
        auto.shard_tensor(self.word_embeddings.weight,
                          dist_attr={
                              "process_mesh": DPMPPP_MESH_LIST[0],
                              "dims_mapping": [1, -1]
                          })

    position_embeddings = self.position_embeddings(position_ids)
    embeddings = input_embeddings + position_embeddings
    embeddings = self.dropout(embeddings)
    return embeddings
def forward(self, feed_dict):
    src_embed = self.embedding(feed_dict['src'])
    pos_embed = self.embedding(feed_dict['pos'])

    # batch neg sample
    batch_size = feed_dict['pos'].shape[0]
    neg_idx = paddle.randint(low=0,
                             high=batch_size,
                             shape=[batch_size, self.neg_num])
    negs = []
    for i in range(self.neg_num):
        tmp = paddle.gather(pos_embed, neg_idx[:, i])
        tmp = paddle.reshape(tmp, [-1, 1, self.embed_size])
        negs.append(tmp)

    neg_embed = paddle.concat(negs, axis=1)
    src_embed = paddle.reshape(src_embed, [-1, 1, self.embed_size])
    pos_embed = paddle.reshape(pos_embed, [-1, 1, self.embed_size])

    # [batch_size, 1, 1]
    pos_logits = paddle.matmul(src_embed, pos_embed, transpose_y=True)
    # [batch_size, 1, neg_num]
    neg_logits = paddle.matmul(src_embed, neg_embed, transpose_y=True)

    ones_label = paddle.ones_like(pos_logits)
    pos_loss = self.loss_fn(pos_logits, ones_label)

    zeros_label = paddle.zeros_like(neg_logits)
    neg_loss = self.loss_fn(neg_logits, zeros_label)

    loss = (pos_loss + neg_loss) / 2
    return loss
def _rgb_to_hsv(img):
    """Convert an image Tensor from RGB to HSV.

    This implementation is based on Pillow (
    https://github.com/python-pillow/Pillow/blob/main/src/libImaging/Convert.c)
    """
    maxc = img.max(axis=-3)
    minc = img.min(axis=-3)

    is_equal = paddle.equal(maxc, minc)
    one_divisor = paddle.ones_like(maxc)
    c_delta = maxc - minc
    # s is 0 when maxc == minc, set the divisor to 1 to avoid zero divide.
    s = c_delta / paddle.where(is_equal, one_divisor, maxc)

    r, g, b = img.unbind(axis=-3)
    c_delta_divisor = paddle.where(is_equal, one_divisor, c_delta)
    # when maxc == minc, there is r == g == b, set the divisor to 1 to avoid zero divide.
    rc = (maxc - r) / c_delta_divisor
    gc = (maxc - g) / c_delta_divisor
    bc = (maxc - b) / c_delta_divisor

    hr = (maxc == r).astype(maxc.dtype) * (bc - gc)
    hg = ((maxc == g) & (maxc != r)).astype(maxc.dtype) * (rc - bc + 2.0)
    hb = ((maxc != r) & (maxc != g)).astype(maxc.dtype) * (gc - rc + 4.0)

    h = (hr + hg + hb) / 6.0 + 1.0
    h = h - h.trunc()
    return paddle.stack([h, s, maxc], axis=-3)
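# --- Hedged usage sketch (added, not from the original source): convert a
# random channel-first image and confirm the HSV output keeps the same shape
# and stays in [0, 1] for inputs in [0, 1].
def _demo_rgb_to_hsv():
    import paddle
    img = paddle.rand([3, 8, 8])  # [C, H, W], RGB values in [0, 1]
    hsv = _rgb_to_hsv(img)
    print(hsv.shape, float(hsv.min()), float(hsv.max()))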
def forward(self, input, target):
    """
    Args:
        inputs: feature matrix with shape (batch_size, feat_dim)
        target: ground truth labels with shape (num_classes)
    """
    inputs = input["features"]

    if self.normalize_feature:
        inputs = 1. * inputs / (paddle.expand_as(
            paddle.norm(inputs, p=2, axis=-1, keepdim=True), inputs) + 1e-12)

    bs = inputs.shape[0]

    # compute distance
    dist = paddle.pow(inputs, 2).sum(axis=1, keepdim=True).expand([bs, bs])
    dist = dist + dist.t()
    dist = paddle.addmm(input=dist, x=inputs, y=inputs.t(), alpha=-2.0, beta=1.0)
    dist = paddle.clip(dist, min=1e-12).sqrt()

    # hard negative mining
    is_pos = paddle.expand(target, (bs, bs)).equal(
        paddle.expand(target, (bs, bs)).t())
    is_neg = paddle.expand(target, (bs, bs)).not_equal(
        paddle.expand(target, (bs, bs)).t())

    # `dist_ap` means distance(anchor, positive)
    # both `dist_ap` and `relative_p_inds` with shape [N, 1]
    '''
    dist_ap, relative_p_inds = paddle.max(
        paddle.reshape(dist[is_pos], (bs, -1)), axis=1, keepdim=True)
    # `dist_an` means distance(anchor, negative)
    # both `dist_an` and `relative_n_inds` with shape [N, 1]
    dist_an, relative_n_inds = paddle.min(
        paddle.reshape(dist[is_neg], (bs, -1)), axis=1, keepdim=True)
    '''
    dist_ap = paddle.max(paddle.reshape(
        paddle.masked_select(dist, is_pos), (bs, -1)),
                         axis=1,
                         keepdim=True)
    # `dist_an` means distance(anchor, negative)
    # both `dist_an` and `relative_n_inds` with shape [N, 1]
    dist_an = paddle.min(paddle.reshape(
        paddle.masked_select(dist, is_neg), (bs, -1)),
                         axis=1,
                         keepdim=True)
    # shape [N]
    dist_ap = paddle.squeeze(dist_ap, axis=1)
    dist_an = paddle.squeeze(dist_an, axis=1)

    # Compute ranking hinge loss
    y = paddle.ones_like(dist_an)
    loss = self.ranking_loss(dist_an, dist_ap, y)
    return {"TripletLossV2": loss}
def forward(self, inputs):
    return paddle.where(
        condition=inputs <= self._lower_bound,
        x=paddle.zeros_like(inputs),
        y=paddle.where(condition=inputs >= self._upper_bound,
                       x=paddle.ones_like(inputs),
                       y=self._a3 * (inputs**3) + self._a1 * inputs +
                       self._a0))
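# --- Hedged standalone sketch (added, not from the original source): the
# forward above evaluates a piecewise cubic "smooth step" gate. The bounds and
# coefficients below are illustrative assumptions (the classic smooth step on
# [-0.5, 0.5]), not values taken from the original class.
def _demo_smooth_step():
    import paddle
    lower, upper = -0.5, 0.5
    a3, a1, a0 = -2.0, 1.5, 0.5  # f(x) = a3*x^3 + a1*x + a0 on [lower, upper]
    x = paddle.linspace(-1.0, 1.0, 9)
    y = paddle.where(
        x <= lower, paddle.zeros_like(x),
        paddle.where(x >= upper, paddle.ones_like(x),
                     a3 * (x**3) + a1 * x + a0))
    # 0 below the lower bound, 1 above the upper bound, smooth in between.
    print(y.numpy())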
def generate_segment_id(index):
    zeros = paddle.zeros(index[-1] + 1, dtype="int32")
    index = index[:-1]
    segments = paddle.scatter(
        zeros, index, paddle.ones_like(index, dtype="int32"), overwrite=False)
    segments = paddle.cumsum(segments)[:-1] - 1
    return segments
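# --- Hedged usage sketch (added, not from the original source):
# generate_segment_id turns a sorted boundary-offset array into a per-position
# segment id, i.e. positions in [index[i], index[i+1]) get id i. The offsets
# below are illustrative.
def _demo_generate_segment_id():
    import paddle
    # Segments [0, 3), [3, 5), [5, 9) described by their boundary offsets.
    index = paddle.to_tensor([0, 3, 5, 9], dtype='int64')
    # Expected per-position ids: [0 0 0 1 1 2 2 2 2]
    print(generate_segment_id(index).numpy())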
def forward(self, item_his_emb, seq_len):
    """forward

    Args:
        item_his_emb : [B, seqlen, dim]
        seq_len : [B, 1]
    """
    batch_size = item_his_emb.shape[0]
    seq_len_tile = paddle.tile(seq_len, [1, self.k_max])

    mask = self.sequence_mask(seq_len_tile, self.maxlen)
    pad = paddle.ones_like(mask, dtype="float32") * (-2**32 + 1)

    # S*e
    low_capsule_new = paddle.matmul(item_his_emb,
                                    self.bilinear_mapping_matrix)

    low_capsule_new_nograd = paddle.assign(low_capsule_new)
    low_capsule_new_nograd.stop_gradient = True

    B = paddle.tile(self.routing_logits,
                    [paddle.shape(item_his_emb)[0], 1, 1])

    for i in range(self.iters - 1):
        B_mask = paddle.where(mask, B, pad)
        # print(B_mask)
        W = F.softmax(B_mask, axis=1)
        high_capsule_tmp = paddle.matmul(W, low_capsule_new_nograd)
        high_capsule = self.squash(high_capsule_tmp)
        B_delta = paddle.matmul(high_capsule,
                                low_capsule_new_nograd,
                                transpose_y=True)
        B += B_delta / paddle.maximum(
            paddle.norm(B_delta, p=2, axis=-1, keepdim=True),
            paddle.ones_like(B_delta))

    B_mask = paddle.where(mask, B, pad)
    W = F.softmax(B_mask, axis=1)
    # paddle.static.Print(W)
    high_capsule_tmp = paddle.matmul(W, low_capsule_new)
    # high_capsule_tmp.stop_gradient = False
    high_capsule = self.squash(high_capsule_tmp)
    # high_capsule.stop_gradient = False

    return high_capsule, W, seq_len