def forward(self, x):
    x = self.decoder_unpool0(x)
    p = self.decoder_block0(x)
    x = F.leaky_relu(x + p)
    x = self.decoder_unpool1(x)
    p = self.decoder_block1(x)
    x = F.leaky_relu(x + p)
    x = self.decoder_unpool2(x)
    p1 = self.decoder_block2(x)
    p2 = self.decoder_block2_shortcut(x)
    x = F.leaky_relu(p1 + p2)
    p1 = self.decoder_block3(x)
    p2 = self.decoder_block3_shortcut(x)
    x = F.leaky_relu(p1 + p2)
    x = self.decoder_block4(x)
    # x = x[:,0,:,:,:]
    # x = F.softmax(x, axis=1)
    x = paddle.sum(x, axis=1)
    x = paddle.clip(x, min=0, max=1)
    return x
def add_input(self, x, condition=None):
    """Compute the output distribution (represented by its parameters) for a
    single step. It works similarly to the ``forward`` method, but in a
    ``step-in-step-out`` fashion.

    Parameters
    -----------
    x : Tensor [shape=(B,)]
        A step of the input waveform.

    condition : Tensor, optional [shape=(B, C_cond)]
        A step of the upsampled condition. Defaults to None.

    Returns
    --------
    Tensor: [shape=(B, C_output)]
        A step of the parameters of the output distributions.
    """
    # Causal Conv
    if self.loss_type == "softmax":
        x = paddle.clip(x, min=-1., max=0.99999)
        x = quantize(x, self.output_dim)
        x = self.embed(x)  # (B, C)
    else:
        x = paddle.unsqueeze(x, -1)  # (B, 1)
        x = self.embed(x)  # (B, C)

    # Residual & Skip-connection & linears
    z = self.resnet.add_input(x, condition)
    z = F.relu(self.proj2(F.relu(self.proj1(z))))  # (B, C)

    # Output
    y = self.proj3(z)
    return y
def generate_relative_positions_embeddings(self, length, depth, max_relative_position=127):
    vocab_size = max_relative_position * 2 + 1
    range_vec = paddle.arange(length)
    range_mat = paddle.tile(range_vec, repeat_times=[length]).reshape(
        (length, length))
    distance_mat = range_mat - paddle.t(range_mat)
    distance_mat_clipped = paddle.clip(distance_mat.astype('float32'),
                                       -max_relative_position,
                                       max_relative_position)
    final_mat = distance_mat_clipped + max_relative_position
    embeddings_table = np.zeros([vocab_size, depth])
    for pos in range(vocab_size):
        for i in range(depth // 2):
            embeddings_table[pos, 2 * i] = np.sin(
                pos / np.power(10000, 2 * i / depth))
            embeddings_table[pos, 2 * i + 1] = np.cos(
                pos / np.power(10000, 2 * i / depth))

    embeddings_table_tensor = paddle.to_tensor(embeddings_table, dtype='float32')
    flat_relative_positions_matrix = final_mat.reshape((-1, ))
    one_hot_relative_positions_matrix = paddle.nn.functional.one_hot(
        flat_relative_positions_matrix.astype('int64'), num_classes=vocab_size)
    embeddings = paddle.matmul(one_hot_relative_positions_matrix,
                               embeddings_table_tensor)
    my_shape = final_mat.shape
    my_shape.append(depth)
    embeddings = embeddings.reshape(my_shape)
    return embeddings
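# A hedged, standalone sketch (not from the original module) of the clipped
# relative-distance matrix built above, for a toy length=4 and
# max_relative_position=2; values are illustrative only.
import paddle
length, max_relative_position = 4, 2
range_vec = paddle.arange(length)
range_mat = paddle.tile(range_vec, repeat_times=[length]).reshape((length, length))
distance_mat = range_mat - paddle.t(range_mat)
clipped = paddle.clip(distance_mat.astype('float32'),
                      -max_relative_position, max_relative_position)
print((clipped + max_relative_position).numpy())
# every entry is a bucket id in [0, 2 * max_relative_position]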
def forward(self, raw_features, coarse_volumes):
    n_views_rendering = coarse_volumes.shape[1]
    raw_features = paddle.split(raw_features,
                                num_or_sections=raw_features.shape[1],
                                axis=1)
    volume_weights = []

    for i in range(n_views_rendering):
        raw_feature = paddle.squeeze(raw_features[i], axis=1)
        # print("torch.Size([batch_size, 9, 32, 32, 32]) ---", raw_feature.shape)
        volume_weight = self.layer1(raw_feature)
        # print("torch.Size([batch_size, 16, 32, 32, 32]) ---", volume_weight.shape)
        volume_weight = self.layer2(volume_weight)
        # print("torch.Size([batch_size, 8, 32, 32, 32]) ---", volume_weight.shape)
        volume_weight = self.layer3(volume_weight)
        # print("torch.Size([batch_size, 4, 32, 32, 32]) ---", volume_weight.shape)
        volume_weight = self.layer4(volume_weight)
        # print("torch.Size([batch_size, 2, 32, 32, 32]) ---", volume_weight.shape)
        volume_weight = self.layer5(volume_weight)
        # print("torch.Size([batch_size, 1, 32, 32, 32]) ---", volume_weight.shape)
        volume_weight = paddle.squeeze(volume_weight, axis=1)
        # print("torch.Size([batch_size, 32, 32, 32]) ---", volume_weight.shape)
        volume_weights.append(volume_weight)

    volume_weights = paddle.transpose(paddle.stack(volume_weights),
                                      perm=[1, 0, 2, 3, 4])
    volume_weights = paddle.nn.functional.softmax(volume_weights, axis=1)
    # print("torch.Size([batch_size, n_views, 32, 32, 32]) ---", volume_weights.shape)
    # print("torch.Size([batch_size, n_views, 32, 32, 32]) ---", coarse_volumes.shape)
    coarse_volumes = coarse_volumes * volume_weights
    coarse_volumes = paddle.sum(coarse_volumes, axis=1)
    return paddle.clip(coarse_volumes, min=0, max=1)
def forward(self, x, condition=None):
    """Forward pass of ``WaveNet``.

    Parameters
    -----------
    x : Tensor [shape=(B, T)]
        The input waveform.
    condition : Tensor, optional [shape=(B, C_cond, T)]
        The upsampled condition. Defaults to None.

    Returns
    -------
    Tensor: [shape=(B, T, C_output)]
        The parameters of the output distributions.
    """
    # Causal Conv
    if self.loss_type == "softmax":
        x = paddle.clip(x, min=-1., max=0.99999)
        x = quantize(x, self.output_dim)
        x = self.embed(x)  # (B, T, C)
    else:
        x = paddle.unsqueeze(x, -1)  # (B, T, 1)
        x = self.embed(x)  # (B, T, C)
    x = paddle.transpose(x, perm=[0, 2, 1])  # (B, C, T)

    # Residual & Skip-connection & linears
    z = self.resnet(x, condition)
    z = paddle.transpose(z, [0, 2, 1])
    z = F.relu(self.proj2(F.relu(self.proj1(z))))

    y = self.proj3(z)
    return y
def mu_law_encode(x: Tensor, mu: int = 256, quantized: bool = True) -> Tensor:
    """Mu-law encoding.

    Compute the mu-law encoding of the input signal. When ``quantized`` is True,
    the result is converted to integers in the range [0, mu-1]. Otherwise, the
    resulting signal is in the range [-1, 1].

    Parameters:
        x(Tensor): the input tensor of arbitrary shape to be encoded.
        mu(int): the maximum value (depth) of the encoded signal. The signal
            will be clipped to the range [0, mu-1].
        quantized(bool): indicate whether the signal will be quantized to integers.

    Examples:
        .. code-block:: python

        import paddle
        import paddleaudio.functional as F
        F.mu_law_encode(paddle.randn((2, 8)))
        >> Tensor(shape=[2, 8], dtype=int32, place=CUDAPlace(0), stop_gradient=True,
            [[0, 5, 30, 255, 255, 255, 12, 13],
            [0, 241, 8, 243, 7, 35, 84, 228]])

    Reference:
        https://en.wikipedia.org/wiki/%CE%9C-law_algorithm
    """
    mu = mu - 1
    y = paddle.sign(x) * paddle.log1p(mu * paddle.abs(x)) / math.log1p(mu)
    if quantized:
        y = (y + 1) / 2 * mu + 0.5  # convert to [0, mu-1]
        y = paddle.clip(y, min=0, max=mu).astype('int32')
    return y
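# A hedged sketch (not part of the library) of the inverse transform, to show
# the round trip of ``mu_law_encode`` above; it assumes the same quantization
# convention, and the helper name ``_mu_law_expand`` is made up.
import math
import paddle

def _mu_law_expand(y, mu=256, quantized=True):
    mu = mu - 1
    if quantized:
        y = y.astype('float32') / mu * 2 - 1.0  # map [0, mu-1] back to [-1, 1]
    return paddle.sign(y) * (paddle.exp(paddle.abs(y) * math.log1p(mu)) - 1.0) / mu

x = paddle.clip(paddle.randn([2, 8]), min=-1.0, max=1.0)
x_hat = _mu_law_expand(mu_law_encode(x))
print(float(paddle.abs(x - x_hat).max()))  # small round-trip (quantization) error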
def dynamic_k_matching(self, cost_matrix, pairwise_ious, num_gt):
    match_matrix = np.zeros_like(cost_matrix.numpy())
    # select candidate topk ious for dynamic-k calculation
    topk_ious, _ = paddle.topk(pairwise_ious, self.candidate_topk, axis=0)
    # calculate dynamic k for each gt
    dynamic_ks = paddle.clip(topk_ious.sum(0).cast('int'), min=1)
    for gt_idx in range(num_gt):
        _, pos_idx = paddle.topk(cost_matrix[:, gt_idx],
                                 k=dynamic_ks[gt_idx],
                                 largest=False)
        match_matrix[:, gt_idx][pos_idx.numpy()] = 1.0

    del topk_ious, dynamic_ks, pos_idx

    # resolve points matched to more than one gt
    extra_match_gts_mask = match_matrix.sum(1) > 1
    if extra_match_gts_mask.sum() > 0:
        cost_matrix = cost_matrix.numpy()
        cost_argmin = np.argmin(cost_matrix[extra_match_gts_mask, :], axis=1)
        match_matrix[extra_match_gts_mask, :] *= 0.0
        match_matrix[extra_match_gts_mask, cost_argmin] = 1.0
    # get foreground mask
    match_fg_mask_inmatrix = match_matrix.sum(1) > 0
    match_gt_inds_to_fg = match_matrix[match_fg_mask_inmatrix, :].argmax(1)

    return match_gt_inds_to_fg, match_fg_mask_inmatrix
def compute_softmax_loss(self, y, t):
    """Compute the loss when the output distributions are categorical
    distributions.

    Parameters
    ----------
    y : Tensor [shape=(B, T, C_output)]
        The logits of the output distributions.
    t : Tensor [shape=(B, T)]
        The target audio. The audio is first quantized then used as the target.

    Notes
    -------
    Output distributions whose input contains padding are neglected in loss
    computation. So the first ``context_size`` steps do not contribute to the
    loss.

    Returns
    --------
    Tensor: [shape=(1,)]
        The loss.
    """
    # context size is not taken into account
    y = y[:, self.context_size:, :]
    t = t[:, self.context_size:]
    t = paddle.clip(t, min=-1.0, max=0.99999)
    quantized = quantize(t, n_bands=self.output_dim)
    label = paddle.unsqueeze(quantized, -1)

    loss = F.softmax_with_cross_entropy(y, label)
    reduced_loss = paddle.mean(loss)
    return reduced_loss
def degree_norm(graph, mode="indegree"):
    """Calculate the degree normalization of a graph.

    Args:
        graph: the graph object from (:code:`Graph`)
        mode: which degree to be normalized ("indegree" or "outdegree")

    Return:
        A tensor with shape (num_nodes, 1).
    """
    assert mode in [
        'indegree', 'outdegree'
    ], "The degree_norm mode should be in ['indegree', 'outdegree']. But receive mode=%s" % mode

    if mode == "indegree":
        degree = graph.indegree()
    elif mode == "outdegree":
        degree = graph.outdegree()

    norm = paddle.cast(degree, dtype=paddle.get_default_dtype())
    norm = paddle.clip(norm, min=1.0)
    norm = paddle.pow(norm, -0.5)
    norm = paddle.reshape(norm, [-1, 1])
    return norm
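# A hedged, standalone illustration (made-up degrees) of the normalization
# above, starting from a raw degree vector instead of a Graph object.
import paddle
degree = paddle.to_tensor([0, 1, 4, 9], dtype=paddle.get_default_dtype())
norm = paddle.clip(degree, min=1.0)   # isolated nodes get degree 1 instead of 0
norm = paddle.pow(norm, -0.5)         # 1 / sqrt(d)
norm = paddle.reshape(norm, [-1, 1])  # shape (num_nodes, 1)
print(norm.numpy())                   # [[1.], [1.], [0.5], [0.333...]]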
def get_loss(self, pred_hm, pred_wh, target_hm, box_target, target_weight):
    pred_hm = paddle.clip(F.sigmoid(pred_hm), 1e-4, 1 - 1e-4)
    hm_loss = self.hm_loss(pred_hm, target_hm)
    H, W = target_hm.shape[2:]
    mask = paddle.reshape(target_weight, [-1, H, W])
    avg_factor = paddle.sum(mask) + 1e-4

    base_step = self.down_ratio
    shifts_x = paddle.arange(0, W * base_step, base_step, dtype='int32')
    shifts_y = paddle.arange(0, H * base_step, base_step, dtype='int32')
    shift_y, shift_x = paddle.tensor.meshgrid([shifts_y, shifts_x])
    base_loc = paddle.stack([shift_x, shift_y], axis=0)
    base_loc.stop_gradient = True

    pred_boxes = paddle.concat(
        [0 - pred_wh[:, 0:2, :, :] + base_loc, pred_wh[:, 2:4] + base_loc],
        axis=1)
    pred_boxes = paddle.transpose(pred_boxes, [0, 2, 3, 1])
    boxes = paddle.transpose(box_target, [0, 2, 3, 1])
    boxes.stop_gradient = True

    pred_boxes, boxes, mask = self.filter_box_by_weight(pred_boxes, boxes, mask)
    mask.stop_gradient = True
    wh_loss = self.wh_loss(pred_boxes, boxes, iou_weight=mask.unsqueeze(1))
    wh_loss = wh_loss / avg_factor

    ttf_loss = {'hm_loss': hm_loss, 'wh_loss': wh_loss}
    return ttf_loss
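# A small hedged check (illustrative logits) of the clipping convention above:
# keeping the sigmoided heatmap inside (1e-4, 1 - 1e-4) keeps log(p) and
# log(1 - p) finite for the focal loss.
import paddle
import paddle.nn.functional as F
logits = paddle.to_tensor([-20.0, 0.0, 20.0])
p = paddle.clip(F.sigmoid(logits), 1e-4, 1 - 1e-4)
print(paddle.log(p).numpy(), paddle.log(1 - p).numpy())  # all finite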
def forward(self, x):
    x = x.unsqueeze(1)
    x = self.conv1(x)
    x = self.relu(x)
    x = self.bn1(x)
    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)
    x = x.reshape((x.shape[0], -1, x.shape[-1]))

    w = self.attention(x)

    if self.encoder_type == "SAP":
        x = paddle.sum(x * w, axis=2)
    elif self.encoder_type == "ASP":
        mu = paddle.sum(x * w, axis=2)
        sg = paddle.sum((x**2) * w, axis=2) - mu**2
        sg = paddle.clip(sg, min=1e-5)
        sg = paddle.sqrt(sg)
        x = paddle.concat((mu, sg), 1)

    x = x.reshape((x.shape[0], -1))
    x = self.fc(x)
    return x
def forward(self, dist_feat):
    dist = paddle.clip(dist_feat.squeeze(), 1.0,
                       self.cut_dist - 1e-6).astype('int64') - 1
    eh_emb = self.dist_embedding_layer(dist)
    eh_emb = self.dist_input_layer(eh_emb)
    # eh_emb = paddle.cast(eh_emb, 'float64')
    return eh_emb
def forward(self, input_data):
    expert_outputs = []
    for i in range(0, self.expert_num):
        linear_out = self._param_expert[i](input_data)
        expert_output = F.relu(linear_out)
        expert_outputs.append(expert_output)
    expert_concat = paddle.concat(x=expert_outputs, axis=1)
    expert_concat = paddle.reshape(expert_concat,
                                   [-1, self.expert_num, self.expert_size])

    output_layers = []
    for i in range(0, self.gate_num):
        cur_gate_linear = self._param_gate[i](input_data)
        cur_gate = F.softmax(cur_gate_linear)
        cur_gate = paddle.reshape(cur_gate, [-1, self.expert_num, 1])
        cur_gate_expert = paddle.multiply(x=expert_concat, y=cur_gate)
        cur_gate_expert = paddle.sum(x=cur_gate_expert, axis=1)
        cur_tower = self._param_tower[i](cur_gate_expert)
        cur_tower = F.relu(cur_tower)
        out = self._param_tower_out[i](cur_tower)
        out = F.softmax(out)
        out = paddle.clip(out, min=1e-15, max=1.0 - 1e-15)
        output_layers.append(out)

    return output_layers
def forward(self):
    fpn_rois = self.input('FpnRois', 0)
    areas = self.bbox_area(fpn_rois)
    scale = paddle.sqrt(areas)
    num_level = self.max_level - self.min_level + 1
    target_level = paddle.log(scale / self.refer_scale + 1e-06) / np.log(2)
    target_level = paddle.floor(self.refer_level + target_level)
    target_level = paddle.clip(target_level,
                               min=self.min_level,
                               max=self.max_level)

    rois = list()
    rois_idx_order = list()

    for level in range(self.min_level, self.max_level + 1):
        level_tensor = paddle.full_like(target_level, fill_value=level)
        res = paddle.equal(target_level, level_tensor)
        res = paddle.squeeze(res, axis=1)
        res = paddle.cast(res, dtype='int32')
        index = paddle.nonzero(res)
        roi = paddle.gather(fpn_rois, index, axis=0)
        rois.append(roi)
        rois_idx_order.append(index)
    rois_idx_order = paddle.concat(rois_idx_order, axis=0)
    size = paddle.shape(rois_idx_order)[0]
    _, rois_idx_restore = paddle.topk(rois_idx_order,
                                      axis=0,
                                      sorted=True,
                                      largest=False,
                                      k=size)
    #rois_idx_restore = paddle.cast(rois_idx_restore, dtype='int32')
    return {'MultiFpnRois': rois, 'RestoreIndex': [rois_idx_restore]}
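# A hedged toy check (made-up scales and thresholds) of the level assignment
# above: an RoI is mapped to floor(refer_level + log2(sqrt(area) / refer_scale))
# and then clipped to [min_level, max_level].
import numpy as np
import paddle
scale = paddle.to_tensor([[16.0], [224.0], [2000.0]])
refer_scale, refer_level, min_level, max_level = 224.0, 4, 2, 5
target_level = paddle.log(scale / refer_scale + 1e-06) / np.log(2)
target_level = paddle.floor(refer_level + target_level)
print(paddle.clip(target_level, min=min_level, max=max_level).numpy())
# [[2.], [4.], [5.]]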
def cdist(self, a, b):
    a_s = paddle.norm(a, p=2, axis=-1).pow(2)
    b_s = paddle.norm(b, p=2, axis=-1).pow(2)
    dist_score = -2 * paddle.bmm(a, b.transpose(
        [0, 2, 1])) + b_s.unsqueeze(-2) + a_s.unsqueeze(-1)
    dist_score = paddle.sqrt(paddle.clip(dist_score, min=1e-30))
    return dist_score
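# A hedged, standalone shape check of the pairwise-distance formula above,
# assuming batched inputs a: (B, N, D) and b: (B, M, D); the clip keeps the
# argument of sqrt non-negative when rounding makes it slightly negative.
import paddle
a, b = paddle.randn([2, 5, 8]), paddle.randn([2, 7, 8])
a_s = paddle.norm(a, p=2, axis=-1).pow(2)
b_s = paddle.norm(b, p=2, axis=-1).pow(2)
d2 = -2 * paddle.bmm(a, b.transpose([0, 2, 1])) + b_s.unsqueeze(-2) + a_s.unsqueeze(-1)
print(paddle.sqrt(paddle.clip(d2, min=1e-30)).shape)  # [2, 5, 7]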
def forward(self, outputs, targets):
    """ This performs the loss computation.

    Parameters:
         outputs: dict of tensors, see the output specification of the model for the format
         targets: list of dicts, such that len(targets) == batch_size.
                  The expected keys in each dict depend on the losses applied, see each loss' doc
    """
    outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs'}

    # Retrieve the matching between the outputs of the last layer and the targets
    indices = self.matcher(outputs_without_aux, targets)

    # Compute the average number of target boxes across all nodes, for normalization purposes
    num_boxes = sum(len(t['labels']) for t in targets)
    num_boxes = paddle.to_tensor([num_boxes], dtype='float32')
    if is_dist_avail_and_initialized():
        torch2paddle.all_reduce(num_boxes)
    num_boxes = paddle.clip(num_boxes / get_world_size(), min=1).item()

    # Compute all the requested losses
    losses = {}
    for loss in self.losses:
        losses.update(self.get_loss(loss, outputs, targets, indices, num_boxes))

    # In case of auxiliary losses, repeat the process with the output of each intermediate layer
    if 'aux_outputs' in outputs:
        for i, aux_outputs in enumerate(outputs['aux_outputs']):
            indices = self.matcher(aux_outputs, targets)
            for loss in self.losses:
                if loss == 'masks':
                    continue
                kwargs = {}
                if loss == 'labels':
                    kwargs = {'log': False}
                l_dict = self.get_loss(loss, aux_outputs, targets, indices,
                                       num_boxes, **kwargs)
                l_dict = {(k + f'_{i}'): v for k, v in l_dict.items()}
                losses.update(l_dict)

    return losses
def degree_norm(self, g):
    degree = g.indegree() + 1  # self loop
    norm = paddle.cast(degree, dtype=paddle.get_default_dtype())
    norm = paddle.clip(norm, min=1.0)
    norm = paddle.pow(norm, -0.5)
    norm = paddle.reshape(norm, [-1, 1])
    return norm
def forward(self, sender_receiver, is_training):
    # Construct permutation input
    sender_emb = self.feature_emb_edge(
        paddle.cast(sender_receiver[0, :], 'int32'))
    receiver_emb = self.feature_emb_edge(
        paddle.cast(sender_receiver[1, :], 'int32'))
    _input = paddle.multiply(sender_emb, receiver_emb)
    h_relu = self.dropout(self.relu(self.linear1(_input)))
    loc = self.linear2(h_relu)
    if is_training:
        u = paddle.rand(loc.shape, dtype=loc.dtype)
        u.stop_gradient = False
        logu = paddle.log2(u)
        logmu = paddle.log2(1 - u)
        sum_log = loc + logu - logmu
        s = F.sigmoid(sum_log / self.temp)
        s = s * (self.inter_max - self.inter_min) + self.inter_min
    else:
        s = F.sigmoid(loc) * (self.inter_max - self.inter_min) + self.inter_min

    s = paddle.clip(s, min=0, max=1)

    l0_penalty = F.sigmoid(
        loc - self.temp * np.log2(-self.inter_min / self.inter_max)).mean()
    return s, l0_penalty
def forward(self, input, target):
    """
    Args:
        input: dict containing the feature matrix with shape (batch_size, feat_dim)
        target: ground truth labels with shape (batch_size)
    """
    inputs = input["features"]

    if self.normalize_feature:
        inputs = 1. * inputs / (paddle.expand_as(
            paddle.norm(inputs, p=2, axis=-1, keepdim=True), inputs) + 1e-12)

    bs = inputs.shape[0]

    # compute distance
    dist = paddle.pow(inputs, 2).sum(axis=1, keepdim=True).expand([bs, bs])
    dist = dist + dist.t()
    dist = paddle.addmm(input=dist, x=inputs, y=inputs.t(), alpha=-2.0, beta=1.0)
    dist = paddle.clip(dist, min=1e-12).sqrt()

    # hard negative mining
    is_pos = paddle.expand(target, (bs, bs)).equal(
        paddle.expand(target, (bs, bs)).t())
    is_neg = paddle.expand(target, (bs, bs)).not_equal(
        paddle.expand(target, (bs, bs)).t())

    # `dist_ap` means distance(anchor, positive)
    # both `dist_ap` and `relative_p_inds` with shape [N, 1]
    '''
    dist_ap, relative_p_inds = paddle.max(
        paddle.reshape(dist[is_pos], (bs, -1)), axis=1, keepdim=True)
    # `dist_an` means distance(anchor, negative)
    # both `dist_an` and `relative_n_inds` with shape [N, 1]
    dist_an, relative_n_inds = paddle.min(
        paddle.reshape(dist[is_neg], (bs, -1)), axis=1, keepdim=True)
    '''
    dist_ap = paddle.max(paddle.reshape(paddle.masked_select(dist, is_pos),
                                        (bs, -1)),
                         axis=1,
                         keepdim=True)
    # `dist_an` means distance(anchor, negative)
    # both `dist_an` and `relative_n_inds` with shape [N, 1]
    dist_an = paddle.min(paddle.reshape(paddle.masked_select(dist, is_neg),
                                        (bs, -1)),
                         axis=1,
                         keepdim=True)
    # shape [N]
    dist_ap = paddle.squeeze(dist_ap, axis=1)
    dist_an = paddle.squeeze(dist_an, axis=1)

    # Compute ranking hinge loss
    y = paddle.ones_like(dist_an)
    loss = self.ranking_loss(dist_an, dist_ap, y)
    return {"TripletLossV2": loss}
def forward(self, state):
    x = F.relu(self.l1(state))
    x = F.relu(self.l2(x))
    mean = self.mean(x)
    log_std = F.relu(self.std(x))
    log_std = paddle.clip(log_std, self.log_min_std, self.log_max_std)
    return mean, log_std
def forward(self, obs):
    x = F.relu(self.l1(obs))
    x = F.relu(self.l2(x))
    act_mean = self.mean_linear(x)
    act_std = self.std_linear(x)
    act_log_std = paddle.clip(act_std, min=LOG_SIG_MIN, max=LOG_SIG_MAX)
    return act_mean, act_log_std
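# A hedged sketch (not taken from this repo) of how a clipped log-std is
# typically consumed downstream: exponentiate to get a positive std, then
# sample; the LOG_SIG_MIN / LOG_SIG_MAX values are illustrative assumptions.
import paddle
LOG_SIG_MIN, LOG_SIG_MAX = -20.0, 2.0
act_mean = paddle.zeros([4, 2])
act_log_std = paddle.clip(paddle.randn([4, 2]), min=LOG_SIG_MIN, max=LOG_SIG_MAX)
normal = paddle.distribution.Normal(act_mean, paddle.exp(act_log_std))
action = normal.sample([1])  # clipping keeps exp() in a numerically safe range
print(action.shape)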
def test_clip_dygraph(self):
    place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
    ) else fluid.CPUPlace()
    paddle.disable_static(place)
    data_shape = [1, 9, 9, 4]
    data = np.random.random(data_shape).astype('float32')
    images = paddle.to_variable(data, dtype='float32')
    v_min = paddle.to_variable(np.array([0.2], dtype=np.float32))
    v_max = paddle.to_variable(np.array([0.8], dtype=np.float32))

    out_1 = paddle.clip(images, min=0.2, max=0.8)
    out_2 = paddle.clip(images, min=0.2, max=0.9)
    out_3 = paddle.clip(images, min=v_min, max=v_max)

    self.assertTrue(np.allclose(out_1.numpy(), data.clip(0.2, 0.8)))
    self.assertTrue(np.allclose(out_2.numpy(), data.clip(0.2, 0.9)))
    self.assertTrue(np.allclose(out_3.numpy(), data.clip(0.2, 0.8)))
def clip_grad_value_(parameters, clip_value):
    r"""Clips gradient of an iterable of parameters at specified value.

    Gradients are modified in-place.

    Arguments:
        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor that will have gradients normalized
        clip_value (float or int): maximum allowed value of the gradients.
            The gradients are clipped in the range
            :math:`\left[\text{-clip\_value}, \text{clip\_value}\right]`
    """
    if isinstance(parameters, paddle.Tensor):
        parameters = [parameters]
    clip_value = float(clip_value)
    for p in filter(lambda p: p.grad is not None, parameters):
        # write the clipped values back so the gradient is actually modified
        # in-place (paddle.clip alone returns a new tensor)
        paddle.assign(paddle.clip(p.grad, min=-clip_value, max=clip_value),
                      p.grad)
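# A hedged usage sketch (hypothetical model and data) of the helper above:
# clip parameter gradients after backward() and before the optimizer step.
import paddle
model = paddle.nn.Linear(8, 2)
opt = paddle.optimizer.SGD(learning_rate=0.1, parameters=model.parameters())
loss = model(paddle.randn([4, 8])).mean()
loss.backward()
clip_grad_value_(model.parameters(), clip_value=0.5)
opt.step()
opt.clear_grad()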
def update_target_network_clip(self):
    for param_q, param_k in zip(self.towers[0].parameters(),
                                self.towers[1].parameters()):
        # paddle.assign((param_k * self.m + param_q * (1. - self.m)), param_k)
        paddle.assign(
            param_k - (1 - self.m) * paddle.clip(
                (param_k - param_q), min=-1.0, max=1.0), param_k)
        param_k.stop_gradient = True
def backward(R_p):
    Z = []
    for _ in range(self.num):
        Z.append(self.X)

    Spp = []
    Spn = []

    for z, rp in zip(Z, R_p):
        Spp.append(safe_divide(paddle.clip(rp, min=0), z))
        Spn.append(safe_divide(paddle.clip(rp, max=0), z))

    Cpp = self.gradprop(Z, self.X, Spp)[0]
    Cpn = self.gradprop(Z, self.X, Spn)[0]

    Rp = self.X * (Cpp * Cpn)

    return Rp
def forward(self, distance, label):
    label = -1 * (2 * label - 1)
    # print(label, distance)

    pos_num = paddle.sum((label == 1).astype('float32')) + 0.0001
    neg_num = paddle.sum((label == -1).astype('float32')) + 0.0001

    loss_1 = paddle.sum((1 + label) / 2 * paddle.pow(distance, 2)) / pos_num
    loss_2 = paddle.sum(
        (1 - label) / 2 *
        paddle.pow(paddle.clip(self.margin - distance, min=0.0), 2)) / neg_num
    loss = loss_1 + loss_2
    return loss
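# A hedged toy check (made-up margin and distances) of the hinge term above:
# paddle.clip(margin - d, min=0) only penalizes pairs closer than the margin.
import paddle
margin = 1.0
distance = paddle.to_tensor([0.2, 0.8, 1.5])
print(paddle.clip(margin - distance, min=0.0).numpy())  # ~[0.8, 0.2, 0.0]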
def _get_Adc_loss(self, feed_dict, node_repr):
    node_i_repr = paddle.gather(node_repr, feed_dict['Ad_node_i'])
    node_j_repr = paddle.gather(node_repr, feed_dict['Ad_node_j'])
    node_ij_repr = paddle.concat([node_i_repr, node_j_repr], 1)
    logits = self.Adc_mlp.forward(node_ij_repr)

    atom_dist = paddle.clip(feed_dict['Ad_atom_dist'], 0.0, 20.0)
    atom_dist_id = paddle.cast(atom_dist / 20.0 * self.Adc_vocab, 'int64')
    loss = self.Adc_loss(logits, atom_dist_id)
    return loss
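# A hedged sketch of the distance-bucketing step above with an assumed
# vocabulary size Adc_vocab = 40: distances are clipped to [0, 20] and mapped
# to integer class ids for the classification loss; values are made up.
import paddle
Adc_vocab = 40
atom_dist = paddle.clip(paddle.to_tensor([0.9, 3.7, 12.4, 57.0]), 0.0, 20.0)
atom_dist_id = paddle.cast(atom_dist / 20.0 * Adc_vocab, 'int64')
print(atom_dist_id.numpy())  # [1, 7, 24, 40]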
def get_loss(self, heatmap, size, offset, weights, inputs):
    heatmap_target = inputs['heatmap']
    size_target = inputs['size']
    offset_target = inputs['offset']
    index = inputs['index']
    mask = inputs['index_mask']
    heatmap = paddle.clip(F.sigmoid(heatmap), 1e-4, 1 - 1e-4)
    heatmap_loss = self.focal_loss(heatmap, heatmap_target)

    size = paddle.transpose(size, perm=[0, 2, 3, 1])
    size_n, size_h, size_w, size_c = size.shape
    size = paddle.reshape(size, shape=[size_n, -1, size_c])
    index = paddle.unsqueeze(index, 2)
    batch_inds = list()
    for i in range(size_n):
        batch_ind = paddle.full(shape=[1, index.shape[1], 1],
                                fill_value=i,
                                dtype='int64')
        batch_inds.append(batch_ind)
    batch_inds = paddle.concat(batch_inds, axis=0)
    index = paddle.concat(x=[batch_inds, index], axis=2)
    pos_size = paddle.gather_nd(size, index=index)
    mask = paddle.unsqueeze(mask, axis=2)
    size_mask = paddle.expand_as(mask, pos_size)
    size_mask = paddle.cast(size_mask, dtype=pos_size.dtype)
    pos_num = size_mask.sum()
    size_mask.stop_gradient = True
    size_target.stop_gradient = True
    size_loss = F.l1_loss(pos_size * size_mask,
                          size_target * size_mask,
                          reduction='sum')
    size_loss = size_loss / (pos_num + 1e-4)

    offset = paddle.transpose(offset, perm=[0, 2, 3, 1])
    offset_n, offset_h, offset_w, offset_c = offset.shape
    offset = paddle.reshape(offset, shape=[offset_n, -1, offset_c])
    pos_offset = paddle.gather_nd(offset, index=index)
    offset_mask = paddle.expand_as(mask, pos_offset)
    offset_mask = paddle.cast(offset_mask, dtype=pos_offset.dtype)
    pos_num = offset_mask.sum()
    offset_mask.stop_gradient = True
    offset_target.stop_gradient = True
    offset_loss = F.l1_loss(pos_offset * offset_mask,
                            offset_target * offset_mask,
                            reduction='sum')
    offset_loss = offset_loss / (pos_num + 1e-4)

    det_loss = weights['heatmap'] * heatmap_loss + weights[
        'size'] * size_loss + weights['offset'] * offset_loss

    return {
        'det_loss': det_loss,
        'heatmap_loss': heatmap_loss,
        'size_loss': size_loss,
        'offset_loss': offset_loss
    }
def compute_mog_loss(self, y, t):
    """Compute the loss when the output distribution is a mixture of
    Gaussian distributions.

    Parameters
    -----------
    y : Tensor [shape=(B, T, C_output)]
        The parameters of the output distribution. It is the concatenation of
        3 parts: the logits of every distribution, the mean of each
        distribution and the log standard deviation of each distribution. Each
        part's shape is (B, T, n_mixture), where ``n_mixture`` means the
        number of Gaussians in the mixture.

    t : Tensor [shape=(B, T)]
        The target audio.

    Notes
    -------
    Output distributions whose input contains padding are neglected in loss
    computation. So the first ``context_size`` steps do not contribute to the
    loss.

    Returns
    --------
    Tensor: [shape=(1,)]
        The loss.
    """
    n_mixture = self.output_dim // 3

    # context size is not taken into account
    y = y[:, self.context_size:, :]
    t = t[:, self.context_size:]

    w, mu, log_std = paddle.split(y, 3, axis=2)
    # 100.0 is just a large float
    log_std = paddle.clip(log_std, min=self.log_scale_min, max=100.)
    inv_std = paddle.exp(-log_std)
    p_mixture = F.softmax(w, -1)

    t = paddle.unsqueeze(t, -1)
    if n_mixture > 1:
        # t = F.expand_as(t, log_std)
        t = paddle.expand(t, [-1, -1, n_mixture])

    x_std = inv_std * (t - mu)
    exponent = paddle.exp(-0.5 * x_std * x_std)
    pdf_x = 1.0 / math.sqrt(2.0 * math.pi) * inv_std * exponent

    pdf_x = p_mixture * pdf_x
    # pdf_x: [bs, len]
    pdf_x = paddle.sum(pdf_x, -1)
    per_sample_loss = -paddle.log(pdf_x + 1e-9)

    loss = paddle.mean(per_sample_loss)
    return loss
def forward(self, inputs):
    pool = self.pool2d_gap(inputs)
    pool = paddle.squeeze(pool, axis=[2, 3])
    squeeze = self.squeeze(pool)
    squeeze = F.relu(squeeze)
    excitation = self.excitation(squeeze)
    excitation = paddle.clip(x=excitation, min=0, max=1)
    excitation = paddle.unsqueeze(excitation, axis=[2, 3])
    out = paddle.multiply(inputs, excitation)
    return out