def forward(self, input):
    x = self.model(input)

    # class-activation branch from global average pooling
    gap = adaptive_pool2d(x, 1, pool_type='avg')
    gap_logit = self.gap_fc(reshape(gap, shape=[x.shape[0], -1]))
    gap_weight = list(self.gap_fc.parameters())[0]
    gap_weight = transpose(gap_weight, perm=[1, 0])
    gap = x * unsqueeze(unsqueeze(gap_weight, 2), 3)

    # class-activation branch from global max pooling
    gmp = adaptive_pool2d(x, 1, pool_type='max')
    gmp_logit = self.gmp_fc(reshape(gmp, shape=[x.shape[0], -1]))
    gmp_weight = list(self.gmp_fc.parameters())[0]
    gmp_weight = transpose(gmp_weight, perm=[1, 0])
    gmp = x * unsqueeze(unsqueeze(gmp_weight, 2), 3)

    cam_logit = concat([gap_logit, gmp_logit], 1)
    x = concat([gap, gmp], 1)
    x = self.leaky_relu(self.conv1x1(x))

    heatmap = reduce_sum(x, dim=1, keep_dim=True)

    x = self.pad(x)
    out = self.conv(x)
    return out, cam_logit, heatmap
def masks_to_boxes(masks):
    """
    Compute the bounding boxes around the provided masks.

    The masks should be in format [N, H, W] where N is the number of masks
    and (H, W) are the spatial dimensions.

    Returns a [N, 4] tensor, with the boxes in xyxy format.
    """
    if np.sum(masks.shape) == 0:
        return dg.to_variable(np.zeros((0, 4)))

    h, w = masks.shape[-2:]
    y = dg.to_variable(np.arange(0, h, 1, dtype="float32"))
    x = dg.to_variable(np.arange(0, w, 1, dtype="float32"))
    y, x = T.meshgrid([y, x])  # [h, w]

    x_mask = (masks * L.unsqueeze(x, [0]))  # [N, H, W]
    x_max = L.reduce_max(L.flatten(x_mask, axis=1), dim=-1)
    non_mask = dg.to_variable(~masks.numpy())
    x_mask[non_mask] = 1e8
    x_min = L.reduce_min(L.flatten(x_mask, axis=1), dim=-1)

    y_mask = (masks * L.unsqueeze(y, [0]))  # [N, H, W]
    y_max = L.reduce_max(L.flatten(y_mask, axis=1), dim=-1)
    y_mask[non_mask] = 1e8
    y_min = L.reduce_min(L.flatten(y_mask, axis=1), dim=-1)

    return L.stack([x_min, y_min, x_max, y_max], 1)
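# A minimal NumPy-only sketch of the same idea, for reference; the helper name
# `_numpy_masks_to_boxes` and the toy data below are illustrative, not part of
# the function above. A single 5x5 mask with a filled 2x3 region should yield
# the xyxy box [1, 1, 3, 2].
import numpy as np

def _numpy_masks_to_boxes(masks):
    # masks: bool array of shape [N, H, W]
    n, h, w = masks.shape
    ys, xs = np.meshgrid(np.arange(h), np.arange(w), indexing="ij")
    boxes = np.zeros((n, 4), dtype="float32")
    for i, m in enumerate(masks):
        boxes[i] = [xs[m].min(), ys[m].min(), xs[m].max(), ys[m].max()]
    return boxes

toy_mask = np.zeros((1, 5, 5), dtype=bool)
toy_mask[0, 1:3, 1:4] = True
print(_numpy_masks_to_boxes(toy_mask))  # [[1. 1. 3. 2.]]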
def forward(self, input):
    x = self.DownBlock(input)

    gap = L.adaptive_pool2d(x, 1, pool_type='avg')
    gap_logit = self.gap_fc(L.reshape(gap, (x.shape[0], -1)))
    gap_weight = self.gap_fc.weight
    gap = x * L.unsqueeze(gap_weight, (2, 3))

    gmp = L.adaptive_pool2d(x, 1, pool_type='max')
    gmp_logit = self.gmp_fc(L.reshape(gmp, (x.shape[0], -1)))
    gmp_weight = self.gmp_fc.weight
    gmp = x * L.unsqueeze(gmp_weight, (2, 3))

    cam_logit = L.concat([gap_logit, gmp_logit], 1)
    x = L.concat([gap, gmp], 1)
    x = self.relu(self.conv1x1(x))

    heatmap = L.reduce_sum(x, dim=1, keep_dim=True)

    if self.light:
        x_ = L.adaptive_pool2d(x, 1, pool_type='avg')
        x_ = self.FC(L.reshape(x_, (x_.shape[0], -1)))
    else:
        x_ = self.FC(L.reshape(x, (x.shape[0], -1)))
    gamma, beta = self.gamma(x_), self.beta(x_)

    for i in range(self.n_blocks):
        x = getattr(self, 'UpBlock1_' + str(i + 1))(x, gamma, beta)
    out = self.UpBlock2(x)
    return out, cam_logit, heatmap
def generalized_box_iou(boxes1, boxes2):
    """
    Generalized IoU from https://giou.stanford.edu/

    The boxes should be in [x0, y0, x1, y1] format.

    Returns a [N, M] pairwise matrix, where N = len(boxes1) and M = len(boxes2).
    """
    # degenerate boxes give inf / nan results, so do an early check
    assert L.reduce_all(boxes1[:, 2:] >= boxes1[:, :2])
    assert L.reduce_all(boxes2[:, 2:] >= boxes2[:, :2])
    iou, union = box_iou(boxes1, boxes2)

    N, M = boxes1.shape[0], boxes2.shape[0]
    boxes1 = L.unsqueeze(boxes1, axes=[1])  # [N, 1, 4]
    boxes1 = L.expand(boxes1, [1, M, 1])    # [N, M, 4]
    boxes2 = L.unsqueeze(boxes2, axes=[0])  # [1, M, 4]
    boxes2 = L.expand(boxes2, [N, 1, 1])    # [N, M, 4]
    lt = L.elementwise_min(boxes1[:, :, :2], boxes2[:, :, :2])  # [N, M, 2]
    rb = L.elementwise_max(boxes1[:, :, 2:], boxes2[:, :, 2:])  # [N, M, 2]

    wh = L.clip(rb - lt, min=0, max=1e8)  # [N, M, 2]
    area = wh[:, :, 0] * wh[:, :, 1] + 1e-4  # prevent division by zero

    return iou - (area - union) / area
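# For reference, the quantity computed above is the Generalized IoU from the
# paper behind https://giou.stanford.edu/: with C the area of the smallest
# enclosing box of the pair and U the union area,
#
#   GIoU(b1, b2) = IoU(b1, b2) - (C - U) / C
#
# so GIoU lies in (-1, 1] and penalizes non-overlapping boxes by how far apart
# they are relative to their enclosing box.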
def get_embedding(self, num_embeddings, embedding_dim, padding_idx=None):
    """Build sinusoidal embeddings.

    This matches the implementation in tensor2tensor, but differs slightly
    from the description in Section 3.5 of "Attention Is All You Need".
    """
    half_dim = embedding_dim // 2
    emb = layers.log(float(10000)) / (half_dim - 1)
    emb = layers.exp(layers.arange(
        start=0, end=half_dim, dtype='float32') * -emb)

    # [num_embeddings, embedding_dim // 2]
    emb = layers.unsqueeze(layers.arange(-num_embeddings // 2,
                                         num_embeddings // 2,
                                         dtype='float32'), axis=1) * \
        layers.unsqueeze(emb, axis=0)

    emb = layers.concat([layers.sin(emb), layers.cos(emb)], dim=1)
    # [num_embeddings, embedding_dim]
    if embedding_dim % 2 == 1:
        # zero pad the odd dimension
        emb = layers.concat(
            [emb, layers.zeros(shape=(num_embeddings, 1))], dim=1)
    if padding_idx is not None:
        emb[padding_idx, :] = 0
    self.origin_shift = num_embeddings // 2
    return emb
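# Written out, the embedding built above is the usual sinusoidal form,
#
#   PE(pos, i)            = sin(pos * 10000^(-i / (half_dim - 1)))
#   PE(pos, half_dim + i) = cos(pos * 10000^(-i / (half_dim - 1)))   for 0 <= i < half_dim
#
# where the tensor2tensor variant concatenates all sin components and then all
# cos components along the feature axis instead of interleaving them (the
# slight difference from Section 3.5 that the docstring mentions). Here pos
# runs from -num_embeddings // 2 to num_embeddings // 2, with
# self.origin_shift used to map relative positions to non-negative indices.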
def no_nms(bboxes, scores, score_threshold, keep_top_k):
    scores = L.transpose(scores, [1, 0])
    inds = L.where(scores > score_threshold)
    if len(inds) == 0:
        return L.zeros((0, 6), 'float32') - 1.0

    cate_scores = L.gather_nd(scores, inds)
    cate_labels = inds[:, 1]
    bboxes = L.gather(bboxes, inds[:, 0])

    # sort and keep top keep_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if keep_top_k > 0 and len(sort_inds) > keep_top_k:
        sort_inds = sort_inds[:keep_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    cate_scores = L.unsqueeze(cate_scores, 1)
    cate_labels = L.unsqueeze(cate_labels, 1)
    cate_labels = L.cast(cate_labels, 'float32')
    pred = L.concat([cate_labels, cate_scores, bboxes], 1)
    return pred
def forward(self, input, gamma, beta):
    # instance normalization statistics (per sample, per channel)
    in_mean, in_var = reduce_mean(input, dim=[2, 3], keep_dim=True), \
        my_var(input, dim=[2, 3], keep_dim=True)
    out_in = (input - in_mean) / sqrt(in_var + self.eps)

    # layer normalization statistics (per sample, over C, H, W)
    ln_mean, ln_var = reduce_mean(input, dim=[1, 2, 3], keep_dim=True), \
        my_var(input, dim=[1, 2, 3], keep_dim=True)
    out_ln = (input - ln_mean) / sqrt(ln_var + self.eps)

    # learned rho blends the instance- and layer-normalized activations
    ex_rho = expand(self.rho, (input.shape[0], 1, 1, 1))
    out = ex_rho * out_in + (1 - ex_rho) * out_ln

    gamma = unsqueeze(gamma, axes=2)
    gamma = unsqueeze(gamma, axes=3)
    beta = unsqueeze(beta, axes=2)
    beta = unsqueeze(beta, axes=3)
    out = out * gamma + beta
    return out
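# The blend above combines instance- and layer-normalized activations with a
# learned ratio rho, then applies the externally predicted affine parameters;
# written out:
#
#   a_IN = (a - mu_IN) / sqrt(var_IN + eps)    # per-sample, per-channel stats
#   a_LN = (a - mu_LN) / sqrt(var_LN + eps)    # per-sample stats over C, H, W
#   out  = gamma * (rho * a_IN + (1 - rho) * a_LN) + beta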
def forward(self, *args, **kwargs):
    """
    Args:
        start_pos (optional, `Variable` of shape [batch_size]):
            token index of start of answer span in `context`
        end_pos (optional, `Variable` of shape [batch_size]):
            token index of end of answer span in `context`
    Returns:
        loss (`Variable` of shape []):
            Cross entropy loss mean over batch and time, ignoring positions
            where label == -100; if labels are not set, returns None
        start_logits (`Variable` of shape [batch_size, seq_len]):
            output logits of start position; use argmax(start_logits) to get start index
        end_logits (`Variable` of shape [batch_size, seq_len]):
            output logits of end position; use argmax(end_logits) to get end index
    """
    start_pos = kwargs.pop('start_pos', None)
    end_pos = kwargs.pop('end_pos', None)
    pooled, encoded = super(ErnieModelForQuestionAnswering, self).forward(*args, **kwargs)
    encoded = self.dropout(encoded)
    encoded = self.classifier(encoded)
    start_logits, end_logits = L.unstack(encoded, axis=-1)

    if start_pos is not None and end_pos is not None:
        if len(start_pos.shape) == 1:
            start_pos = L.unsqueeze(start_pos, axes=[-1])
        if len(end_pos.shape) == 1:
            end_pos = L.unsqueeze(end_pos, axes=[-1])
        start_loss = L.softmax_with_cross_entropy(start_logits, start_pos)
        end_loss = L.softmax_with_cross_entropy(end_logits, end_pos)
        loss = (L.reduce_mean(start_loss) + L.reduce_mean(end_loss)) / 2.
    else:
        loss = None
    return loss, start_logits, end_logits
def forward(self, input):
    x = self.DownBlock(input)

    gap = layers.adaptive_pool2d(x, 1, pool_type='avg')
    gap_logit = self.gap_fc(layers.reshape(gap, [x.shape[0], -1]))
    gap_weight = list(self.gap_fc.parameters())[0]
    gap = x * layers.unsqueeze(layers.unsqueeze(gap_weight, 2), 3)

    gmp = layers.adaptive_pool2d(x, 1, pool_type='max')
    gmp_logit = self.gmp_fc(layers.reshape(gmp, [x.shape[0], -1]))
    gmp_weight = list(self.gmp_fc.parameters())[0]
    gmp = x * layers.unsqueeze(layers.unsqueeze(gmp_weight, 2), 3)

    cam_logit = layers.concat([gap_logit, gmp_logit], 1)
    x = layers.concat([gap, gmp], 1)
    x = self.relu(self.conv1x1(x))

    heatmap = layers.reduce_sum(x, dim=1, keep_dim=True)

    if self.light:
        x_ = layers.adaptive_pool2d(x, 1, pool_type='avg')
        x_ = self.FC(layers.reshape(x_, [x_.shape[0], -1]))
    else:
        x_ = self.FC(layers.reshape(x, [x.shape[0], -1]))
    gamma, beta = self.gamma(x_), self.beta(x_)

    for i in range(self.n_blocks):
        x = getattr(self, 'UpBlock1_' + str(i + 1))(x, gamma, beta)
    out = self.UpBlock2(x)
    return out, cam_logit, heatmap
def forward(self, input):
    x = self.DownBlock(input)

    # torch equivalent:
    #   gap = torch.nn.functional.adaptive_avg_pool2d(x, 1)
    #   gap_logit = self.gap_fc(gap.view(x.shape[0], -1))
    #   gap_weight = list(self.gap_fc.parameters())[0]
    #   gap = x * gap_weight.unsqueeze(2).unsqueeze(3)
    gap = layers.adaptive_pool2d(x, 1, pool_type='avg')
    gap_logit = self.gap_fc(layers.reshape(gap, shape=(x.shape[0], -1)))
    gap_weight = self.gap_fc.parameters()[0]
    gap_weight = layers.reshape(gap_weight, shape=(1, -1))
    gap = x * layers.unsqueeze(layers.unsqueeze(gap_weight, 2), 3)

    # torch equivalent:
    #   gmp = torch.nn.functional.adaptive_max_pool2d(x, 1)
    #   gmp_logit = self.gmp_fc(gmp.view(x.shape[0], -1))
    #   gmp_weight = list(self.gmp_fc.parameters())[0]
    #   gmp = x * gmp_weight.unsqueeze(2).unsqueeze(3)
    gmp = layers.adaptive_pool2d(x, 1, pool_type='max')
    gmp_logit = self.gmp_fc(layers.reshape(gmp, shape=(x.shape[0], -1)))
    gmp_weight = self.gmp_fc.parameters()[0]
    gmp_weight = layers.reshape(gmp_weight, shape=(1, -1))
    gmp = x * layers.unsqueeze(layers.unsqueeze(gmp_weight, 2), 3)

    # torch equivalent: torch.cat([gap_logit, gmp_logit], 1), torch.cat([gap, gmp], 1)
    cam_logit = layers.concat([gap_logit, gmp_logit], 1)
    x = layers.concat([gap, gmp], 1)
    x = self.relu(self.conv1x1(x))

    # torch equivalent: torch.sum(x, dim=1, keepdim=True)
    heatmap = layers.reduce_sum(x, dim=1, keep_dim=True)

    if self.light:
        # torch equivalent:
        #   x_ = torch.nn.functional.adaptive_avg_pool2d(x, 1)
        #   x_ = self.FC(x_.view(x_.shape[0], -1))
        x_ = layers.adaptive_pool2d(x, 1, pool_type='avg')
        x_ = self.FC(layers.reshape(x_, shape=(x_.shape[0], -1)))
    else:
        # torch equivalent: x_ = self.FC(x.view(x.shape[0], -1))
        x_ = self.FC(layers.reshape(x, shape=(x.shape[0], -1)))
    gamma, beta = self.gamma(x_), self.beta(x_)

    for i in range(self.n_blocks):
        x = getattr(self, 'UpBlock1_' + str(i + 1))(x, gamma, beta)
    out = self.UpBlock2(x)
    return out, cam_logit, heatmap
def decoder_step(gru_unit, cue_gru_unit, step_in, hidden, input_size,
                 hidden_size, memory, memory_mask, knowledge, mask=None):
    """ decoder step """
    # get attention over the encoder memory using the top hidden layer
    top_hidden = layers.slice(hidden, axes=[0], starts=[0], ends=[1])
    top_hidden = layers.squeeze(top_hidden, axes=[0])
    top_hidden = layers.unsqueeze(top_hidden, axes=[1])

    weight_memory, attn = dot_attention(top_hidden, memory, memory_mask)

    step_in = layers.unsqueeze(step_in, axes=[1])
    rnn_input_list = [step_in, weight_memory]
    if weight_memory.shape[0] == -1:
        knowledge_1 = layers.reshape(knowledge, shape=weight_memory.shape)
    else:
        knowledge_1 = knowledge
    cue_input_list = [knowledge_1, weight_memory]
    output_list = [weight_memory]

    rnn_input = layers.concat(rnn_input_list, axis=2)
    rnn_input = layers.squeeze(rnn_input, axes=[1])
    rnn_output, rnn_last_hidden = gru_unit(rnn_input, hidden, mask)

    cue_input = layers.concat(cue_input_list, axis=2)
    cue_input = layers.squeeze(cue_input, axes=[1])
    cue_rnn_out, cue_rnn_last_hidden = cue_gru_unit(cue_input, hidden, mask)

    h_y = layers.tanh(
        fc(rnn_last_hidden, hidden_size, hidden_size, name="dec_fc1"))
    h_cue = layers.tanh(
        fc(cue_rnn_last_hidden, hidden_size, hidden_size, name="dec_fc2"))

    concate_y_cue = layers.concat([h_y, h_cue], axis=2)
    k = layers.sigmoid(fc(concate_y_cue, hidden_size * 2, 1, name='dec_fc3'))

    new_hidden = h_y * k - h_cue * (k - 1.0)

    new_hidden_tmp = layers.transpose(new_hidden, perm=[1, 0, 2])
    output_list.append(new_hidden_tmp)

    real_out = layers.concat(output_list, axis=2)

    if mask:
        mask_tmp = layers.unsqueeze(mask, axes=[0])
        new_hidden = layers.elementwise_mul((new_hidden - hidden),
                                            mask_tmp, axis=0)
        new_hidden += hidden

    return real_out, new_hidden
def forward(self, x, y, **kargs):
    """Adaptive Normalization forward.

    Args:
        x (N x C1 x *): input.
        y (N x C2): conditional information.
    Returns:
        out (N x C1 x *): output.
    """
    residual_dim = len(x.shape) - len(y.shape)
    if self.projection:
        if self.separate_projection:
            gamma = self.fc_gamma(y)
            beta = self.fc_beta(y)
            for _ in range(residual_dim):
                gamma = L.unsqueeze(gamma, -1)
                beta = L.unsqueeze(beta, -1)
        else:
            y = self.fc(y)
            for _ in range(residual_dim):
                y = L.unsqueeze(y, -1)
            gamma, beta = L.split(y, num_or_sections=2, dim=1)
    else:
        for _ in range(residual_dim):
            y = L.unsqueeze(y, -1)
        gamma, beta = L.split(y, 2, 1)

    x = self.norm(x) if self.norm is not None else x
    out = x * (1 + gamma) + beta
    return out
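# In short, the conditional input y is projected to per-channel modulation
# parameters which are applied on top of the (optional) parameter-free norm:
#
#   out = norm(x) * (1 + gamma(y)) + beta(y)
#
# with gamma(y) and beta(y) broadcast over the trailing spatial dimensions via
# the unsqueeze loops above.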
def get_enc_bias(source_inputs):
    """get_enc_bias

    Build an encoder attention bias: positions whose embeddings sum to zero
    (i.e. padding) receive a large negative bias.
    """
    source_inputs = layers.cast(source_inputs, 'float32')
    emb_sum = layers.reduce_sum(layers.abs(source_inputs), dim=-1)
    zero = layers.fill_constant([1], 'float32', value=0)
    bias = layers.cast(layers.equal(emb_sum, zero), 'float32') * -1e9
    return layers.unsqueeze(layers.unsqueeze(bias, axes=[1]), axes=[1])
def forward(self, input, gamma, beta):
    in_mean, in_var = reduce_mean(input, dim=[2, 3], keep_dim=True), \
        var(input, axis=[2, 3], keepdim=True)
    out_in = (input - in_mean) / layers.sqrt(in_var + self.eps)

    ln_mean, ln_var = reduce_mean(input, dim=[1, 2, 3], keep_dim=True), \
        var(input, axis=[1, 2, 3], keepdim=True)
    out_ln = (input - ln_mean) / layers.sqrt(ln_var + self.eps)

    # blend instance- and layer-normalized activations with the learned rho
    rho = layers.expand(self.rho, [input.shape[0], 1, 1, 1])
    out = rho * out_in + (1 - rho) * out_ln

    out = out * layers.unsqueeze(layers.unsqueeze(gamma, 2), 3) \
        + layers.unsqueeze(layers.unsqueeze(beta, 2), 3)
    return out
def erniesage_v3_aggregator(gw, feature, hidden_size, act, initializer,
                            learning_rate, name):
    msg = gw.send(copy_send, nfeat_list=[("h", feature)])
    neigh_feature = gw.recv(msg, ernie_recv)
    neigh_feature = L.cast(L.unsqueeze(neigh_feature, [-1]), "int64")

    feature = L.unsqueeze(feature, [-1])
    cls = L.fill_constant_batch_size_like(feature, [-1, 1, 1], "int64", 1)
    term_ids = L.concat([cls, feature[:, :-1], neigh_feature], 1)
    term_ids.stop_gradient = True
    return term_ids
def forward(self, tgt, memory, tgt_mask=None, memory_mask=None,
            pos=None, query_pos=None):
    output = tgt
    intermediate = []

    assert tgt_mask is None, "Computing tgt_mask's attn_mask is not implemented."

    if memory_mask is not None:
        bs, tgt_length = tgt.shape[:2]
        memory_length = memory.shape[1]
        attn_mask = L.zeros([bs, tgt_length, memory_length], dtype="float32")
        memory_mask = L.expand(
            L.unsqueeze(memory_mask, [1]),
            (1, tgt_length, 1))  # [bs, tgt_length, memory_length]
        attn_mask = attn_mask.numpy()
        memory_mask = memory_mask.numpy()
        attn_mask[memory_mask] = -1e8
        attn_mask = dg.to_variable(attn_mask)
        attn_mask = L.expand(
            L.unsqueeze(attn_mask, [1]),
            (1, self.nhead, 1, 1))  # [bs, nhead, tgt_length, memory_length]
        memory_mask = attn_mask

    attention_weight = []
    for layer in self.layers:
        output, self_attn_weights, multihead_attn_weights = layer(
            output, memory, tgt_mask=tgt_mask, memory_mask=memory_mask,
            pos=pos, query_pos=query_pos)
        attention_weight.append((self_attn_weights, multihead_attn_weights))
        if self.return_intermediate:
            intermediate.append(self.norm(output))

    if self.norm is not None:
        output = self.norm(output)
        if self.return_intermediate:
            intermediate.pop()
            intermediate.append(output)

    if self.return_intermediate:
        return L.stack(intermediate), attention_weight

    return L.unsqueeze(output, [0]), attention_weight
def forward(self, features):
    src_ids, sent_ids = features
    dtype = 'float16' if self.hparam['fp16'] else 'float32'
    zero = L.fill_constant([1], dtype='int64', value=0)
    input_mask = L.cast(L.logical_not(L.equal(src_ids, zero)), dtype)  # assume pad id == 0
    #input_mask = L.unsqueeze(input_mask, axes=[2])
    d_shape = L.shape(src_ids)
    seqlen = d_shape[1]
    batch_size = d_shape[0]
    pos_ids = L.unsqueeze(L.range(0, seqlen, 1, dtype='int32'), axes=[0])
    pos_ids = L.expand(pos_ids, [batch_size, 1])
    pos_ids = L.unsqueeze(pos_ids, axes=[2])
    pos_ids = L.cast(pos_ids, 'int64')
    pos_ids.stop_gradient = True
    input_mask.stop_gradient = True
    task_ids = L.zeros_like(src_ids) + self.hparam.task_id  # task ids are not used at the moment
    task_ids.stop_gradient = True

    bert = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=self.hparam,
        use_fp16=self.hparam['fp16']
    )

    cls_feats = bert.get_pooled_output()
    cls_feats = L.dropout(
        x=cls_feats,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train"
    )

    logits = L.fc(
        input=cls_feats,
        size=self.hparam['num_label'],
        param_attr=F.ParamAttr(
            name="cls_out_w",
            initializer=F.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=F.ParamAttr(
            name="cls_out_b",
            initializer=F.initializer.Constant(0.))
    )

    propeller.summary.histogram('pred', logits)

    if self.mode is propeller.RunMode.PREDICT:
        probs = L.softmax(logits)
        return probs
    else:
        return logits
def forward(self, q, k, v, lengths, speaker_embed, start_index,
            force_monotonic=False, prev_coeffs=None, window=None):
    # add position encoding as an inductive bias
    if self.has_bias:  # multi-speaker model
        omega_q = 2 * F.sigmoid(
            F.squeeze(self.q_pos_affine(speaker_embed), axes=[-1]))
        omega_k = 2 * self.omega_initial * F.sigmoid(F.squeeze(
            self.k_pos_affine(speaker_embed), axes=[-1]))
    else:  # single-speaker case
        batch_size = q.shape[0]
        omega_q = F.ones((batch_size, ), dtype="float32")
        omega_k = F.ones((batch_size, ), dtype="float32") * self.omega_default
    q += self.position_encoding_weight * positional_encoding(q, start_index, omega_q)
    k += self.position_encoding_weight * positional_encoding(k, 0, omega_k)

    q, k, v = self.q_affine(q), self.k_affine(k), self.v_affine(v)
    activations = F.matmul(q, k, transpose_y=True)
    activations /= np.sqrt(self.attention_dim)

    if self.training:
        # mask the <pad> parts from the encoder
        mask = F.sequence_mask(lengths, dtype="float32")
        attn_bias = F.scale(1. - mask, -1000)
        activations += F.unsqueeze(attn_bias, [1])
    elif force_monotonic:
        assert window is not None
        backward_step, forward_step = window
        T_enc = k.shape[1]
        batch_size, T_dec, _ = q.shape

        # actually T_dec = 1 here
        alpha = F.fill_constant((batch_size, T_dec), value=0, dtype="int64") \
            if prev_coeffs is None \
            else F.argmax(prev_coeffs, axis=-1)
        backward = F.sequence_mask(alpha - backward_step, maxlen=T_enc, dtype="bool")
        forward = F.sequence_mask(alpha + forward_step, maxlen=T_enc, dtype="bool")
        mask = F.cast(F.logical_xor(backward, forward), "float32")
        # print("mask's shape:", mask.shape)
        attn_bias = F.scale(1. - mask, -1000)
        activations += attn_bias

    # softmax
    coefficients = F.softmax(activations, axis=-1)
    # context vector
    coefficients = F.dropout(coefficients, 1. - self.keep_prob,
                             dropout_implementation='upscale_in_train')
    contexts = F.matmul(coefficients, v)
    # context normalization
    enc_lengths = F.cast(F.unsqueeze(lengths, axes=[1, 2]), "float32")
    contexts *= F.sqrt(enc_lengths)
    # out affine
    contexts = self.out_affine(contexts)
    return contexts, coefficients
def forward(self, input):
    x = self.DownBlock(input)

    gap = adaptive_pool2d(x, pool_size=[1, 1], pool_type='avg')
    gap_ = reshape(x=gap, shape=(x.shape[0], -1))
    gap_logit = self.gap_fc(gap_)
    gap_weight = self.gap_fc.parameters()[0]
    gap_weight = transpose(gap_weight, perm=[1, 0])
    gap_weight = unsqueeze(gap_weight, axes=2)
    gap_weight = unsqueeze(gap_weight, axes=3)
    gap = x * gap_weight

    gmp = adaptive_pool2d(x, pool_size=[1, 1], pool_type='max')
    gmp_ = reshape(x=gmp, shape=(x.shape[0], -1))
    gmp_logit = self.gmp_fc(gmp_)
    gmp_weight = self.gmp_fc.parameters()[0]
    gmp_weight = transpose(gmp_weight, perm=[1, 0])
    gmp_weight = unsqueeze(gmp_weight, axes=2)
    gmp_weight = unsqueeze(gmp_weight, axes=3)
    gmp = x * gmp_weight

    cam_logit = concat(input=[gap_logit, gmp_logit], axis=1)
    x = concat(input=[gap, gmp], axis=1)
    x = self.relu(self.conv1x1(x))

    heatmap = reduce_sum(x, dim=1, keep_dim=True)

    if self.light:
        x_ = adaptive_pool2d(x, pool_size=[1, 1], pool_type='avg')
        x_ = reshape(x=x_, shape=(x_.shape[0], -1))
        x_ = self.FC(x_)
    else:
        x_ = reshape(x, shape=(x.shape[0], -1))
        x_ = self.FC(x_)
    gamma, beta = self.gamma(x_), self.beta(x_)

    for i in range(self.n_blocks):
        x = getattr(self, 'UpBlock1_' + str(i + 1))(x, gamma, beta)
    out = self.UpBlock2(x)
    return out, cam_logit, heatmap
def get_attention_mask(mask, nhead):
    # mask: [bs, L] -> attn_mask: [bs, nhead, L, L]
    bs, l = mask.shape
    row_mask = L.expand(L.unsqueeze(mask, [2]), (1, 1, l))  # [bs, L, L]
    col_mask = L.expand(L.unsqueeze(mask, [1]), (1, l, 1))  # [bs, L, L]
    mask = L.logical_or(row_mask, col_mask)

    attn_mask = L.zeros([bs, l, l], dtype="float32")
    attn_mask = attn_mask.numpy()
    mask = mask.numpy()
    attn_mask[mask] = -1e8
    attn_mask = dg.to_variable(attn_mask)
    attn_mask = L.expand(
        L.unsqueeze(attn_mask, [1]), (1, nhead, 1, 1))  # [bs, nhead, L1, L2]
    return attn_mask
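# A small NumPy sketch of the same padding-mask expansion, assuming True marks
# padded positions; the variable names and example values are illustrative only.
import numpy as np

pad = np.array([[False, False, True]])       # [bs=1, L=3], last token is padding
row = np.repeat(pad[:, :, None], 3, axis=2)  # [bs, L, L], mask of the query token
col = np.repeat(pad[:, None, :], 3, axis=1)  # [bs, L, L], mask of the key token
blocked = np.logical_or(row, col)            # a pair is blocked if either side is padding
attn_mask = np.where(blocked, -1e8, 0.).astype("float32")
print(attn_mask[0])  # 0 everywhere except -1e8 in the last row and column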
def forward(self, input, gamma, beta):
    rho_ = L.clip(self.rho, min=0, max=1)

    in_mean = L.reduce_mean(input, dim=[2, 3], keep_dim=True)
    in_var = var(input, dim=[2, 3], keepdim=True)
    out_in = (input - in_mean) / L.sqrt(in_var + self.eps)

    ln_mean = L.reduce_mean(input, dim=[1, 2, 3], keep_dim=True)
    ln_var = var(input, dim=[1, 2, 3], keepdim=True)
    out_ln = (input - ln_mean) / L.sqrt(ln_var + self.eps)

    out = rho_ * out_in + (1 - rho_) * out_ln
    out = out * L.unsqueeze(gamma, axes=[2, 3]) + L.unsqueeze(beta, axes=[2, 3])
    return out
def matrix_nms(bboxes, scores, score_threshold, post_threshold, nms_top_k,
               keep_top_k, use_gaussian=False, gaussian_sigma=2.):
    scores = L.transpose(scores, [1, 0])
    inds = L.where(scores > score_threshold)
    if len(inds) == 0:
        return L.zeros((0, 6), 'float32') - 1.0

    cate_scores = L.gather_nd(scores, inds)
    cate_labels = inds[:, 1]
    bboxes = L.gather(bboxes, inds[:, 0])

    # sort and keep top nms_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if nms_top_k > 0 and len(sort_inds) > nms_top_k:
        sort_inds = sort_inds[:nms_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    # Matrix NMS
    kernel = 'gaussian' if use_gaussian else 'linear'
    cate_scores = _matrix_nms(bboxes, cate_labels, cate_scores,
                              kernel=kernel, sigma=gaussian_sigma)

    # filter.
    keep = L.where(cate_scores >= post_threshold)
    if len(keep) == 0:
        return L.zeros((0, 6), 'float32') - 1.0
    bboxes = L.gather(bboxes, keep)
    cate_scores = L.gather(cate_scores, keep)
    cate_labels = L.gather(cate_labels, keep)

    # sort and keep keep_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if len(sort_inds) > keep_top_k:
        sort_inds = sort_inds[:keep_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    cate_scores = L.unsqueeze(cate_scores, 1)
    cate_labels = L.unsqueeze(cate_labels, 1)
    cate_labels = L.cast(cate_labels, 'float32')
    pred = L.concat([cate_labels, cate_scores, bboxes], 1)
    return pred
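# `_matrix_nms` is not shown here; in the Matrix NMS formulation (SOLOv2),
# scores are decayed rather than hard-suppressed. For a candidate j, with
# iou(i, j) the IoU against every higher-scoring candidate i and f either the
# linear kernel f(x) = 1 - x or the gaussian kernel f(x) = exp(-x^2 / sigma):
#
#   decay_j = min_i f(iou(i, j)) / f(max_k iou(k, i))
#   score_j = score_j * decay_j
#
# which is what the `kernel` / `sigma` arguments above select.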
def _decode(self, x, y, w, h, anchors, stride, scale_x_y, eps, is_gt=False):
    conv_shape = x.shape  # (8, 13, 13, 3)
    batch_size = conv_shape[0]
    n_grid = conv_shape[1]
    anchor_per_scale = conv_shape[3]

    _x = L.unsqueeze(x, 4)
    _y = L.unsqueeze(y, 4)
    conv_raw_dxdy = L.concat([_x, _y], -1)  # (8, 13, 13, 3, 2)
    _w = L.unsqueeze(w, 4)
    _h = L.unsqueeze(h, 4)
    conv_raw_dwdh = L.concat([_w, _h], -1)  # (8, 13, 13, 3, 2)

    rows = L.range(0, n_grid, 1, 'float32')
    cols = L.range(0, n_grid, 1, 'float32')
    rows = L.expand(L.reshape(rows, (1, -1, 1)), [n_grid, 1, 1])
    cols = L.expand(L.reshape(cols, (-1, 1, 1)), [1, n_grid, 1])
    offset = L.concat([rows, cols], axis=-1)
    offset = L.reshape(offset, (1, n_grid, n_grid, 1, 2))
    offset = L.expand(offset, [batch_size, 1, 1, anchor_per_scale, 1])

    if is_gt:
        decode_xy = (conv_raw_dxdy + offset) / n_grid
    else:
        if (abs(scale_x_y - 1.0) < eps):
            decode_xy = L.sigmoid(conv_raw_dxdy)
            decode_xy = (decode_xy + offset) / n_grid
        else:
            # Grid Sensitive
            decode_xy = scale_x_y * L.sigmoid(conv_raw_dxdy) - 0.5 * (scale_x_y - 1.0)
            decode_xy = (decode_xy + offset) / n_grid

    anchor_t = fluid.layers.assign(np.copy(anchors).astype(np.float32))
    decode_wh = (L.exp(conv_raw_dwdh) * anchor_t) / (n_grid * stride)
    decode_xywh = L.concat([decode_xy, decode_wh], axis=-1)
    if is_gt:
        decode_xywh.stop_gradient = True

    return decode_xywh  # (8, 13, 13, 3, 4)
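# Written out, the decode above (for predictions, is_gt=False) is the standard
# YOLO box transform with the optional Grid Sensitive scaling from scale_x_y:
#
#   b_xy = (scale_x_y * sigmoid(t_xy) - 0.5 * (scale_x_y - 1) + offset) / n_grid
#   b_wh = anchor * exp(t_wh) / (n_grid * stride)
#
# so the returned xywh values are normalized to the [0, 1] image range; when
# scale_x_y == 1 the first line reduces to (sigmoid(t_xy) + offset) / n_grid.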
def extract_valid_pose_labels(pose_map, pose_type, remove_face_labels,
                              do_remove=True):
    """
    Remove some labels (e.g. face regions) in the pose map if necessary.

    Args:
        pose_map (3D, 4D or 5D tensor): input pose map.
        pose_type (str): 'both' or 'open'.
        remove_face_labels (bool): whether to remove labels for the face region.
        do_remove (bool): do remove face labels.
    Returns:
        pose_map (3D, 4D or 5D tensor): output pose map.
    """
    if pose_map is None:
        return pose_map

    if type(pose_map) == list:
        return [
            extract_valid_pose_labels(p, pose_type, remove_face_labels,
                                      do_remove) for p in pose_map
        ]

    orig_dim = len(pose_map.shape)
    assert (orig_dim >= 3 and orig_dim <= 5)
    if orig_dim == 3:
        pose_map = L.unsqueeze(pose_map, axes=[0, 1])
    elif orig_dim == 4:
        pose_map = L.unsqueeze(pose_map, [0])

    if pose_type == 'open':
        # If input is only openpose, remove densepose part.
        pose_map = pose_map[:, :, 3:]
    elif remove_face_labels and do_remove:
        # Remove face part for densepose input.
        densepose, openpose = pose_map[:, :, :3], pose_map[:, :, 3:]
        face_mask = get_face_mask(pose_map[:, :, 2])
        face_mask = L.unsqueeze(face_mask, [2])
        pose_map = L.concat(
            [densepose * (1 - face_mask) - face_mask, openpose], axis=2)

    if orig_dim == 3:
        pose_map = pose_map[0, 0]
    elif orig_dim == 4:
        pose_map = pose_map[0]

    return pose_map
def test_sequence_unsqueeze(self):
    program = Program()
    with program_guard(program):
        x = layers.data(name='x', shape=[8, 2], dtype='float32')
        out = layers.unsqueeze(input=x, axes=[1])
        self.assertIsNotNone(out)
    print(str(program))
def add_input(self, x, condition=None):
    """Compute the output distribution (represented by its parameters) for a
    step. It works similarly to the `forward` method, but in a
    `step-in-step-out` fashion.

    Args:
        x (Variable): shape(B, T=1), dtype float32, a step of the input waveform.
        condition (Variable, optional): shape(B, C_cond, T=1), dtype float32, a
            step of the upsampled condition. Defaults to None.

    Returns:
        Variable: shape(B, T=1, C_output), dtype float32, the parameter of the
            output distributions.
    """
    # Causal Conv
    if self.loss_type == "softmax":
        x = F.clip(x, min=-1., max=0.99999)
        x = quantize(x, self.output_dim)
        x = self.embed(x)  # (B, T, C), T=1
    else:
        x = F.unsqueeze(x, axes=[-1])  # (B, T, 1), T=1
        x = self.embed(x)  # (B, T, C)
    x = F.transpose(x, perm=[0, 2, 1])

    # Residual & Skip-connection & linears
    z = self.resnet.add_input(x, condition)
    z = F.transpose(z, [0, 2, 1])
    z = F.relu(self.proj2(F.relu(self.proj1(z))))  # (B, T, C)

    # Output
    y = self.proj3(z)
    return y
def forward(self, x, condition=None):
    """Compute the output distribution (represented by its parameters).

    Args:
        x (Variable): shape(B, T), dtype float32, the input waveform.
        condition (Variable, optional): shape(B, C_cond, T), dtype float32, the
            upsampled condition. Defaults to None.

    Returns:
        Variable: shape(B, T, C_output), dtype float32, the parameter of the
            output distributions.
    """
    # Causal Conv
    if self.loss_type == "softmax":
        x = F.clip(x, min=-1., max=0.99999)
        x = quantize(x, self.output_dim)
        x = self.embed(x)  # (B, T, C)
    else:
        x = F.unsqueeze(x, axes=[-1])  # (B, T, 1)
        x = self.embed(x)  # (B, T, C)
    x = F.transpose(x, perm=[0, 2, 1])  # (B, C, T)

    # Residual & Skip-connection & linears
    z = self.resnet(x, condition)
    z = F.transpose(z, [0, 2, 1])
    z = F.relu(self.proj2(F.relu(self.proj1(z))))

    # Output
    y = self.proj3(z)
    return y
def erniesage_v2_aggregator(gw, feature, hidden_size, act, initializer,
                            learning_rate, name):
    feature = L.unsqueeze(feature, [-1])
    msg = gw.send(ernie_send, nfeat_list=[("term_ids", feature)])
    neigh_feature = gw.recv(
        msg, lambda feat: F.layers.sequence_pool(feat, pool_type="sum"))

    term_ids = feature
    cls = L.fill_constant_batch_size_like(term_ids, [-1, 1, 1], "int64", 1)
    term_ids = L.concat([cls, term_ids], 1)
    term_ids.stop_gradient = True

    ernie = ErnieModel(
        term_ids, L.zeros_like(term_ids),
        config=self.config.ernie_config)

    self_feature = ernie.get_pooled_output()

    self_feature = L.fc(
        self_feature,
        hidden_size,
        act=act,
        param_attr=F.ParamAttr(name=name + "_l",
                               learning_rate=learning_rate),
    )
    neigh_feature = L.fc(
        neigh_feature,
        hidden_size,
        act=act,
        param_attr=F.ParamAttr(name=name + "_r",
                               learning_rate=learning_rate),
    )
    output = L.concat([self_feature, neigh_feature], axis=1)
    output = L.l2_normalize(output, axis=1)
    return output
def forward(self, *args, **kwargs):
    """
    Args:
        labels (optional, `Variable` of shape [batch_size, seq_len]):
            ground truth label id for each token
    Returns:
        loss (`Variable` of shape []):
            Cross entropy loss mean over batch and time, ignoring positions
            where label == -100; if labels are not set, returns None
        logits (`Variable` of shape [batch_size, seq_len, num_labels]):
            output logits of classifier
    """
    labels = kwargs.pop('labels', None)
    pooled, encoded = super(ErnieModelForTokenClassification, self).forward(*args, **kwargs)
    hidden = self.dropout(encoded)  # maybe not?
    logits = self.classifier(hidden)

    if labels is not None:
        if len(labels.shape) == 2:
            labels = L.unsqueeze(labels, axes=[-1])
        loss = L.softmax_with_cross_entropy(logits, labels)
        loss = L.reduce_mean(loss)
    else:
        loss = None
    return loss, logits
def forward(self, src, src_length):
    # encoding
    encoder_output, encoder_final_state = self.encoder(src, src_length)

    # decoder initial states
    decoder_initial_states = [
        encoder_final_state,
        self.decoder.lstm_attention.cell.get_initial_states(
            batch_ref=encoder_output, shape=[self.hidden_size])
    ]

    # attention mask to avoid paying attention to paddings
    src_mask = layers.sequence_mask(
        src_length,
        maxlen=layers.shape(src)[1],
        dtype=encoder_output.dtype)
    encoder_padding_mask = (src_mask - 1.0) * 1e9
    encoder_padding_mask = layers.unsqueeze(encoder_padding_mask, [1])

    # Tile the batch dimension with beam_size
    encoder_output = BeamSearchDecoder.tile_beam_merge_with_batch(
        encoder_output, self.beam_size)
    encoder_padding_mask = BeamSearchDecoder.tile_beam_merge_with_batch(
        encoder_padding_mask, self.beam_size)

    # dynamic decoding with beam search
    rs, _ = self.beam_search_decoder(
        inits=decoder_initial_states,
        encoder_output=encoder_output,
        encoder_padding_mask=encoder_padding_mask)
    return rs