def _stacked_dynamic_rnn(self, x, h, seq_length):
    """Run the stacked multi-layer (unidirectional) dynamic RNN.

    Each layer consumes the output sequence of the previous layer. Dropout
    is applied between layers (never after the last one) when
    ``self.dropout`` is non-zero.

    Args:
        x: Input sequence fed to the first layer.
        h: Initial state. For LSTM a pair ``(h_0, c_0)`` whose first axis is
            indexed by layer; otherwise a single tensor indexed by layer.
        seq_length: Forwarded unchanged to ``self.rnn``.

    Returns:
        ``(output, h_n)`` or, for LSTM, ``(output, (h_n, c_n))``. The final
        states are reshaped to the shape of the matching initial state.
    """
    pre_layer = x
    h_n = ()
    c_n = ()
    output = 0
    for i in range(self.num_layers):
        if self.has_bias:
            w_ih, w_hh, b_ih, b_hh = (self.w_ih_list[i], self.w_hh_list[i],
                                      self.b_ih_list[i], self.b_hh_list[i])
        else:
            w_ih, w_hh = self.w_ih_list[i], self.w_hh_list[i]
            b_ih, b_hh = None, None
        if self.is_lstm:
            h_i = (h[0][i], h[1][i])
        else:
            h_i = h[i]
        output, h_t = self.rnn(pre_layer, h_i, seq_length, w_ih, w_hh, b_ih, b_hh)
        # Inter-layer dropout only; the last layer's output is returned as is.
        pre_layer = self.dropout_op(output) if (
            self.dropout != 0 and i < self.num_layers - 1) else output
        if self.is_lstm:
            h_n += (h_t[0],)
            c_n += (h_t[1],)
        else:
            h_n += (h_t,)
    if self.is_lstm:
        h_n = P.Concat(0)(h_n)
        c_n = P.Concat(0)(c_n)
        # Reshape once to the layout of the initial states.
        return output, (h_n.view(h[0].shape), c_n.view(h[1].shape))
    h_n = P.Concat(0)(h_n)
    return output, h_n.view(h.shape)
def _stacked_bi_dynamic_rnn(self, x, h, seq_length, weights):
    """Run the stacked bidirectional dynamic RNN.

    For every layer the input is processed once as is (forward direction)
    and once reversed along axis 0 (backward direction). The backward
    output is reversed back and concatenated with the forward output on
    axis 2. Dropout is applied between layers when ``self.dropout`` is
    non-zero.

    Args:
        x: Input sequence fed to the first layer.
        h: Initial state. For LSTM a pair ``(h_0, c_0)``; otherwise a single
            tensor. Index ``2 * i`` holds the forward state of layer ``i``
            and ``2 * i + 1`` the backward state.
        seq_length: Optional sequence lengths. When ``None`` the whole
            sequence is reversed with ``ReverseV2``; otherwise
            ``ReverseSequence`` is used with these lengths.
        weights: Per-direction weights, indexed like ``h``. Each entry is
            ``(w_ih, w_hh, b_ih, b_hh)`` when ``self.has_bias`` is set and
            ``(w_ih, w_hh)`` otherwise.

    Returns:
        ``(output, h_n)`` or, for LSTM, ``(output, (h_n, c_n))``. The final
        states are reshaped to the shape of the matching initial state.
    """
    pre_layer = x
    h_n = ()
    c_n = ()
    output = 0
    for i in range(self.num_layers):
        offset = i * 2
        if self.has_bias:
            w_f_ih, w_f_hh, b_f_ih, b_f_hh = weights[offset]
            w_b_ih, w_b_hh, b_b_ih, b_b_hh = weights[offset + 1]
        else:
            w_f_ih, w_f_hh = weights[offset]
            w_b_ih, w_b_hh = weights[offset + 1]
            b_f_ih, b_f_hh, b_b_ih, b_b_hh = None, None, None, None
        if self.is_lstm:
            h_f_i = (h[0][offset], h[1][offset])
            h_b_i = (h[0][offset + 1], h[1][offset + 1])
        else:
            h_f_i = h[offset]
            h_b_i = h[offset + 1]
        # Backward direction reads the layer input reversed along axis 0.
        if seq_length is None:
            x_b = P.ReverseV2([0])(pre_layer)
        else:
            x_b = P.ReverseSequence(0, 1)(pre_layer, seq_length)
        output_f, h_t_f = self.rnn(pre_layer, h_f_i, seq_length,
                                   w_f_ih, w_f_hh, b_f_ih, b_f_hh)
        output_b, h_t_b = self.rnn(x_b, h_b_i, seq_length,
                                   w_b_ih, w_b_hh, b_b_ih, b_b_hh)
        # Undo the reversal so both directions are aligned again.
        if seq_length is None:
            output_b = P.ReverseV2([0])(output_b)
        else:
            output_b = P.ReverseSequence(0, 1)(output_b, seq_length)
        output = P.Concat(2)((output_f, output_b))
        # Inter-layer dropout only; the last layer's output is returned as is.
        pre_layer = self.dropout_op(output) if (
            self.dropout != 0 and i < self.num_layers - 1) else output
        if self.is_lstm:
            h_n += (h_t_f[0], h_t_b[0])
            c_n += (h_t_f[1], h_t_b[1])
        else:
            h_n += (h_t_f, h_t_b)
    if self.is_lstm:
        h_n = P.Concat(0)(h_n)
        c_n = P.Concat(0)(c_n)
        return output, (h_n.view(h[0].shape), c_n.view(h[1].shape))
    h_n = P.Concat(0)(h_n)
    return output, h_n.view(h.shape)
def __init__(self, begin, stride):
    """Store the offset window parameters and create the operators used.

    Args:
        begin: Half-width of the offset window; ``self.range`` spans
            ``[-begin, begin + 1)``.
        stride: Stored on the instance as ``self.stride``.
    """
    super(GetOffsetPosition, self).__init__()
    self.begin = begin
    self.stride = stride

    # Offset range derived from the stored window half-width.
    self.range = nn.Range(-self.begin, self.begin + 1)

    # Shape / dtype inspection.
    self.shape = ops.Shape()
    self.dtype = ops.DType()
    self.cast = ops.Cast()

    # Tensor construction and rearrangement.
    self.meshgrid = ops.Meshgrid()
    self.reshape = ops.Reshape()
    self.tile = ops.Tile()
    self.cat_a0 = ops.Concat(axis=0)
    self.cat_a1 = ops.Concat(axis=1)
def construct(self, x, seq_lengths):
    """Defines the ReverseSequence operator computation performed.

    Builds, for every (batch, step) pair, the index of the element that
    should land there and gathers ``x`` with ``GatherNd``. The reversed
    index is ``(seq_length - 1) - forward_idx``; wherever that is negative
    the forward index is kept instead.

    Args:
        x: Input tensor; its ``self.batch_dim`` and ``self.seq_dim`` axes
            give the batch size and the maximum sequence length.
        seq_lengths: Per-sample lengths; its dtype is used for the index
            grids.

    Returns:
        The gathered tensor.
    """
    subtract = ops.Sub()
    add_axis = ops.ExpandDims()
    swap_axes = ops.Transpose()

    grid_shape = (x.shape[self.batch_dim], x.shape[self.seq_dim])
    index_dtype = seq_lengths.dtype

    # Index of the last valid step of every sample, as a column vector.
    last_step = subtract(seq_lengths, ops.OnesLike()(seq_lengths))
    # NOTE(review): make_shape presumably yields a grid of indices along
    # the given axis (0: batch, 1: step) - confirm against its definition.
    batch_idx = self.make_shape(grid_shape, index_dtype, 0)
    forward_idx = self.make_shape(grid_shape, index_dtype, 1)
    last_step = last_step.view(-1, 1)

    reverse_idx = subtract(last_step, forward_idx)
    # Negative entries fall back to the forward index.
    is_negative = ops.Less()(reverse_idx, ops.ZerosLike()(reverse_idx))
    reverse_idx = ops.Select()(is_negative, forward_idx, reverse_idx)

    reverse_idx = add_axis(reverse_idx, 2)
    batch_idx = add_axis(batch_idx, 2)

    if self.batch_dim > self.seq_dim:
        perm = (1, 0, 2)
        batch_idx = swap_axes(batch_idx, perm)
        reverse_idx = swap_axes(reverse_idx, perm)
        x = swap_axes(x, perm)

    gather_indices = ops.Concat(2)((batch_idx, reverse_idx))
    return ops.GatherNd()(x, gather_indices)
def __init__(self, ks):
    """Store the kernel size and create the operators used by the cell.

    Args:
        ks: Kernel size; also the number of pieces ``self.split`` cuts the
            last axis into.
    """
    super(RegenerateFeatureMap, self).__init__()
    self.ks = ks

    # Cut the last axis into ``ks`` pieces, re-join along axis 2.
    self.split = ops.Split(axis=-1, output_num=ks)
    self.concat = ops.Concat(axis=2)

    # Shape helpers.
    self.shape = ops.Shape()
    self.reshape = ops.Reshape()
def __init__(self, cin, cout, up_f=2, enable_dcn=False):
    """Build the projection, node and upsampling layers of an IDA-up step.

    Args:
        cin: Number of input channels of the projection layer.
        cout: Number of output channels of every layer.
        up_f: Upsampling factor; the transposed convolution uses kernel
            size ``2 * up_f``, stride ``up_f`` and padding ``up_f // 2``.
        enable_dcn: Use ``DeformConv`` for projection and node instead of
            ``nn.Conv2dBnAct``.
    """
    super(IDAUp, self).__init__()
    self.enable_dcn = enable_dcn

    if enable_dcn:
        self.proj = DeformConv(cin, cout)
        self.node = DeformConv(cout, cout)
    else:
        # Settings shared by both conv + BN + ReLU layers.
        conv_bn_relu = dict(stride=1, pad_mode='same', has_bias=False,
                            has_bn=True, momentum=BN_MOMENTUM,
                            activation='relu', after_fake=False)
        self.proj = nn.Conv2dBnAct(cin, cout, kernel_size=1, **conv_bn_relu)
        # The node layer takes 2 * cout input channels in this variant.
        self.node = nn.Conv2dBnAct(2 * cout, cout, kernel_size=3, **conv_bn_relu)

    self.up = nn.Conv2dTranspose(cout, cout, up_f * 2, stride=up_f,
                                 pad_mode='pad', padding=up_f // 2)
    self.concat = ops.Concat(axis=1)
def compute_log_likelihood(self, x):
    """
    Return log-likelihood of the model for each example.
    WARNING: This is really a joint distribution only if the DAG
    constraint on the mask is satisfied; otherwise the joint does not
    integrate to one.

    Parameters
    ----------
    x: mindspore.Tensor
        (batch_size, input_dim)

    Returns
    -------
    (batch_size, input_dim) log-likelihoods
    """
    weights, biases, extra_params = self.get_parameters(mode="wbx")
    density_params = self.forward_given_params(x, weights, biases)
    # Whether the extra parameters supply the second distribution argument.
    uses_extra = len(extra_params) != 0

    columns = []
    for var_idx in range(self.input_dim):
        observed = x[:, var_idx]
        var_params = density_params[var_idx]
        if uses_extra:
            flat_param = var_params.view(var_params.shape[0])
            log_prob = self.get_distribution(observed, flat_param,
                                             extra_params[var_idx])
        else:
            # Both distribution arguments come from the network output.
            parts = ops.Unstack(axis=1)(var_params)
            log_prob = self.get_distribution(observed, parts[0], parts[1])
        columns.append(ops.expand_dims(log_prob, 1))

    return ops.Concat(axis=1)(columns)
def __init__(self):
    """Create the 3x3 conv stack, the six 1x1 projections and helper ops."""
    super(BasicCell, self).__init__()
    width = 128       # channels of the 3x3 stack and projection inputs
    proj_width = 64   # channels produced by every 1x1 projection

    # Three 3x3 conv + BN pairs.
    self.conv3x3_1 = _conv3x3(width, width)
    self.bn3x3_1 = _bn(width)
    self.conv3x3_2 = _conv3x3(width, width)
    self.bn3x3_2 = _bn(width)
    self.conv3x3_3 = _conv3x3(width, width)
    self.bn3x3_3 = _bn(width)

    self.mp = nn.MaxPool2d(kernel_size=3, stride=1, pad_mode="same")

    # Six 1x1 projection + BN pairs.
    self.proj1 = _conv1x1(width, proj_width)
    self.bn1 = _bn(proj_width)
    self.proj2 = _conv1x1(width, proj_width)
    self.bn2 = _bn(proj_width)
    self.proj3 = _conv1x1(width, proj_width)
    self.bn3 = _bn(proj_width)
    self.proj4 = _conv1x1(width, proj_width)
    self.bn4 = _bn(proj_width)
    self.proj5 = _conv1x1(width, proj_width)
    self.bn5 = _bn(proj_width)
    self.proj6 = _conv1x1(width, proj_width)
    self.bn6 = _bn(proj_width)

    self.relu = P.ReLU()
    self.concat = ops.Concat(axis=1)
def __init__(self, net_config):
    """Create the backbone wrapper, the loss criteria and the loss weights.

    Args:
        net_config: Configuration object. The fields read here are
            ``mse_loss``, ``dense_hp``, ``reg_loss``, the ``*_weight``
            values, ``hm_hp``, ``reg_offset`` and ``reg_hp_offset``.
    """
    super(CenterNetMultiPoseLossCell, self).__init__()
    self.network = GatherMultiPoseFeatureCell(net_config)
    self.reduce_sum = ops.ReduceSum()

    # Loss criteria.
    self.crit = FocalLoss()
    if net_config.mse_loss:
        self.crit_hm_hp = nn.MSELoss()
    else:
        self.crit_hm_hp = self.crit
    if net_config.dense_hp:
        self.crit_kp = nn.L1Loss(reduction='sum')
    else:
        self.crit_kp = RegWeightedL1Loss()
    self.crit_reg = RegLoss(net_config.reg_loss)

    # Loss weights.
    self.hm_weight = net_config.hm_weight
    self.hm_hp_weight = net_config.hm_hp_weight
    self.hp_weight = net_config.hp_weight
    self.wh_weight = net_config.wh_weight
    self.off_weight = net_config.off_weight

    # Optional heads and the resulting output indices: every enabled head
    # shifts the following index by one.
    self.hm_hp = net_config.hm_hp
    self.dense_hp = net_config.dense_hp
    self.reg_offset = net_config.reg_offset
    self.reg_hp_offset = net_config.reg_hp_offset
    self.hm_hp_ind = 3 if self.hm_hp else 2
    if self.reg_offset:
        self.reg_ind = self.hm_hp_ind + 1
    else:
        self.reg_ind = self.hm_hp_ind
    if self.reg_hp_offset:
        self.reg_hp_ind = self.reg_ind + 1
    else:
        self.reg_hp_ind = self.reg_ind

    # just used for check
    self.print = ops.Print()
    self.concat = ops.Concat(axis=1)
    self.reshape = ops.Reshape()
def __init__(self, padding: Union[int, Tuple[int, int]]):
    """Normalise ``padding`` to a pair and create the operators used.

    Args:
        padding: Either one int, used for both entries of the pair, or an
            explicit pair that is stored unchanged.
    """
    super().__init__()
    # A single int is duplicated into a two-element tuple.
    self.padding = (padding, padding) if isinstance(padding, int) else padding
    self.concat = ops.Concat(-1)
    self.tile = ops.Tile()
def __init__(self):
    """Create the primitives for a log-softmax over the last axis."""
    super(log_softmax, self).__init__()
    self.axis = -1

    # Reductions.
    self.maxi = P.ReduceMax()
    self.sums = P.ReduceSum()

    # Element-wise maths.
    self.exp = P.Exp()
    self.log = P.Log()

    # Shape manipulation.
    self.concat = P.Concat(-1)
    self.expanddims = P.ExpandDims()
def __init__(self, padding: Union[int, Tuple[int, int]], value):
    """Normalise ``padding`` to a pair, keep the fill value, create the ops.

    Args:
        padding: Either one int, used for both entries of the pair, or an
            explicit pair that is stored unchanged.
        value: Stored on the instance as ``self.value``.
    """
    super().__init__()
    # A single int is duplicated into a two-element tuple.
    self.padding = (padding, padding) if isinstance(padding, int) else padding
    self.value = value
    self.concat = ops.Concat(-1)
    self.fill = ops.Fill()
def __init__(self):
    """Create the flip-index table and the operators used by the cell."""
    super(FlipLROff, self).__init__()
    self.gather_flip_feat = GatherFlipFeature()

    # Permutation over 17 entries: index 0 stays, every following pair
    # (1, 2), (3, 4), ... is swapped.
    flip_order = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
    self.flip_index = Tensor(np.array(flip_order, np.int32))

    self.half = ops.Split(axis=0, output_num=2)
    self.split = ops.Split(axis=1, output_num=2)
    self.flip = ops.ReverseV2(axis=[3])
    self.concat = ops.Concat(axis=1)
def __init__(self, mixture_size: int, do_layer_norm: bool = False) -> None:
    """Create the mixing parameters and the operators used by the cell.

    Args:
        mixture_size: Number of scalar parameters to create (one per mixed
            tensor).
        do_layer_norm: Stored on the instance as ``self.do_layer_norm``.
    """
    super(Scalar_mix, self).__init__()
    self.mixture_size = mixture_size
    self.do_layer_norm = do_layer_norm

    # One scalar per mixed tensor, all initialised to 0.0.
    scalars = []
    for _ in range(mixture_size):
        scalars.append(Parameter(Tensor(np.array([0.0]), mindspore.float32)))
    self.scalar_parameters = ParameterTuple(scalars)
    self.gamma = Parameter(Tensor(np.array([0.0]), mindspore.float32))

    self.sum = P.ReduceSum()
    self.sqrt = P.Sqrt()
    self.cat = P.Concat()
    # NOTE(review): ExpandDims normally receives its axis at call time,
    # not in the constructor - confirm this call works with the MindSpore
    # version in use and matches how ``self.unsqueeze`` is invoked.
    self.unsqueeze = P.ExpandDims(0)
def __init__(self):
    """Create the operators and axis permutations used by the cell."""
    super(GetSurroundFeature, self).__init__()

    # Axis permutations: (0, 2, 3, 1) moves axis 1 last, (0, 3, 1, 2)
    # moves the last axis back to position 1.
    self.perm_list = (0, 2, 3, 1)
    self.order_list = (0, 3, 1, 2)

    # Shape manipulation.
    self.shape = ops.Shape()
    self.reshape = ops.Reshape()
    self.transpose = ops.Transpose()
    self.expand_dims = ops.ExpandDims()
    self.tile = ops.Tile()

    # Splitting, joining and gathering.
    self.half = ops.Split(axis=-1, output_num=2)
    self.concat = ops.Concat(axis=1)
    self.gather_nd = ops.GatherNd()
def __init__(self, enable_cpu_gather=True):
    """Create the operators used to gather features.

    Args:
        enable_cpu_gather: When true the gather is done with ``GatherD``
            (and ``ExpandDims`` is created as a helper); otherwise
            ``GatherNd`` is used.
    """
    super(GatherFeature, self).__init__()
    self.tile = ops.Tile()
    self.shape = ops.Shape()
    self.concat = ops.Concat(axis=1)
    self.reshape = ops.Reshape()
    self.enable_cpu_gather = enable_cpu_gather
    if self.enable_cpu_gather:
        # The attribute keeps the name ``gather_nd`` in both modes.
        self.gather_nd = ops.GatherD()
        self.expand_dims = ops.ExpandDims()
    else:
        # The primitive is spelled ``GatherNd``; ``GatherND`` does not
        # match the name used elsewhere in this code base.
        self.gather_nd = ops.GatherNd()
def __init__(self, net_config, K=100, enable_nms_fp16=True):
    """Create the sub-cells, operators and settings used for decoding.

    Args:
        net_config: Configuration object; ``hm_hp``, ``dense_hp``,
            ``reg_offset`` and ``reg_hp_offset`` are read from it.
        K: Stored as ``self.K``.
        enable_nms_fp16: Forwarded to the ``NMS`` sub-cell.
    """
    super(MultiPoseDecode, self).__init__()

    # Plain settings.
    self.K = K
    self.thresh = 0.1
    self.perm_list = (0, 2, 1, 3)

    # Sub-cells (kept in their original creation order).
    self.nms = NMS(enable_nms_fp16=enable_nms_fp16)
    self.gather_topk = GatherTopK()
    self.gather_topk_channel = GatherTopKChannel()
    self.gather_by_ind = GatherFeatureByInd()
    self.flip_lr = FlipLR()
    self.flip_lr_off = FlipLROff()
    self.flip_tensor = FlipTensor()
    self.trans_gather_feature = TransposeGatherFeature()

    # Splitting and joining.
    self.half = ops.Split(axis=-1, output_num=2)
    self.half_first = ops.Split(axis=0, output_num=2)
    self.split = ops.Split(axis=-1, output_num=4)
    self.concat = ops.Concat(axis=1)
    self.concat_a2 = ops.Concat(axis=2)
    self.concat_a3 = ops.Concat(axis=3)

    # Shape and dtype handling.
    self.shape = ops.Shape()
    self.reshape = ops.Reshape()
    self.expand_dims = ops.ExpandDims()
    self.transpose = ops.Transpose()
    self.tile = ops.Tile()
    self.dtype = ops.DType()
    self.cast = ops.Cast()

    # Arithmetic, comparison and reductions.
    self.add = ops.TensorAdd()
    self.greater = ops.Greater()
    self.square = ops.Square()
    self.sqrt = ops.Sqrt()
    self.reduce_sum = ops.ReduceSum()
    self.min = ops.ArgMinWithValue(axis=3)
    self.max = ops.Maximum()

    # Optional heads and the resulting output indices: every enabled head
    # shifts the following index by one.
    self.hm_hp = net_config.hm_hp
    self.dense_hp = net_config.dense_hp
    self.reg_offset = net_config.reg_offset
    self.reg_hp_offset = net_config.reg_hp_offset
    self.hm_hp_ind = 3 if self.hm_hp else 2
    if self.reg_offset:
        self.reg_ind = self.hm_hp_ind + 1
    else:
        self.reg_ind = self.hm_hp_ind
    if self.reg_hp_offset:
        self.reg_hp_ind = self.reg_ind + 1
    else:
        self.reg_hp_ind = self.reg_ind
def __init__(self, in_channels, out_channels, kernel_size, residual):
    """Create the 1x1 convolution, batch norm and helper ops of a root node.

    Args:
        in_channels: Input channels of the convolution.
        out_channels: Output channels of the convolution and batch norm.
        kernel_size: Only used to derive the padding,
            ``(kernel_size - 1) // 2``; the convolution kernel itself is 1.
        residual: Stored on the instance as ``self.residual``.
    """
    super(Root, self).__init__()
    pad = (kernel_size - 1) // 2
    self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=1,
                          has_bias=False, pad_mode='pad', padding=pad)
    self.bn = nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)
    self.relu = ops.ReLU()
    self.residual = residual
    self.cat = ops.Concat(axis=1)
def __init__(self, net_config, K=100, enable_nms_fp16=True):
    """Create the sub-cells and operators used for detection decoding.

    Args:
        net_config: Configuration object; only ``reg_offset`` is read.
        K: Stored as ``self.K``.
        enable_nms_fp16: Forwarded to the ``NMS`` sub-cell.
    """
    super(DetectionDecode, self).__init__()
    self.K = K
    self.reg_offset = net_config.reg_offset

    # Sub-cells (kept in their original creation order).
    self.nms = NMS(enable_nms_fp16=enable_nms_fp16)
    self.gather_topk = GatherTopK()
    self.trans_gather_feature = TransposeGatherFeature()
    self.Sigmoid = nn.Sigmoid()

    # Primitive operators.
    self.shape = ops.Shape()
    self.reshape = ops.Reshape()
    self.expand_dims = ops.ExpandDims()
    self.half = ops.Split(axis=-1, output_num=2)
    self.concat_a2 = ops.Concat(axis=2)
    self.add = ops.TensorAdd()
def construct(self, x1, x2=None, adj=None, modal=1, cpa=False):
    """Run the two-stream backbone and return features (and logits).

    Args:
        x1: Input of the visible branch.
        x2: Input of the thermal branch.
        adj: Not used in this method.
        modal: 0 runs both branches and concatenates their outputs,
            1 runs only the visible branch, 2 only the thermal branch.
        cpa: Not used in this method.

    Returns:
        Training: ``(feat, feat_att, out, out_att)``; when ``self.part``
        is not positive the attention slots repeat ``feat`` / ``out``.
        Evaluation: the L2-normalised ``(feat, feat_att)``, or ``feat``
        twice when ``self.part`` is not positive.
    """
    # domain specific block
    if modal == 0:
        visible = self.visible_module(x1)
        thermal = self.thermal_module(x2)
        x = P.Concat()((visible, thermal))
    elif modal == 1:
        x = self.visible_module(x1)
    elif modal == 2:
        x = self.thermal_module(x2)

    # shared four blocks
    x = self.base_resnet(x)
    pooled = self.avgpool(x, (2, 3))
    pooled = pooled.view(pooled.shape[0], pooled.shape[1])
    # feat = self.bottleneck(x_pool)  # do not support cpu
    feat = pooled

    if self.part > 0:
        # intra_modality weighted part attention
        feat_att = self.wpa(x, feat, 1)  # why t==1?

    if self.training:
        # cross-modality graph attention
        # TODO: Add cross-modality graph attention mindspore version
        out = self.classifier(feat)
        if self.part > 0:
            out_att = self.classifier(feat_att)
            return feat, feat_att, out, out_att
        return feat, feat, out, out  # just for debug

    if self.part > 0:
        return self.l2norm(feat), self.l2norm(feat_att)
    return self.l2norm(feat), self.l2norm(feat)  # just for debug
def _stacked_bi_dynamic_rnn(self, x, xr, h, seq_length):
    """stacked bidirectional dynamic_rnn

    The forward and backward stacks run side by side: layer ``i`` of each
    direction consumes the output of layer ``i - 1`` of the same direction
    (the backward one after it has been reversed back).

    Args:
        x: Input of the first forward layer.
        xr: Input of the first backward layer.
        h: Pair ``(h_0, c_0)``; index ``2 * i`` holds the forward state of
            layer ``i`` and ``2 * i + 1`` the backward state.
        seq_length: Optional sequence lengths. When ``None`` the backward
            output is reversed with ``ReverseV2``; otherwise with
            ``ReverseSequence`` and these lengths.

    Returns:
        ``(outputs, (h_n, c_n))`` where every element is stacked over the
        layers on a new leading axis; ``outputs`` passes through
        ``self.dropout``.
    """
    join_features = P.Concat(2)
    join_states = P.Concat(1)

    fwd_input, bwd_input = x, xr
    hidden_per_layer = ()
    cell_per_layer = ()
    output_per_layer = ()

    for layer_idx, (fwd_cell, bwd_cell) in enumerate(
            zip(self.forward_layers, self.backward_layers)):
        fwd_slot = layer_idx * 2
        bwd_slot = fwd_slot + 1
        fwd_state = (h[0][fwd_slot], h[1][fwd_slot])
        bwd_state = (h[0][bwd_slot], h[1][bwd_slot])

        fwd_out, fwd_final = fwd_cell(fwd_input, fwd_state, seq_length)
        bwd_out, bwd_final = bwd_cell(bwd_input, bwd_state, seq_length)

        # Reverse the backward output along axis 0.
        if seq_length is None:
            bwd_out = P.ReverseV2([0])(bwd_out)
        else:
            bwd_out = P.ReverseSequence(0, 1)(bwd_out, seq_length)

        output_per_layer += (join_features((fwd_out, bwd_out)),)

        # Each direction feeds its own next layer.
        fwd_input = fwd_out
        bwd_input = bwd_out

        hidden_per_layer += (join_states((fwd_final[0], bwd_final[0])),)
        cell_per_layer += (join_states((fwd_final[1], bwd_final[1])),)

    h_n = P.Stack(0)(hidden_per_layer)
    c_n = P.Stack(0)(cell_per_layer)
    outputs = self.dropout(P.Stack(0)(output_per_layer))
    return outputs, (h_n, c_n)
def __init__(self, filters, n_filters, max_chars_per_token, char_embed_dim,
             n_chars, n_highway, output_dim, activation):
    """Build the character embedding, CNN filters, highway and projection.

    Args:
        filters: Iterable of ``(width, num)`` pairs; one ``nn.Conv1d`` with
            kernel size ``width`` and ``num`` output channels is created
            per pair.
        n_filters: Feature size fed to the highway layers and to the
            projection layer.
        max_chars_per_token: Stored as ``self.max_chars_per_token``.
        char_embed_dim: Character embedding size (input channels of every
            convolution).
        n_chars: Number of characters; the embedding table has
            ``n_chars + 1`` rows, with index 0 as padding.
        n_highway: Forwarded to ``HighWay``.
        output_dim: Output size of the projection layer.
        activation: ``'tanh'`` or ``'relu'``.

    Raises:
        ValueError: If ``activation`` is neither ``'tanh'`` nor ``'relu'``.
    """
    super().__init__()

    self.max_chars_per_token = max_chars_per_token

    # activation for convolutions
    if activation == 'tanh':
        self._activation = nn.Tanh()
    elif activation == 'relu':
        self._activation = nn.ReLU()
    else:
        raise ValueError("Unknown activation")

    # init char_embedding
    self.char_embedding = Embedding(n_chars + 1, char_embed_dim,
                                    embedding_table=Uniform(1.0),
                                    padding_idx=0)

    # run convolutions
    convolutions = []
    for (width, num) in filters:
        if activation == 'tanh':
            # Fan-in scaled std: sqrt(1 / (width * char_embed_dim)).
            # The parentheses matter; without them the expression
            # evaluates to sqrt(char_embed_dim / width).
            cnn_weight_init = Normal(np.sqrt(1.0 / (width * char_embed_dim)))
        elif activation == 'relu':
            cnn_weight_init = Uniform(0.05)
        conv = nn.Conv1d(in_channels=char_embed_dim,
                         out_channels=num,
                         kernel_size=width,
                         has_bias=True,
                         weight_init=cnn_weight_init,
                         pad_mode='valid')
        convolutions.append(conv)
    self._convolutions = nn.CellList(convolutions)

    # highway layers
    self._highways = HighWay(n_filters, n_highway, 'relu')

    # projection layer
    self._projection = nn.Dense(n_filters, output_dim, has_bias=True,
                                weight_init=Normal(np.sqrt(1.0 / n_filters)))

    # array operations
    self.transpose = P.Transpose()
    self.concat = P.Concat(-1)
    self.max = P.ReduceMax()
def __init__(self, batch_size, temperature=1, world_size=1):
    """Store the loss hyper-parameters and create the operators used.

    Args:
        batch_size: Per-device batch size.
        temperature: Stored as ``self.temperature``.
        world_size: Number of devices; ``self.N`` is
            ``2 * batch_size * world_size``.
    """
    super(NT_Xent_Loss, self).__init__()

    # Parameters.
    self.LARGE_NUM = 1e9
    self.batch_size = batch_size
    self.temperature = temperature
    self.world_size = world_size
    self.N = 2 * self.batch_size * self.world_size

    # Tail_Loss.
    self.criterion = CrossEntropyLoss(reduction="mean")
    self.norm = P.L2Normalize(axis=1)
    self.one_hot = P.OneHot()
    self.range = nn.Range(0, self.batch_size)
    # On / off values for the one-hot encoding.
    self.one = Tensor(1.0, mstype.float32)
    self.zero = Tensor(0.0, mstype.float32)
    self.transpose = P.Transpose()
    self.matmul = nn.MatMul()

    # Operations.
    self.ones = P.Ones()
    self.zeros = P.Zeros()
    self.cat1 = P.Concat(axis=1)
def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
             bidirectional, num_classes, weight, batch_size):
    """Build the frozen embedding, the LSTM encoder and the dense decoder.

    Args:
        vocab_size: Number of rows of the embedding table.
        embed_size: Embedding size and LSTM input size.
        num_hiddens: LSTM hidden size.
        num_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM is bidirectional; this also doubles
            the decoder input size.
        num_classes: Output size of the decoder.
        weight: Initial embedding table.
        batch_size: Batch size used to create the default LSTM states.
    """
    super(SentimentNet, self).__init__()

    # Map words to vectors; the table is not trained.
    self.embedding = nn.Embedding(vocab_size, embed_size,
                                  embedding_table=weight)
    self.embedding.embedding_table.requires_grad = False
    self.trans = ops.Transpose()
    self.perm = (1, 0, 2)

    # Both encoder flavours take exactly the same arguments.
    encoder_args = dict(input_size=embed_size,
                        hidden_size=num_hiddens,
                        num_layers=num_layers,
                        has_bias=True,
                        bidirectional=bidirectional,
                        dropout=0.0)
    if context.get_context("device_target") in STACK_LSTM_DEVICE:
        # stack lstm by user
        encoder_cls, make_state = StackLSTM, stack_lstm_default_state
    else:
        # standard lstm
        encoder_cls, make_state = nn.LSTM, lstm_default_state
    self.encoder = encoder_cls(**encoder_args)
    self.h, self.c = make_state(batch_size, num_hiddens, num_layers,
                                bidirectional)

    self.concat = ops.Concat(1)
    # Decoder input: 2 * num_hiddens, doubled again when bidirectional.
    decoder_in = num_hiddens * 4 if bidirectional else num_hiddens * 2
    self.decoder = nn.Dense(decoder_in, num_classes)
def construct(self, img1, img2, label1, label2, modal=0, cpa=False):
    """Compute the identity loss plus the triplet loss for a batch pair.

    Args:
        img1: Images passed to the backbone as its first input.
        img2: Images passed to the backbone as ``x2``.
        label1: Labels of ``img1``.
        label2: Labels of ``img2``.
        modal: Forwarded to the backbone.
        cpa: Accepted but not forwarded; the backbone is always called
            with ``cpa=False``.

    Returns:
        ``loss_id + loss_tri``, where ``loss_id`` is the summed
        cross-entropy divided by the number of labels.
    """
    feat, _feat_att, out, _out_att = self._backbone(
        img1, x2=img2, modal=modal, cpa=False)

    label = P.Concat()((label1, label2))
    label_int = P.Cast()(label, ms.int32)

    # Identity loss: sum of the per-sample losses over the batch size.
    per_sample = self._ce_loss(out, label_int)
    loss_id = P.ReduceSum()(per_sample) / label_int.shape[0]

    # The triplet loss receives the labels before the int32 cast.
    loss_tri = self._tri_loss(feat, label)

    return loss_id + loss_tri
def average_gradients(tower_grads):
    """Average the gradients of every variable across all towers.

    Args:
        tower_grads: One sequence per tower, each holding
            ``(gradient, variable)`` pairs in the same variable order.

    Returns:
        A list with one ``(averaged_gradient, variable)`` pair per
        variable; the variable is taken from the first tower. When the
        first tower's gradient is ``None`` the pair is passed through
        unchanged.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        g0, v0 = grad_and_vars[0]
        if g0 is None:
            average_grads.append((g0, v0))
            continue
        # the gradient is type IndexedSlices
        # to do
        # a normal tensor can just do a simple average
        expand_dims = P.ExpandDims()
        # Add a leading 'tower' axis to every gradient.
        grads = [expand_dims(g, 0) for g, _ in grad_and_vars]
        # Average over the 'tower' dimension. ReduceMean is a primitive:
        # it has to be instantiated first and then applied to (tensor, axis).
        grad = P.Concat(0)(grads)
        grad = P.ReduceMean()(grad, 0)
        average_grads.append((grad, v0))
    return average_grads
def construct(self, inputs, targets):
    """Compute the triplet ranking loss with hardest-example mining.

    For every anchor the largest distance to a sample with the same label
    and the smallest distance to a sample with a different label are
    selected and passed to ``self.ranking_loss``.

    Args:
        inputs: feature matrix with shape (batch_size, feat_dim)
        targets: ground truth labels, one per row of ``inputs``; they are
            broadcast to (batch_size, batch_size) to build the mask.

    Returns:
        The value returned by ``self.ranking_loss``.

    Note:
        If an anchor has no positive or no negative in the batch, details
        are written to ``self.error_msg`` (when it is set) and the
        following ``asnumpy()`` call fails, because the sentinel ``-1.0``
        is a plain float.
    """
    n = inputs.shape[0]

    # Compute pairwise distance, replace by the official when merged
    power = P.Pow()
    reduce_sum = P.ReduceSum(keep_dims=True)
    expand = P.BroadcastTo((n, n))
    transpose = P.Transpose()
    mul = P.Mul()
    add = P.Add()
    sqrt = P.Sqrt()
    equal = P.Equal()
    ones_like = P.OnesLike()

    dist = power(inputs, 2)
    # Primitive calls take their inputs positionally.
    dist = reduce_sum(dist, 1)
    dist = expand(dist)
    dist = dist + transpose(dist, (1, 0))

    temp1 = P.matmul(inputs, transpose(inputs, (1, 0)))
    temp1 = mul(-2, temp1)
    dist = add(dist, temp1)
    # for numerical stability, clip_value_max=? why must set?
    dist = P.composite.clip_by_value(
        dist, clip_value_min=1e-12, clip_value_max=100000000)
    dist = sqrt(dist)

    # For each anchor, find the hardest positive and negative
    targets = expand(targets)
    mask = equal(targets, transpose(targets, (1, 0)))
    dist_ap = []
    dist_an = []

    for i in range(n):
        # -1.0 is the "nothing found yet" sentinel for both values.
        minval = -1.0
        maxval = -1.0
        for j in range(n):
            if mask[i][j] and dist[i][j] > maxval:
                maxval = dist[i][j]
            if not mask[i][j] and (dist[i][j] < minval or minval == -1):
                minval = dist[i][j]

        if (not isinstance(minval, Tensor)
                or not isinstance(maxval, Tensor)
                or minval == -1.0 or maxval == -1.0):
            if self.error_msg is not None:
                print("Error Msg", file=self.error_msg)
                print("mask {} is".format(i), file=self.error_msg)
                print(mask[i], file=self.error_msg)
                print("dist is:", file=self.error_msg)
                print(dist[i], file=self.error_msg)
                print(maxval, file=self.error_msg)
                print(minval, file=self.error_msg)
                print(type(maxval), file=self.error_msg)
                print(type(minval), file=self.error_msg)
                self.error_msg.flush()

        # assert minval != -1.0 and isinstance(minval, Tensor)
        # assert maxval != -1.0 and isinstance(maxval, Tensor)
        dist_ap.append(maxval.asnumpy())
        dist_an.append(minval.asnumpy())

    dist_ap = Tensor(dist_ap, ms.float32)
    dist_an = Tensor(dist_an, ms.float32)

    # Compute ranking hinge loss
    y = ones_like(dist_an)
    loss = self.ranking_loss(dist_an, dist_ap, y)

    # # compute accuracy
    # correct = torch.ge(dist_an, dist_ap).sum().item()
    return loss


# class GradOriTripletLoss(nn.Cell)
#     def __init__(self, net):
#         super(GradOriTripletLoss, self).__init__()
#         self.net = net
#         self.grad_op = P.GradOperation(get_all=True)
#
#     def construct(self, inputs, targets):
#         gradient_function = self.grad_op(self.net)
#         return gradient_function(inputs, targets)
def construct(self, y_t_1, s_t_1, encoder_outputs, encoder_feature,
              enc_padding_mask, c_t_1, extra_zeros, enc_batch_extend_vocab,
              coverage, step):
    """Run one decoder step of the pointer-generator decoder.

    Args:
        y_t_1: Previous output token ids (embedded with ``self.embedding``).
        s_t_1: Previous LSTM state ``(h, c)``.
        encoder_outputs: Forwarded to the attention network.
        encoder_feature: Forwarded to the attention network.
        enc_padding_mask: Forwarded to the attention network.
        c_t_1: Previous context vector, concatenated with the embedding.
        extra_zeros: Optional block appended to the vocabulary distribution
            on axis 1 (only used when ``self.pointer_gen`` is set).
        enc_batch_extend_vocab: (batch, attn_len) vocabulary ids that
            receive the attention mass.
        coverage: Coverage value forwarded to the attention network.
        step: Decoding step index.

    Returns:
        ``(final_dist, s_t, c_t, attn_dist, p_gen, coverage)``; ``p_gen``
        is ``None`` when ``self.pointer_gen`` is not set.
    """
    if not self.training and step == 0:
        # First inference step: update coverage from the initial state.
        h_decoder, c_decoder = s_t_1
        h_decoder = h_decoder.view(-1, self.hidden_dim)
        c_decoder = c_decoder.view(-1, self.hidden_dim)
        s_t_hat = P.Concat(1)((h_decoder, c_decoder))  # (B, 2 * hidden_dim)
        c_t, _, coverage_next = self.attention_network(
            s_t_hat, encoder_outputs, encoder_feature, enc_padding_mask,
            coverage)
        coverage = coverage_next

    y_t_1_embed = self.embedding(y_t_1)
    x = self.x_content(P.Concat(1)((c_t_1, y_t_1_embed)))
    lstm_out, s_t = self.lstm(P.ExpandDims()(x, 1), s_t_1)

    h_decoder, c_decoder = s_t
    h_decoder = h_decoder.view(-1, self.hidden_dim)
    c_decoder = c_decoder.view(-1, self.hidden_dim)
    s_t_hat = P.Concat(1)((h_decoder, c_decoder))
    c_t, attn_dist, coverage_next = self.attention_network(
        s_t_hat, encoder_outputs, encoder_feature, enc_padding_mask,
        coverage)

    if self.training or step > 0:
        coverage = coverage_next

    p_gen = None
    if self.pointer_gen:
        # (B, 2 * 2 * hidden_dim + embed_dim)
        p_gen_input = P.Concat(1)((c_t, s_t_hat, x))
        p_gen = self.p_gen_linear(p_gen_input)
        p_gen = P.Sigmoid()(p_gen)

    # (B, hidden_dim * 3)
    output = P.Concat(1)((lstm_out.view(-1, self.hidden_dim), c_t))
    output = self.out1(output)  # (B, hidden_dim)
    output = self.out2(output)  # (B, vocab_size)
    # The MindSpore primitive is spelled ``Softmax``.
    vocab_dist = P.Softmax(1)(output)

    if self.pointer_gen:
        vocab_dist_ = p_gen * vocab_dist
        attn_dist_ = (1 - p_gen) * attn_dist

        if extra_zeros is not None:
            vocab_dist_ = P.Concat(1)((vocab_dist_, extra_zeros))

        # like pytorch scatter_add: build (row, vocab_id) index pairs and
        # scatter the attention mass into a vocabulary-sized tensor.
        batch_size, attn_len = enc_batch_extend_vocab.shape
        batch_num = range_tensor(0, batch_size)
        batch_num = P.ExpandDims()(batch_num, 1)
        batch_num = P.Tile()(batch_num, (1, attn_len))
        indices = P.Pack(2)((batch_num, enc_batch_extend_vocab))
        shape = (batch_size, vocab_dist_.shape[1])
        attn_dist_ = P.ScatterNd()(indices, attn_dist_, shape)
        final_dist = vocab_dist_ + attn_dist_
    else:
        final_dist = vocab_dist

    return final_dist, s_t, c_t, attn_dist, p_gen, coverage
def __init__(self, outer_nc, inner_nc, in_planes=None, dropout=False,
             submodule=None, outermost=False, innermost=False, alpha=0.2,
             norm_mode='batch'):
    """Build one U-Net block: downsample, optional submodule, upsample.

    Args:
        outer_nc: Channels produced by the upsampling convolution.
        inner_nc: Channels produced by the downsampling convolution.
        in_planes: Input channels; defaults to ``outer_nc``.
        dropout: Append ``nn.Dropout(0.5)`` (intermediate blocks only).
        submodule: Nested block placed between the down and up paths.
        outermost: Build the outermost block (no norm, ``Tanh`` output, no
            skip connection).
        innermost: Build the innermost block (no submodule).
        alpha: Slope of the ``LeakyReLU`` on the down path.
        norm_mode: ``'instance'`` switches to non-affine batch norm and
            enables convolution biases.
    """
    super(UnetSkipConnectionBlock, self).__init__()
    downnorm = nn.BatchNorm2d(inner_nc)
    upnorm = nn.BatchNorm2d(outer_nc)
    use_bias = False
    if norm_mode == 'instance':
        downnorm = nn.BatchNorm2d(inner_nc, affine=False)
        upnorm = nn.BatchNorm2d(outer_nc, affine=False)
        use_bias = True
    if in_planes is None:
        in_planes = outer_nc

    # Geometry shared by the down and up convolutions.
    conv_geometry = dict(kernel_size=4, stride=2, padding=1, pad_mode='pad')

    downconv = nn.Conv2d(in_planes, inner_nc, has_bias=use_bias,
                         **conv_geometry)
    downrelu = nn.LeakyReLU(alpha)
    uprelu = nn.ReLU()

    if outermost:
        # The outermost up-convolution keeps the default has_bias.
        upconv = nn.Conv2dTranspose(inner_nc * 2, outer_nc, **conv_geometry)
        layers = [downconv, submodule, uprelu, upconv, nn.Tanh()]
    elif innermost:
        upconv = nn.Conv2dTranspose(inner_nc, outer_nc, has_bias=use_bias,
                                    **conv_geometry)
        layers = [downrelu, downconv, uprelu, upconv, upnorm]
    else:
        upconv = nn.Conv2dTranspose(inner_nc * 2, outer_nc,
                                    has_bias=use_bias, **conv_geometry)
        layers = [downrelu, downconv, downnorm, submodule,
                  uprelu, upconv, upnorm]
        if dropout:
            layers.append(nn.Dropout(0.5))

    self.model = nn.SequentialCell(layers)
    self.skip_connections = not outermost
    self.concat = ops.Concat(axis=1)