Example #1
File: latwalk.py  Project: chr5tphr/ecGAN
def render(gfunc, stepsize=0.1, momentum=0.9, maxstep=24000):
    K = 10
    num = 30
    bbox = config.data.bbox
    cond = nd.one_hot(nd.repeat(nd.arange(K, ctx=ctx), (num-1)//K+1)[:num], K).reshape((num, K, 1, 1))
    anoi = nd.random.normal(shape=(num,100,1,1), ctx=ctx)
    bnoi = nd.random.normal(shape=(num,100,1,1), ctx=ctx)
    slast = 0.
    for step in range(maxstep):
        snoi = anoi - bnoi

        sdist = snoi.norm(axis=1,keepdims=True)
        if sdist.min().asscalar() < .5:
            anoi = nd.random.normal(shape=(30,100,1,1), ctx=ctx)
        snoi /= sdist
        slast = stepsize*snoi + momentum*slast
        bnoi += slast

        gen = gfunc(noise=bnoi, cond=cond)
        indat = ((gen - bbox[0]) * 255/(bbox[1]-bbox[0])).asnumpy().clip(0, 255).astype(np.uint8)
        indat = align_images(indat, 5, 6, 32, 32, 3)
        yield indat
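
align_images is a project helper not shown here; a plausible sketch (hypothetical -- assuming the frames arrive as (N, H, W, C) uint8 arrays; the real helper may differ) that tiles the generated images into a rows x cols grid:

import numpy as np

def align_images(imgs, rows, cols, height, width, channels):
    # tile up to rows*cols images into one (rows*height, cols*width, channels) canvas
    canvas = np.zeros((rows * height, cols * width, channels), dtype=imgs.dtype)
    for idx in range(min(len(imgs), rows * cols)):
        r, c = divmod(idx, cols)
        canvas[r * height:(r + 1) * height, c * width:(c + 1) * width] = imgs[idx]
    return canvas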
Example #2
        numerator += nd.sum(predictions == label)
        # Total number of checks
        denominator += data.shape[0]
    # Return the accuracy of the net: the fraction of labels it predicts correctly.
    return (numerator / denominator).asscalar()


epochs = 10
learning_rate = .001

for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        label_one_hot = nd.one_hot(label, 10)
        with autograd.record():
            output = net(data)
            loss = cross_entropy(output, label_one_hot)
        loss.backward()
        SGD(params, learning_rate)
        cumulative_loss += nd.sum(loss).asscalar()

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss / num_examples, train_accuracy, test_accuracy))


# The predictor. Returns prediction when we use our net.
def model_predict(net, data):
    output = net(data)
    return nd.argmax(output, axis=1)
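
The loop above calls SGD and cross_entropy without showing them; minimal sketches consistent with how they are called (assumed, in the style of the MXNet gluon tutorials this snippet follows):

def SGD(params, lr):
    # plain stochastic gradient descent, updating each parameter in place
    for param in params:
        param[:] = param - lr * param.grad

def cross_entropy(output, label_one_hot):
    # label_one_hot is one-hot; the small constant guards against log(0)
    return -nd.sum(label_one_hot * nd.log(output + 1e-6))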
Example #3
File: Lyric.py  Project: JasonPin/Lyric
def to_onehot(X, size):
    return [nd.one_hot(x, size) for x in X.T]

def forward(self, inputs, state):
    X = nd.one_hot(inputs.T, self.vocab_size)
    Y, state = self.rnn(X, state)
    output = self.dense(Y.reshape((-1, Y.shape[-1])))
    return output, state
Example #5
# Initialize weights and biases for each class
W = nd.random_normal(shape=(d_inputs, k_outputs), ctx=cntx)
W0 = nd.random_normal(shape=k_outputs, ctx=cntx)
prams = [W, W0]

# Track the gradients of the parameters
for parameter in prams:
    parameter.attach_grad()

# Execute training loop using SGD
for E in range(epochs):
    total_loss = 0
    for i, (xtrain, ytrain) in enumerate(train_data):
        xtrain = xtrain.as_in_context(cntx).reshape((-1, 784))
        ytrain = ytrain.as_in_context(cntx)
        ylabel_flag = nd.one_hot(ytrain, 5)
        with autograd.record():
            y_out = aux.nnet(xtrain, W, W0)
            loss = aux.cross_ent(y_out, ylabel_flag)
        loss.backward()
        prams = aux.SGD(prams, learn_rate)
        total_loss += nd.sum(loss).asscalar()

    # Evaluate model on training data
    train_accuracy = aux.compute_accuracy(train_data, aux.nnet, prams, cntx)

    # Evaluate model on testing data
    test_accuracy = aux.compute_accuracy(test_data, aux.nnet, prams, cntx)
    print("Epoch %s. Loss: %s, Train Accuracy: %s, Test Accuracy: %s" %
          (E, total_loss / m_cases, train_accuracy, test_accuracy))
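
aux.nnet and aux.cross_ent are project helpers not shown here; plausible sketches for this softmax-regression loop (hypothetical bodies -- only the names and call signatures come from the snippet):

def nnet(X, W, W0):
    # linear scores followed by a softmax over classes
    return nd.softmax(nd.dot(X, W) + W0, axis=1)

def cross_ent(y_out, y_onehot):
    # per-sample cross entropy; the small constant guards against log(0)
    return -nd.sum(y_onehot * nd.log(y_out + 1e-6), axis=1)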
Example #6
    random.shuffle(example_indices)  # each input is a lyric segment of num_steps characters: the ordered lyrics are cut into segments, numbered, then shuffled

    def _data(pos):
        return corpus_indices[pos:pos + num_steps]

    for i in range(epoch_size):
        i = i * batch_size
        batch_indices = example_indices[i:i + batch_size]  # the (shuffled) segment indices in this batch
        X = nd.array([_data(j * num_steps) for j in batch_indices],
                     ctx=ctx)  # dictionary indices of each segment's characters, later turned into one-hot vectors
        Y = nd.array([_data(j * num_steps + 1) for j in batch_indices],
                     ctx=ctx)  # the corresponding next-character sequences
        yield X, Y


'''
nd.one_hot(nd.array([0, 2]), vocab_size)

[[1. 0. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]]
<NDArray 2x1027 @cpu(0)>
'''


def to_onehot(X, size):
    """Represent inputs with one-hot encoding."""
    return [nd.one_hot(x, size) for x in X.T]


'''
Clip gradients
'''
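
The snippet breaks off at "Clip gradients"; the clipping step that normally follows in the d2l materials looks like this (a sketch matching the book's convention):

def grad_clipping(params, theta, ctx):
    # rescale all gradients so their global L2 norm is at most theta
    norm = nd.array([0], ctx)
    for param in params:
        norm += (param.grad ** 2).sum()
    norm = norm.sqrt().asscalar()
    if norm > theta:
        for param in params:
            param.grad[:] *= theta / norm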
Example #7
def forward(self, inputs, state):
    X = nd.one_hot(inputs.T, self.vocab_size)
    Y, state = self.rnn(X, state)
    # first reshape to (num_steps*batch_size, num_hiddens); output then has num_steps*batch_size rows
    output = self.dense(Y.reshape((-1, Y.shape[-1])))
    return output, state
Example #8
File: base.py  Project: chr5tphr/ecGAN
def forward_single_out(self, data, cond=None, logged=False):
    out = (self.forward_logged if logged else self)(data)
    if cond is None:
        cond = nd.argmax(out, axis=1)
    cond = nd.one_hot(cond, out.shape[1])
    return cond * out
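
A short usage sketch (hypothetical model and shapes): with out of shape (batch, num_classes), the one-hot mask zeroes every column except the chosen class, so each row keeps exactly one logit:

out = model.forward_single_out(data)                         # cond defaults to the argmax class
out = model.forward_single_out(data, cond=nd.array([1, 0]))  # or pick the classes explicitly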
Example #9
    def forward(self, cls_targets, ctr_targets, box_targets, mask_targets,
                matches, cls_preds, ctr_preds, box_preds, mask_preds,
                maskcoe_preds):
        """Compute loss in entire batch across devices."""
        scale = 4
        # require results across different devices at this time
        cls_targets, ctr_targets, box_targets, mask_targets, matches, cls_preds, ctr_preds, box_preds, mask_preds, maskcoe_preds = \
            [_as_list(x) for x in (cls_targets, ctr_targets, box_targets, mask_targets, matches,
                                   cls_preds, ctr_preds, box_preds, mask_preds, maskcoe_preds)]
        # compute element-wise cross entropy loss and sort, then perform negative mining
        cls_losses = []
        ctr_losses = []
        box_losses = []
        mask_losses = []
        sum_losses = []
        for clst, ctrt, boxt, maskt, matche, clsp, ctrp, boxp, maskp, maskcoep in zip(
                *[
                    cls_targets, ctr_targets, box_targets, mask_targets,
                    matches, cls_preds, ctr_preds, box_preds, mask_preds,
                    maskcoe_preds
                ]):

            pos_gt_mask = clst > 0
            # cls loss
            if not self._from_logits:
                clsp = nd.sigmoid(clsp)
            one_hot = nd.one_hot(clst, self._num_class)
            one_hot = nd.slice_axis(one_hot, begin=1, end=None, axis=-1)
            pt = nd.where(one_hot, clsp, 1 - clsp)
            t = nd.ones_like(one_hot)
            alpha = nd.where(one_hot, self._alpha * t, (1 - self._alpha) * t)
            cls_loss = -alpha * (
                (1 - pt)**self._gamma) * nd.log(nd.minimum(pt + self._eps, 1))
            cls_loss = nd.sum(cls_loss) / nd.maximum(nd.sum(pos_gt_mask), 1)
            cls_losses.append(cls_loss)

            # ctr loss
            ctrp = nd.squeeze(ctrp, axis=-1)
            pos_pred_mask = ctrp >= 0
            ctr_loss = (ctrp * pos_pred_mask - ctrp * ctrt +
                        nd.log(1 + nd.exp(-nd.abs(ctrp)))) * pos_gt_mask
            ctr_loss = nd.sum(ctr_loss) / nd.maximum(nd.sum(pos_gt_mask), 1)
            ctr_losses.append(ctr_loss)

            # box loss // iou loss
            px1, py1, px2, py2 = nd.split(boxp,
                                          num_outputs=4,
                                          axis=-1,
                                          squeeze_axis=True)
            gx1, gy1, gx2, gy2 = nd.split(boxt,
                                          num_outputs=4,
                                          axis=-1,
                                          squeeze_axis=True)
            apd = nd.abs(px2 - px1 + 1) * nd.abs(py2 - py1 + 1)
            agt = nd.abs(gx2 - gx1 + 1) * nd.abs(gy2 - gy1 + 1)
            iw = nd.maximum(
                nd.minimum(px2, gx2) - nd.maximum(px1, gx1) + 1., 0.)
            ih = nd.maximum(
                nd.minimum(py2, gy2) - nd.maximum(py1, gy1) + 1., 0.)
            ain = iw * ih + 1.
            union = apd + agt - ain + 1
            ious = nd.maximum(ain / union, 0.)
            fg_mask = nd.where(clst > 0, nd.ones_like(clst),
                               nd.zeros_like(clst))
            box_loss = -nd.log(nd.minimum(ious + self._eps, 1.)) * fg_mask
            if self._return_iou:
                box_loss = nd.sum(box_loss) / nd.maximum(nd.sum(fg_mask),
                                                         1), ious
            else:
                box_loss = nd.sum(box_loss) / nd.maximum(nd.sum(fg_mask), 1)
            box_losses.append(box_loss)

            # mask loss
            rank = (-matche).argsort(axis=-1)
            rank = nd.split(rank, 2, axis=0, squeeze_axis=True)
            matche = nd.split(matche, 2, axis=0, squeeze_axis=True)
            maskp = nd.split(maskp, 2, axis=0, squeeze_axis=True)
            maskt = nd.split(maskt, 2, axis=0, squeeze_axis=True)
            boxt = nd.split(boxt, 2, axis=0, squeeze_axis=True)
            maskcoep = nd.split(maskcoep, 2, axis=0, squeeze_axis=True)
            agt = nd.split(agt, 2, axis=0, squeeze_axis=True)
            mask_loss = []
            for ranki, matchei, maskpi, maskti, boxti, maskcoepi, agti in zip(
                    rank, matche, maskp, maskt, boxt, maskcoep, agt):
                idx = nd.slice(ranki, 0, 200)
                pos_mask = nd.take(matchei >= 0, idx)
                pos_box = nd.take(boxti, idx)
                area = nd.take(agti, idx)
                weight = (self.gt_weidth * self.gt_height /
                          (area + self._eps)) * pos_mask
                mask_idx = nd.take(matchei, idx)
                maskti = nd.take(maskti, mask_idx)
                maskpi = nd.dot(nd.take(maskcoepi, idx), maskpi)
                maskpi = nd.sigmoid(maskpi)
                with autograd.pause():
                    _h = nd.arange(186, ctx=maskpi.context)
                    _w = nd.arange(186, ctx=maskpi.context)
                    _h = nd.tile(_h, reps=(pos_box.shape[0], 1))
                    _w = nd.tile(_w, reps=(pos_box.shape[0], 1))
                    x1, y1, x2, y2 = nd.split(nd.round(pos_box / scale),
                                              num_outputs=4,
                                              axis=-1)
                    _w = (_w >= x1) * (_w <= x2)
                    _h = (_h >= y1) * (_h <= y2)
                    _mask = nd.batch_dot(_h.expand_dims(axis=-1),
                                         _w.expand_dims(axis=-1),
                                         transpose_b=True)
                maskpi = maskpi * _mask
                mask_loss.append(
                    nd.sum(self.SBCELoss(maskpi, maskti) * weight) /
                    nd.sum(pos_mask + self._eps))

            # if sum(pos_num)>1400:
            #     print(sum(pos_num))
            #     print(pos_num)
            # pos_num = (matche >=0).sum(axis=-1).asnumpy()
            # rank = (-matche).argsort(axis=-1)
            # mask_loss = []
            # for i in range(maskp.shape[0]):
            #     if pos_num[i] == 0.:
            #         # print(pos_num)
            #         mask_loss.append(nd.zeros(shape=(1,), ctx=maskp.context))
            #         continue
            #     idx = rank[i, :int(pos_num[i])]
            #     pos_box = nd.take(boxt[i], idx)
            #     area = (pos_box[:, 3] - pos_box[:, 1]) * (pos_box[:, 2] - pos_box[:, 0])
            #     weight = self.gt_weidth * self.gt_height / (area+self._eps)
            #     maskti = maskt[i, matche[i, idx], :, :]
            #     maskpi = nd.dot(nd.take(maskcoep[i], idx), maskp[i])
            #     _, h, w = maskpi.shape
            #     maskpi = nd.sigmoid(maskpi)
            #     with autograd.pause():
            #         _h = nd.arange(h, ctx=maskpi.context)
            #         _w = nd.arange(w, ctx=maskpi.context)
            #         _h = nd.tile(_h, reps=(pos_box.shape[0], 1))
            #         _w = nd.tile(_w, reps=(pos_box.shape[0], 1))
            #         x1, y1, x2, y2 = nd.split(nd.round(pos_box / scale), num_outputs=4, axis=-1)
            #         _w = (_w >= x1) * (_w <= x2)
            #         _h = (_h >= y1) * (_h <= y2)
            #         _mask = nd.batch_dot(_h.expand_dims(axis=-1), _w.expand_dims(axis=-1), transpose_b=True)
            #     maskpi = maskpi * _mask
            #     mask_loss.append(nd.sum(self.SBCELoss(maskpi, maskti) * weight)/pos_num[i])
            mask_loss = nd.mean(nd.concat(*mask_loss, dim=0))
            mask_losses.append(mask_loss)
            sum_losses.append(self._cls_lambd * cls_losses[-1] +
                              self._ctr_lambd * ctr_losses[-1] +
                              self._box_lambd * box_losses[-1] +
                              self._mask_lambd * mask_losses[-1])

        return sum_losses, cls_losses, ctr_losses, box_losses, mask_losses
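
For reference, the cls branch above is a sigmoid focal loss: with pt = p for the positive class and 1 - p otherwise, it computes FL(pt) = -alpha_t * (1 - pt)^gamma * log(pt), normalized by the number of positive ground-truth locations. The ctr branch is the numerically stable form of binary cross-entropy on the centerness targets, and the box branch is a -log(IoU) loss over foreground positions.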
Example #10
    def forward(self, inputs, target, next_word_history, cache_history, begin_state=None): # pylint: disable=arguments-differ
        """Defines the forward computation for cache cell. Arguments can be either
        :py:class:`NDArray` or :py:class:`Symbol`.

        Parameters
        ----------
        inputs: NDArray
            The input data
        target: NDArray
            The label
        next_word_history: NDArray
            The next word in memory
        cache_history: NDArray
            The hidden state in cache history


        Returns
        --------
        out: NDArray
            The linear interpolation of the cache language model
            with the regular word-level language model
        next_word_history: NDArray
            The next words to be kept in the memory for look up
            (size is equal to the window size)
        cache_history: NDArray
            The hidden states to be kept in the memory for look up
            (size is equal to the window size)
        """
        output, hidden, encoder_hs, _ = \
            super(self.lm_model.__class__, self.lm_model).\
                forward(inputs, begin_state)
        encoder_h = encoder_hs[-1].reshape(-3, -2)
        output = output.reshape(-1, self._vocab_size)

        start_idx = len(next_word_history) \
            if next_word_history is not None else 0
        next_word_history = nd.concat(*[nd.one_hot(t[0], self._vocab_size, on_value=1, off_value=0)
                                        for t in target], dim=0) if next_word_history is None \
            else nd.concat(next_word_history,
                           nd.concat(*[nd.one_hot(t[0], self._vocab_size, on_value=1, off_value=0)
                                       for t in target], dim=0), dim=0)
        cache_history = encoder_h if cache_history is None \
            else nd.concat(cache_history, encoder_h, dim=0)

        out = None
        softmax_output = nd.softmax(output)
        for idx, vocab_L in enumerate(softmax_output):
            joint_p = vocab_L
            if start_idx + idx > self._window:
                valid_next_word = next_word_history[start_idx + idx - self._window:start_idx + idx]
                valid_cache_history = cache_history[start_idx + idx - self._window:start_idx + idx]
                logits = nd.dot(valid_cache_history, encoder_h[idx])
                cache_attn = nd.softmax(self._theta * logits).reshape(-1, 1)
                cache_dist = (cache_attn.broadcast_to(valid_next_word.shape)
                              * valid_next_word).sum(axis=0)
                joint_p = self._lambdas * cache_dist + (1 - self._lambdas) * vocab_L

            out = joint_p[target[idx]] if out is None \
                else nd.concat(out, joint_p[target[idx]], dim=0)
        next_word_history = next_word_history[-self._window:]
        cache_history = cache_history[-self._window:]
        return out, next_word_history, cache_history, hidden
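
For reference, this is the neural cache (pointer) construction: the output distribution is the interpolation p(w) = lambda * p_cache(w) + (1 - lambda) * p_vocab(w), where p_cache attends over the last window-many cached hidden states with weights softmax(theta * h_t . h_i) and copies the corresponding cached next words.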
Example #11
def train():
    # 1. Init params
    weight_scale = .1
    rho_offset = -3

    # initialize variational parameters; mean and variance for each weight
    mus = []
    rhos = []

    for shape in layer_param_shapes:
        mu = nd.random_normal(shape=shape, ctx=ctx, scale=weight_scale)
        rho = rho_offset + nd.zeros(shape=shape, ctx=ctx)
        mus.append(mu)
        rhos.append(rho)

    variational_params = mus + rhos

    for param in variational_params:
        param.attach_grad()

    # 2. Functions for main training loop
    def sample_epsilons(param_shapes):
        epsilons = [
            nd.random_normal(shape=shape, loc=0., scale=1.0, ctx=ctx)
            for shape in param_shapes
        ]
        return epsilons

    def softplus(x):
        return nd.log(1. + nd.exp(x))

    def transform_rhos(rhos):
        return [softplus(rho) for rho in rhos]

    def transform_gaussian_samples(mus, sigmas, epsilons):
        samples = []
        for j in range(len(mus)):
            samples.append(mus[j] + sigmas[j] * epsilons[j])
        return samples

    # 3. Complete training loop
    epochs = config['epochs']
    learning_rate = config['learning_rate']
    smoothing_constant = .01
    train_acc = []
    test_acc = []

    for e in range(epochs):
        for i, (data, label) in enumerate(train_data):
            print(data.shape, label.shape)
            if i == 5:
                break
            data = data.as_in_context(ctx).reshape((-1, 784))
            label = label.as_in_context(ctx)
            label_one_hot = nd.one_hot(label, 10)

            with autograd.record():
                # sample epsilons from standard normal
                epsilons = sample_epsilons(layer_param_shapes)

                # compute softplus for variance
                sigmas = transform_rhos(rhos)

                # obtain a sample from q(w|theta) by transforming the epsilons
                layer_params = transform_gaussian_samples(
                    mus, sigmas, epsilons)

                # forward-propagate the batch
                output = net(data, layer_params)

                # calculate the loss
                loss = combined_loss(output, label_one_hot, layer_params, mus,
                                     sigmas, gaussian_prior,
                                     log_softmax_likelihood)

            # backpropagate for gradient calculation
            loss.backward()

            # apply stochastic gradient descent to variational parameters
            SGD(variational_params, learning_rate)

            # calculate moving loss for monitoring convergence
            curr_loss = nd.mean(loss).asscalar()
            moving_loss = (curr_loss if ((i == 0) and (e == 0)) else
                           (1 - smoothing_constant) * moving_loss +
                           (smoothing_constant) * curr_loss)

        test_accuracy = evaluate_accuracy(test_data, net, mus)
        train_accuracy = evaluate_accuracy(train_data, net, mus)
        train_acc.append(np.asscalar(train_accuracy))
        test_acc.append(np.asscalar(test_accuracy))
        print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
              (e, moving_loss, train_accuracy, test_accuracy))

    return [mu.asnumpy().tolist() for mu in mus]
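
For reference, the sampling step is the reparameterization trick of Bayes by Backprop: sigma = softplus(rho) = log(1 + exp(rho)) keeps each standard deviation positive, and w = mu + sigma * epsilon with epsilon ~ N(0, 1) makes the sampled weights differentiable with respect to the variational parameters (mus, rhos).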
Example #12
def to_onehot(X, size):  # this function is saved in the d2lzh package for later use
    # X.T: one column per sample, one row per time step, e.g. 5 x 2
    return [nd.one_hot(x, size) for x in X.T]
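
A quick usage check mirroring the shapes noted above (1027 is the vocab size used elsewhere in these examples):

X = nd.arange(10).reshape((2, 5))  # (batch_size=2, num_steps=5)
inputs = to_onehot(X, 1027)        # a list of 5 arrays, each of shape (2, 1027)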
Example #13
import d2lzh as d2l
from mxnet import autograd, nd
from mxnet.gluon import loss as gloss
import time

# corpus_indices    corpus indices
# char_to_idx       char to idx
# idx_to_char       idx to char
# vocab_size        vocabulary size (total number of distinct characters)
(corpus_indices, char_to_idx, idx_to_char,
 vocab_size) = d2l.load_data_jay_lyrics()

# vocab_size        1027
# row 0             has its 1 at position 0
# row 1             has its 1 at position 2
# 2 x 1027
tmp = nd.one_hot(nd.array([0, 2]), vocab_size)
print(tmp)


def to_onehot(X, size):  # this function is saved in the d2lzh package for later use
    # X.T: one column per sample, one row per time step, e.g. 5 x 2
    return [nd.one_hot(x, size) for x in X.T]


# 2 x 5
X = nd.arange(10).reshape((2, 5))

# 2 x 1027
# 2 x 1027
Example #14
    def calculation(self, input_str, en_dict, ko_dict, ko_rev_dict, ctx):
        """
        inference 코드 
        """
        #앞뒤에 START,END 코드 추가
        input_str = [
            [
                'START',
            ] + mecab.morphs(input_str.strip()) + [
                'END',
            ],
        ]
        X = encoding_and_padding(input_str,
                                 en_dict,
                                 max_seq=self.max_seq_length)
        #string to embed
        inputs = F.array(X, ctx=ctx)

        inputs = F.cast(inputs, dtype='float32')
        in_sent_last_idx = F.argmax(F.where(inputs == self.end_idx,
                                            F.ones_like(inputs),
                                            F.zeros_like(inputs)),
                                    axis=1)

        #encoder GRU
        embeddinged_in = F.cast(self.embedding(inputs), dtype='float32')
        next_h = F.random.normal(0, 1, (1, self.n_hidden), ctx=ctx)
        for j in range(self.in_seq_len):
            p_outputs = F.slice_axis(embeddinged_in,
                                     axis=1,
                                     begin=j,
                                     end=j + 1)
            p_outputs = F.reshape(p_outputs, (-1, self.embed_dim))
            enout, (next_h, ) = self.encoder(p_outputs, [
                next_h,
            ])
            if j == 0:
                enouts = enout
                next_hs = next_h
            else:
                enouts = F.concat(enouts, enout, dim=1)
                next_hs = F.concat(next_hs, next_h, dim=1)
        #masking with 0 using length
        enouts = F.reshape(enouts, (-1, self.in_seq_len, self.n_hidden))
        enouts = F.transpose(enouts, (1, 0, 2))
        enouts = F.SequenceMask(enouts,
                                sequence_length=in_sent_last_idx + 1,
                                use_sequence_length=True)
        enouts = F.transpose(enouts, (1, 0, 2))

        next_hs = F.reshape(next_hs, (-1, self.n_hidden))
        # because take only supports dim 0..
        # N, 30, 300 -> N * 30, 300 , N = (0,1,2,3,4,5...)
        next_hs = next_hs.take(in_sent_last_idx)

        # embed 'START' as the decoder's initial input
        Y_init = F.array([
            [
                ko_dict['START'],
            ],
        ], ctx=ctx)
        Y_init = F.cast(self.embedding(Y_init), dtype='float32')
        deout = Y_init[:, 0, :]

        # iterate over the output sequence length
        for i in range(self.out_seq_len):
            if self.attention:
                #print(deout.shape)
                deout, att_weight = self.apply_attention(
                    F=F, inputs=deout, hidden=next_hs, encoder_outputs=enouts)
                if i == 0:
                    att_weights = att_weight
                else:
                    att_weights = F.concat(att_weights, att_weight, dim=0)
            deout, (next_hs, ) = self.decoder(deout, [
                next_hs,
            ])
            # expand dims to apply batchnorm, then restore them
            deout = F.expand_dims(deout, axis=1)
            deout = self.batchnorm(deout)
            #reduce dim
            deout = deout[:, 0, :]
            # derive the output for the sequence step after 'START'
            deout_sm = self.dense(deout)
            #print(deout_sm.shape)
            deout = F.one_hot(F.argmax(F.softmax(deout_sm, axis=1), axis=1),
                              depth=self.vocab_size)
            #print(deout.shape)
            # convert into a form the decoder accepts (apply the embedding, adjust dims)
            deout = F.argmax(deout, axis=1)
            deout = F.expand_dims(deout, axis=0)
            deout = F.cast(self.embedding(deout)[:, 0, :], dtype='float32')
            gen_char = ko_rev_dict[F.argmax(deout_sm,
                                            axis=1).asnumpy()[0].astype('int')]
            if gen_char == '__PAD__' or gen_char == 'END':
                break
            else:
                if i == 0:
                    ret_seq = [
                        gen_char,
                    ]
                else:
                    ret_seq += [
                        gen_char,
                    ]
        return (" ".join(ret_seq), att_weights)

Example #15
def train_and_valid(en_bert, mt_model, en_vocab, ch_vocab, train_dataiter,
                    dev_dataiter, trainer, en_finetune_trainer, epochs,
                    loss_func, ctx, lr, batch_size, params_save_step,
                    params_save_path_root, eval_step, log_step, check_step,
                    label_smooth, logger, num_train_examples, warmup_ratio):
    batches = len(train_dataiter)

    num_train_steps = int(num_train_examples / batch_size * epochs)
    num_warmup_steps = int(num_train_steps * warmup_ratio)
    global_step = 0
    dev_bleu_score = 0

    for epoch in range(epochs):
        for trans, aim, label, trans_valid_len, aim_valid_len in train_dataiter:
            if global_step < num_warmup_steps:
                new_lr = lr * global_step / num_warmup_steps
            else:
                non_warmup_steps = global_step - num_warmup_steps
                offset = non_warmup_steps / (num_train_steps -
                                             num_warmup_steps)
                new_lr = lr - offset * lr
            trainer.set_learning_rate(new_lr)

            trans = trans.as_in_context(ctx)
            aim = aim.as_in_context(ctx)
            label = label.as_in_context(ctx)
            trans_valid_len = trans_valid_len.as_in_context(ctx)
            trans_token_type = nd.zeros_like(trans, ctx=ctx)

            aim_mask = nd.not_equal(aim, ch_vocab(ch_vocab.padding_token))

            if label_smooth:
                eps = 0.1
                num_class = len(ch_vocab.idx_to_token)
                one_hot = nd.one_hot(label, num_class)
                one_hot_label = one_hot * (1 - eps) + (
                    1 - one_hot) * eps / num_class

            with autograd.record():
                en_bert_outputs = en_bert(trans, trans_token_type,
                                          trans_valid_len)
                mt_outputs = mt_model(en_bert_outputs, trans, aim)
                loss_mean = loss_func(mt_outputs, one_hot_label, aim_mask)

            loss_mean.backward()
            loss_scalar = loss_mean.asscalar()

            trainer.step(1)
            en_finetune_trainer.step(1)

            if global_step and global_step % log_step == 0:
                predicts = nd.argmax(nd.softmax(mt_outputs, axis=-1), axis=-1)
                correct = nd.equal(label, predicts)
                accuracy = (nd.sum(correct * aim_mask) /
                            nd.sum(aim_mask)).asscalar()
                logger.info(
                    "epoch:{}, batch:{}/{}, bleu:{}, acc:{}, loss:{}, (lr:{}s)"
                    .format(epoch, global_step % batches, batches,
                            dev_bleu_score, accuracy, loss_scalar,
                            trainer.learning_rate))

            if global_step and global_step % check_step == 0:
                predicts = nd.argmax(nd.softmax(mt_outputs, axis=-1), axis=-1)
                refer_sample = trans.asnumpy().tolist()
                label_sample = label.asnumpy().tolist()
                pred_sample = predicts.asnumpy().tolist()
                logger.info("train sample:")
                logger.info("refer  :{}".format(" ".join([
                    en_vocab.idx_to_token[int(idx)] for idx in refer_sample[0]
                ])).replace(en_vocab.padding_token, ""))
                logger.info("target :{}".format(" ".join([
                    ch_vocab.idx_to_token[int(idx)] for idx in label_sample[0]
                ])).replace(EOS, "[EOS]").replace(ch_vocab.padding_token, ""))
                logger.info("predict:{}".format(" ".join([
                    ch_vocab.idx_to_token[int(idx)] for idx in pred_sample[0]
                ])).replace(EOS, "[EOS]"))

            if global_step and global_step % eval_step == 0:
                dev_bleu_score = eval(en_bert,
                                      mt_model,
                                      en_vocab,
                                      ch_vocab,
                                      dev_dataiter,
                                      logger,
                                      ctx=ctx)

            if global_step and global_step % params_save_step == 0:
                if not os.path.exists(params_save_path_root):
                    os.makedirs(params_save_path_root)
                model_params_file = params_save_path_root + \
                    "en_bert.ft_step_{}.params".format(global_step)
                en_bert.save_parameters(model_params_file)
                logger.info("{} Save Completed.".format(model_params_file))

                model_params_file = params_save_path_root + \
                    "mt_step_{}.params".format(global_step)
                mt_model.save_parameters(model_params_file)
                logger.info("{} Save Completed.".format(model_params_file))

            global_step += 1
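
For reference, the label_smooth branch implements standard label smoothing: the target class receives 1 - eps and every other class receives eps / num_class, and this smoothed distribution replaces the hard one-hot label in the loss. The learning-rate schedule is a linear warmup over num_warmup_steps followed by a linear decay to zero.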

Example #16
import d2lzh as d2l
from mxnet import autograd, nd
from mxnet.gluon import loss as gloss
import math, time, numpy as np

# Load the data
# corpus_indices  indices of the ~10k-character corpus
# char_to_idx     char to idx
# idx_to_char     idx to char
# vocab_size      number of distinct characters
(corpus_indices, char_to_idx, idx_to_char,
 vocab_size) = d2l.load_data_jay_lyrics()

# one-hot vectors
print(nd.one_hot(nd.array([1, 2]), vocab_size))  # each one-hot row has a single 1 -- here at positions 1 and 2


def to_onehot(X, size):
    return [nd.one_hot(x, size) for x in X.T]  # in X, columns are features and rows are samples


# Test
X = nd.arange(10).reshape((2, 5))  # 2: batch_size  5: num_steps
inputs = to_onehot(X, vocab_size)  # converts to num_steps arrays of shape (batch_size, vocab_size)
np.set_printoptions(edgeitems=6)  # how many edge items to print (default is 3)
print(len(inputs), inputs[0])  # 5 arrays, each 2 x 1027

################################################# TODO: initialize model parameters #####################################################
num_inputs, num_hiddens, num_outputs = vocab_size, 256, vocab_size
ctx = d2l.try_gpu()
print('use ', ctx)
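
The snippet stops at the TODO; in the d2l book this is followed by parameter initialization along these lines (a sketch matching the book's RNN-from-scratch convention):

def get_params():
    def _one(shape):
        return nd.random.normal(scale=0.01, shape=shape, ctx=ctx)

    W_xh = _one((num_inputs, num_hiddens))   # input-to-hidden
    W_hh = _one((num_hiddens, num_hiddens))  # hidden-to-hidden
    b_h = nd.zeros(num_hiddens, ctx=ctx)
    W_hq = _one((num_hiddens, num_outputs))  # hidden-to-output
    b_q = nd.zeros(num_outputs, ctx=ctx)
    params = [W_xh, W_hh, b_h, W_hq, b_q]
    for param in params:
        param.attach_grad()  # allocate gradient buffers
    return params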
Example #17
File: func.py  Project: chr5tphr/ecGAN
def fuzzy_one_hot(arr, size):
    x = arr.reshape((-1, ))
    return nd.where(nd.one_hot(x, size),
                    nd.uniform(low=0.7, high=1.2, shape=(x.shape[0], size), ctx=x.context),
                    nd.uniform(low=0.0, high=0.3, shape=(x.shape[0], size), ctx=x.context))
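
A usage sketch: fuzzy_one_hot produces noisy targets instead of hard 0/1 labels, a label-smoothing trick often used for GAN discriminators. Illustrative values:

labels = nd.array([0, 2])
targets = fuzzy_one_hot(labels, 3)
# "one" entries are drawn from U(0.7, 1.2) and "zero" entries from U(0.0, 0.3), e.g.
# [[1.05, 0.12, 0.28],
#  [0.03, 0.19, 0.88]]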
Example #19
def forward(self, inputs, state):
    X = nd.one_hot(inputs.T, self.V)
    Y, state = self.rnn(X, state)
    return self.dense(Y.reshape((-1, Y.shape[-1]))), state
Example #20
    def forward(self,
                img,
                xs,
                anchors,
                offsets,
                gt_boxes,
                gt_ids,
                gt_mixratio=None):
        """Generating training targets that do not require network predictions.

        Parameters
        self._fake_x, self._feat_maps, self._anchors, self._offsets,
        ----------
        img : mxnet.nd.NDArray
            Original image tensor.      img = mx.nd.zeros((1, 3, 416, 416))   
        xs : list of mxnet.nd.NDArray   [[13, 13], [26, 26], [52, 52]]
            List of feature maps. 
        anchors : mxnet.nd.NDArray      [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]] 
            YOLO3 anchors.
        offsets : mxnet.nd.NDArray      [[1, 13*13,1,2], [1, 26*26,1,2], [1, 52*52,1,2]]
            Pre-generated x and y offsets for YOLO3.   
            # offsets are relative to the top-left corner of each grid cell
      
        gt_boxes : mxnet.nd.NDArray
            Ground-truth boxes.
        gt_ids : mxnet.nd.NDArray
            Ground-truth IDs. 
        gt_mixratio : mxnet.nd.NDArray, optional
            Mixup ratio from 0 to 1.
        
        _fake_x  shape : [1, 3, 416, 416]  # img
        the following are all lists:
        feat_maps:[                         # xs
                (1, 1, 13, 13)
                (1, 1, 26, 26)
                (1, 1, 52, 52)]
        anchors:[
                (1, 1, 3, 2)     # 13 * 13
                (1, 1, 3, 2)     # 26 * 26 
                (1, 1, 3, 2)     # 52 * 52]

        offsets:[
                (1, 169,  1, 2)   # 13 * 13
                (1, 676,  1, 2)   # 26 * 26
                (1, 2704, 1, 2)   # 52 * 52]

         gt_boxes    = train_dataset[0][1] [np.newaxis, :, :4]) [B,M,4]
         gt_ids      = train_dataset[0][1] [np.newaxis, :, :4:5])
         gt_mixratio = train_dataset[0][1] [np.newaxis, :, -1:])

        Returns
        -------
        # the targets that need to be generated
        (tuple of) mxnet.nd.NDArray
            objectness: 0 for negative, 1 for positive, -1 for ignore.
            center_targets: regression target for center x and y.
            scale_targets: regression target for scale x and y.
            weights: element-wise gradient weights for center_targets and scale_targets.
            class_targets: a one-hot vector for classification.


        self._target_generator(
            self._fake_x, self._feat_maps, self._anchors, self._offsets,
            gt_bboxes, gt_ids, gt_mixratio)


        anchors_lst = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
        anchors = [nd.array(a) for a in anchors_lst] 
        
        offsets = [ nd.arange(13*13*2).reshape(1,13*13, 1, 2),
            nd.arange(26*26*2).reshape(1,26*26, 1, 2),
            nd.arange(52*52*2).reshape(1,52*52, 1, 2)]


        """
        assert isinstance(anchors, (list, tuple))
        all_anchors = nd.concat(*[a.reshape(-1, 2) for a in anchors],
                                dim=0)  # shape = [3549, 2] 169 + 676 + 2704
        assert isinstance(offsets, (list, tuple))
        all_offsets = nd.concat(*[o.reshape(-1, 2) for o in offsets], dim=0)

        num_anchors = np.cumsum([a.size // 2 for a in anchors
                                 ])  # num_anchors = array(3, 6, 9)
        num_offsets = np.cumsum([
            o.size // 2 for o in offsets
        ])  # num_offsets = array(169, 169 + 676, 169 + 676 + 2704)
        _offsets = [
            0
        ] + num_offsets.tolist()  # _offsets    = [0, 169, 845, 3549]
        assert isinstance(xs, (list, tuple))
        assert len(xs) == len(anchors) == len(
            offsets)  # the three must have the same length: one offset map per anchor set

        # orig image size
        orig_height = img.shape[2]  # 416
        orig_width = img.shape[3]  # 416

        # pause gradient recording here during training; autograd.is_training stays True
        with autograd.pause():
            # outputs
            '''
                all_anchors.reshape --> [1, 9, 2]  the anchors of one grid cell
                all_offsets.reshape --> [3549, 1, 2]  # the whole feature map
                multiply, then expand_dims: shape --> (1, 3549, 9, 2)  # every grid cell has 9 anchors

                repeat --> shape_like.shape = [1, 3549, 9, 2]

                weights.split(axis=-1, num_outputs=2)[0] --> [1, 3549, 9, 1]
                objectness shape = [1, 3549, 9, 1]
            '''
            shape_like = all_anchors.reshape((1, -1, 2)) * all_offsets.reshape(
                (-1, 1, 2)).expand_dims(0).repeat(repeats=gt_ids.shape[0],
                                                  axis=0)
            ''' all initialized to zero '''
            center_targets = nd.zeros_like(shape_like)
            scale_targets = nd.zeros_like(center_targets)
            weights = nd.zeros_like(center_targets)
            objectness = nd.zeros_like(
                weights.split(axis=-1, num_outputs=2)[0])
            '''
            objectness.squeeze(axis=-1)
                shape = [1, 3549, 9]
            class_targets
                shape = [gt_ids.shape[0], 3549, 9, self._num_class]
                default values are all -1, i.e. ignore

            '''
            class_targets = nd.one_hot(objectness.squeeze(axis=-1),
                                       depth=self._num_class)
            class_targets[:] = -1  # prefill -1 for ignores
            '''
            # for each ground-truth, find the best matching anchor within the particular grid
            # for instance, center of object 1 reside in grid (3, 4) in (16, 16) feature map
            # then only the anchor in (3, 4) is going to be matched
                i.e., for each ground-truth, look for the best-matching anchor box among the
                boxes produced by the grid cell that contains the ground-truth


            shift_gt_boxes is still in corner format, [1, M, 4]

            anchor_boxes shape = [1, 9, 4]; the first two numbers are the box center (0, 0),
            the last two are the prior's width and height
            shift_anchor_boxes converted to corner format: [1, 9, 4]


            ious shape = [1, 9, M], where M is the number of objects in the gt-bboxes

            gtx shape: [1, M, 1]
            gty shape: [1, M, 1]
            gtw shape: [1, M, 1]
            gth shape: [1, M, 1]

            '''
            gtx, gty, gtw, gth = self.bbox2center(gt_boxes)
            shift_gt_boxes = nd.concat(-0.5 * gtw,
                                       -0.5 * gth,
                                       0.5 * gtw,
                                       0.5 * gth,
                                       dim=-1)  # zero center

            anchor_boxes = nd.concat(0 * all_anchors, all_anchors,
                                     dim=-1)  # zero center anchors
            shift_anchor_boxes = self.bbox2corner(anchor_boxes)  # converted back to corner format

            ious = nd.contrib.box_iou(shift_anchor_boxes,
                                      shift_gt_boxes).transpose(
                                          (1, 0, 2))  # (1, 9, M)
            # real value is required to process, convert to Numpy
            '''
                IoU: the IoU of every anchor against every gt_box.
                    ious.argmax(axis=1) gives, for each of the M gt_boxes, the index of the
                    anchor with the largest IoU. Each grid cell has 9 anchors, but only one
                    anchor fits a gt_box best.
                        nlayer = np.nonzero(num_anchors > match)[0][0]
                    then determines which level that best anchor comes from.
            '''
            matches = ious.argmax(axis=1).asnumpy()  # (B, M)
            valid_gts = (gt_boxes >= 0).asnumpy().prod(
                axis=-1)  # [B, M, 4] --> [B, M]; 1 means valid, 0 means invalid (a box padded past the image's top-left corner)

            np_gtx, np_gty, np_gtw, np_gth = [
                x.asnumpy() for x in [gtx, gty, gtw, gth]
            ]
            np_anchors = all_anchors.asnumpy()
            np_gt_ids = gt_ids.asnumpy()
            np_gt_mixratios = gt_mixratio.asnumpy(
            ) if gt_mixratio is not None else None
            # TODO(zhreshold): the number of valid gt is not a big number, therefore for loop
            # should not be a problem right now. Switch to better solution is needed.

            for b in range(matches.shape[0]):  # batch
                for m in range(matches.shape[1]):  # number of ground-truths
                    if valid_gts[b, m] < 1:  # invalid (padded) gt, stop scanning this image
                        break
                    match = int(matches[b, m])  # index of the anchor best matching this gt
                    nlayer = np.nonzero(num_anchors > match)[0][0]

                    height = xs[nlayer].shape[2]  # 13,26, 52
                    width = xs[nlayer].shape[3]
                    gtx, gty, gtw, gth = (np_gtx[b, m, 0], np_gty[b, m, 0],
                                          np_gtw[b, m, 0], np_gth[b, m, 0])
                    '''
                    index = _offsets[nlayer] + loc_y * width + loc_x
                            _offsets = [0, 169, 845, 3549],
                                the grid cell's position, counted from the previous levels, i.e. _offsets[nlayer]
                                loc_y * width: each level's feature map is width * height, so loc_y * width selects the row (a row holds width elements)
                                loc_x: the column.
                    gtx is a coordinate in the original image, converted to an offset relative to its grid cell:
                        1. locate gtx on the current feature map: gtx / stride, i.e. gtx / orig_width * width
                        2. loc_x = int(gtx / orig_width * width), the x coordinate of the grid cell's top-left corner
                        3. gtx / orig_width * width - loc_x
                    likewise for gty.

                    w/h
                        from the target formulas for gtw/gth:
                        tw = log(gtw/pw),
                            where pw is the anchor's width; tw is a ratio, so it is the same at
                            every scale and gtw/pw can be used directly
                        th = log(gth/ph), where ph is the anchor's height

                        weights[b, index, match, :] = 2.0 - gtw * gth / orig_width / orig_height
                        this is a coefficient on x, y, w, h in the loss; why it is computed this
                        way is, frankly, not obvious
                    '''
                    loc_x = int(
                        gtx / orig_width * width
                    )  # loc_x = gtx / stride; x of the top-left corner of the grid cell that gtx (an original-image coordinate) falls into on this feature map
                    loc_y = int(
                        gty / orig_height * height
                    )  # loc_y = gty / stride; y of the top-left corner of the grid cell gty falls into
                    # write back to targets
                    index = _offsets[nlayer] + loc_y * width + loc_x
                    # shape = [B, 3549, 9, 2]
                    center_targets[
                        b, index, match,
                        0] = gtx / orig_width * width - loc_x  # sigmoid(tx): a fraction below 1, the offset from the grid cell's top-left corner
                    center_targets[
                        b, index, match,
                        1] = gty / orig_height * height - loc_y  # sigmoid(ty)
                    scale_targets[b, index, match, 0] = np.log(
                        max(gtw, 1) / np_anchors[match, 0])  # tw
                    scale_targets[b, index, match, 1] = np.log(
                        max(gth, 1) / np_anchors[match, 1])  # th
                    weights[
                        b, index,
                        match, :] = 2.0 - gtw * gth / orig_width / orig_height  # ????
                    objectness[b, index, match,
                               0] = (np_gt_mixratios[b, m, 0]
                                     if np_gt_mixratios is not None else 1)
                    class_targets[b, index, match, :] = 0
                    class_targets[b, index, match,
                                  int(np_gt_ids[b, m, 0])] = 1  # one-hot encoding

            # some stages won't see all anchors, so we have to slice out the correct targets
            objectness = self._slice(objectness, num_anchors, num_offsets)
            center_targets = self._slice(center_targets, num_anchors,
                                         num_offsets)
            scale_targets = self._slice(scale_targets, num_anchors,
                                        num_offsets)
            weights = self._slice(weights, num_anchors, num_offsets)
            class_targets = self._slice(class_targets, num_anchors,
                                        num_offsets)
        # the returned arrays each have shape (B, 10647, 1 or 2)
        # where B = 1 and 10647 = 13 * 13 * 3 + 26 * 26 * 3 + 52 * 52 * 3
        return objectness, center_targets, scale_targets, weights, class_targets
Example #21
def forward(self, inputs, state):
    X = nd.one_hot(inputs.T, self.vocab_size)
    Y, state = self.rnn(X, state)
    output = self.Dense(Y.reshape((-1, Y.shape[-1])))
    return output, state
Example #22
def forward(self, inputs, state, *args):
    X = nd.one_hot(inputs.T, self.data_size)
    Y, state = self.rnn(X, state)
    output = self.dense(Y.reshape((-1, Y.shape[-1])))
    return output, state
Example #23
def to_onehot(X, size):
    """Represent inputs with one-hot encoding."""
    return [nd.one_hot(x, size) for x in X.T]
Example #24
    ctx = mx.cpu()

    train_data, test_data = load_data_mnist(batch_size=batch_size, resize=28)
    #print(train_data.shape)
    net = CapsNet(batch_size=batch_size, ctx=ctx)
    print(net)
    trainer = Trainer(net.collect_params(), 'adam', {'learning_rate': 0.01})

    for epoch in range(epochs):
        train_loss0 = 0.
        train_acc0 = 0.
        train_loss = 0.
        train_acc = 0.
        for i, batch in enumerate(train_data):
            data, label = batch
            one_hot_label = nd.one_hot(label, 10)

            label = label.as_in_context(ctx)
            one_hot_label = one_hot_label.as_in_context(ctx)
            data = data.as_in_context(ctx)

            with autograd.record():
                output = net(data)
                L = CapsuleMarginLoss(output, one_hot_label, lambda_value)

            L.backward()
            trainer.step(data.shape[0])
            n = i + 1

            train_loss += nd.mean(L).asscalar()
            train_acc += nd.mean(nd.argmax(output, axis=1) == label).asscalar()
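
CapsuleMarginLoss is not shown; a minimal sketch of the margin loss from the CapsNet paper (Sabour et al., 2017), assuming output holds one capsule length per class (hypothetical body -- only the name and call signature come from the snippet):

def CapsuleMarginLoss(v_norm, one_hot_label, lambda_value, m_plus=0.9, m_minus=0.1):
    # present classes are penalized for lengths below m_plus,
    # absent classes for lengths above m_minus
    loss = (one_hot_label * nd.square(nd.maximum(0., m_plus - v_norm)) +
            lambda_value * (1. - one_hot_label) * nd.square(nd.maximum(0., v_norm - m_minus)))
    return nd.sum(loss, axis=1)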
Example #25
    def forward(self, img, xs, anchors, offsets, gt_boxes, gt_ids, gt_mixratio=None):
        """Generating training targets that do not require network predictions.

        Parameters
        ----------
        img : mxnet.nd.NDArray
            Original image tensor.
        xs : list of mxnet.nd.NDArray
            List of feature maps.
        anchors : mxnet.nd.NDArray
            YOLO3 anchors.
        offsets : mxnet.nd.NDArray
            Pre-generated x and y offsets for YOLO3.
        gt_boxes : mxnet.nd.NDArray
            Ground-truth boxes.
        gt_ids : mxnet.nd.NDArray
            Ground-truth IDs.
        gt_mixratio : mxnet.nd.NDArray, optional
            Mixup ratio from 0 to 1.

        Returns
        -------
        (tuple of) mxnet.nd.NDArray
            objectness: 0 for negative, 1 for positive, -1 for ignore.
            center_targets: regression target for center x and y.
            scale_targets: regression target for scale x and y.
            weights: element-wise gradient weights for center_targets and scale_targets.
            class_targets: a one-hot vector for classification.

        """
        assert isinstance(anchors, (list, tuple))
        all_anchors = nd.concat(*[a.reshape(-1, 2) for a in anchors], dim=0)
        assert isinstance(offsets, (list, tuple))
        all_offsets = nd.concat(*[o.reshape(-1, 2) for o in offsets], dim=0)
        num_anchors = np.cumsum([a.size // 2 for a in anchors])
        num_offsets = np.cumsum([o.size // 2 for o in offsets])
        _offsets = [0] + num_offsets.tolist()
        assert isinstance(xs, (list, tuple))
        assert len(xs) == len(anchors) == len(offsets)

        # orig image size
        orig_height = img.shape[2]
        orig_width = img.shape[3]
        with autograd.pause():
            # outputs
            shape_like = all_anchors.reshape((1, -1, 2)) * all_offsets.reshape(
                (-1, 1, 2)).expand_dims(0).repeat(repeats=gt_ids.shape[0], axis=0)
            center_targets = nd.zeros_like(shape_like)
            scale_targets = nd.zeros_like(center_targets)

            weights = nd.zeros_like(center_targets)
            objectness = nd.zeros_like(weights.split(axis=-1, num_outputs=2)[0])
            class_targets = nd.one_hot(objectness.squeeze(axis=-1), depth=self._num_class)
            class_targets[:] = -1  # prefill -1 for ignores

            # for each ground-truth, find the best matching anchor within the particular grid
            # for instance, center of object 1 reside in grid (3, 4) in (16, 16) feature map
            # then only the anchor in (3, 4) is going to be matched
            gtx, gty, gtw, gth = self.bbox2center(gt_boxes)
            shift_gt_boxes = nd.concat(-0.5 * gtw, -0.5 * gth, 0.5 * gtw, 0.5 * gth, dim=-1)
            anchor_boxes = nd.concat(0 * all_anchors, all_anchors, dim=-1)  # zero center anchors
            shift_anchor_boxes = self.bbox2corner(anchor_boxes)
            ious = nd.contrib.box_iou(shift_anchor_boxes, shift_gt_boxes).transpose((1, 0, 2))
            # real value is required to process, convert to Numpy
            matches = ious.argmax(axis=1).asnumpy()  # (B, M)
            valid_gts = (gt_boxes >= 0).asnumpy().prod(axis=-1)  # (B, M)
            np_gtx, np_gty, np_gtw, np_gth = [x.asnumpy() for x in [gtx, gty, gtw, gth]]
            np_anchors = all_anchors.asnumpy()

            np_gt_ids = gt_ids.asnumpy()
            np_gt_mixratios = gt_mixratio.asnumpy() if gt_mixratio is not None else None
            # TODO(zhreshold): the number of valid gt is not a big number, therefore for loop
            # should not be a problem right now. Switch to better solution is needed.
            for b in range(matches.shape[0]):
                for m in range(matches.shape[1]):
                    if valid_gts[b, m] < 1:
                        break
                    match = int(matches[b, m])
                    nlayer = np.nonzero(num_anchors > match)[0][0]
                    height = xs[nlayer].shape[2]
                    width = xs[nlayer].shape[3]
                    gtx, gty, gtw, gth = (np_gtx[b, m, 0], np_gty[b, m, 0],
                                          np_gtw[b, m, 0], np_gth[b, m, 0])
                    # compute the location of the gt centers
                    loc_x = int(gtx / orig_width * width)
                    loc_y = int(gty / orig_height * height)
                    # write back to targets
                    index = _offsets[nlayer] + loc_y * width + loc_x
                    center_targets[b, index, match, 0] = gtx / orig_width * width - loc_x  # tx
                    center_targets[b, index, match, 1] = gty / orig_height * height - loc_y  # ty
                    scale_targets[b, index, match, 0] = np.log(max(gtw, 1) / np_anchors[match, 0])
                    scale_targets[b, index, match, 1] = np.log(max(gth, 1) / np_anchors[match, 1])

                    weights[b, index, match, :] = 2.0 - gtw * gth / orig_width / orig_height
                    objectness[b, index, match, 0] = (
                        np_gt_mixratios[b, m, 0] if np_gt_mixratios is not None else 1)
                    class_targets[b, index, match, :] = 0
                    class_targets[b, index, match, int(np_gt_ids[b, m, 0])] = 1
            # some stages won't see all anchors, so we have to slice out the correct targets
            objectness = self._slice(objectness, num_anchors, num_offsets)
            center_targets = self._slice(center_targets, num_anchors, num_offsets)
            scale_targets = self._slice(scale_targets, num_anchors, num_offsets)
            weights = self._slice(weights, num_anchors, num_offsets)
            class_targets = self._slice(class_targets, num_anchors, num_offsets)
        return objectness, center_targets, scale_targets, weights, class_targets
Example #26
def to_onehot(X, size):  # one column, one sample
    return [nd.one_hot(x, size) for x in X.T]
Example #27
    def forward(self,
                img,
                xs,
                anchors,
                offsets,
                gt_boxes,
                gt_ids,
                gt_mixratio=None):
        """Generating training targets that do not require network predictions.

        Parameters
        ----------
        img : mxnet.nd.NDArray
            Original image tensor.
        xs : list of mxnet.nd.NDArray
            List of feature maps.
        anchors : mxnet.nd.NDArray
            YOLO3 anchors.
        offsets : mxnet.nd.NDArray
            Pre-generated x and y offsets for YOLO3.
        gt_boxes : mxnet.nd.NDArray
            Ground-truth boxes.
        gt_ids : mxnet.nd.NDArray
            Ground-truth IDs.
        gt_mixratio : mxnet.nd.NDArray, optional
            Mixup ratio from 0 to 1.

        Returns
        -------
        (tuple of) mxnet.nd.NDArray
            objectness: 0 for negative, 1 for positive, -1 for ignore.
            center_targets: regression target for center x and y.
            scale_targets: regression target for scale x and y.
            weights: element-wise gradient weights for center_targets and scale_targets.
            class_targets: a one-hot vector for classification.

        """
        assert isinstance(anchors, (list, tuple))
        # anchors here is one big list wrapping three sub-lists
        # for 416*416 input, all_anchors --- (9, 2)
        all_anchors = nd.concat(*[a.reshape(-1, 2) for a in anchors], dim=0)
        assert isinstance(offsets, (list, tuple))
        # the role of offsets:
        # for 416*416 input, all_offsets --- (3549, 2), 3549 = 169(13*13) + 676(26*26) + 2704(52*52)
        all_offsets = nd.concat(*[o.reshape(-1, 2) for o in offsets], dim=0)
        # for 416*416 input, num_anchors ---- [3, 6, 9]
        num_anchors = np.cumsum([a.size // 2 for a in anchors])
        # for 416*416 input, num_offsets ---- [169, 845, 3549]
        num_offsets = np.cumsum([o.size // 2 for o in offsets])
        _offsets = [0] + num_offsets.tolist()
        assert isinstance(xs, (list, tuple))
        assert len(xs) == len(anchors) == len(offsets)

        # orig image size
        # get the size of the training image
        orig_height = img.shape[2]
        orig_width = img.shape[3]
        with autograd.pause():
            # outputs
            # shape_like: (N * 3549 * 9 * 2), the shape shared by several of the targets
            shape_like = all_anchors.reshape((1, -1, 2)) * all_offsets.reshape(
                (-1, 1, 2)).expand_dims(0).repeat(repeats=gt_ids.shape[0],
                                                  axis=0)
            # the arrays below hold the converted ground-truth values to be returned
            # center_targets: cx, cy, (N * 3549 * 9 * 2)
            center_targets = nd.zeros_like(shape_like)
            # scale_targets: w, h , (N * 3549 * 9 * 2)
            scale_targets = nd.zeros_like(center_targets)
            # weights: meaning (TO_DO), (N * 3549 * 9 * 2)
            weights = nd.zeros_like(center_targets)
            # objectness: confidence, (N * 3549 * 9 * 1)
            objectness = nd.zeros_like(
                weights.split(axis=-1, num_outputs=2)[0])
            # class_targets: the target labels as one-hot vectors, (N * 3549 * 9 * self._num_class); initialized to -1, meaning ignore
            class_targets = nd.one_hot(objectness.squeeze(axis=-1),
                                       depth=self._num_class)
            class_targets[:] = -1  # prefill -1 for ignores

            # for each ground-truth, find the best matching anchor within the particular grid
            # for instance, center of object 1 reside in grid (3, 4) in (16, 16) feature map
            # then only the anchor in (3, 4) is going to be matched
            # find the best-matching anchor
            # yolo's iou matching only compares sizes, so convert the boxes from corner to center format
            gtx, gty, gtw, gth = self.bbox2center(gt_boxes)
            # build boxes centered at (0, 0) with the same size as each gt box, back in corner format
            shift_gt_boxes = nd.concat(-0.5 * gtw,
                                       -0.5 * gth,
                                       0.5 * gtw,
                                       0.5 * gth,
                                       dim=-1)
            # prepend (0, 0) to each of the 9 preset anchors, giving e.g. (0, 0, 116, 90): center-format boxes with the anchor sizes
            anchor_boxes = nd.concat(0 * all_anchors, all_anchors,
                                     dim=-1)  # zero center anchors
            # convert the anchor boxes to corner format to match the gt boxes
            shift_anchor_boxes = self.bbox2corner(anchor_boxes)
            # compute the IOU between each anchor and each gt box
            ious = nd.contrib.box_iou(shift_anchor_boxes,
                                      shift_gt_boxes).transpose((1, 0, 2))
            # real values are required for processing, so convert to NumPy
            # find, for each gt box, the best-matching (highest-IOU) anchor
            matches = ious.argmax(axis=1).asnumpy()  # (B, M)
            # valid_gts marks which boxes are valid; it acts as a mask: boxes padded to -1 in the dataloader (to unify the batch) get a mask value of 0
            valid_gts = (gt_boxes >= 0).asnumpy().prod(axis=-1)  # (B, M)
            np_gtx, np_gty, np_gtw, np_gth = [
                x.asnumpy() for x in [gtx, gty, gtw, gth]
            ]
            np_anchors = all_anchors.asnumpy()
            np_gt_ids = gt_ids.asnumpy()
            np_gt_mixratios = gt_mixratio.asnumpy(
            ) if gt_mixratio is not None else None
            # TODO(zhreshold): the number of valid gts is not large, so a for loop
            # is not a problem right now; switching to a better solution may be needed later
            # outer loop: images in the batch; inner loop: boxes within one image
            # this loop also reflects that in YOLOv3 training each gt box is matched to a single best anchor
            for b in range(matches.shape[0]):
                for m in range(matches.shape[1]):
                    # padding is appended at the end, so on the first invalid box we can break out of the inner loop and move on to the next image
                    if valid_gts[b, m] < 1:
                        break
                    # index of the best-matching anchor for the m-th box of image b; anchors are indexed from largest to smallest
                    match = int(matches[b, m])
                    # determine which output stage the matched anchor belongs to
                    nlayer = np.nonzero(num_anchors > match)[0][0]
                    # xs is the list of feature maps; after selecting the stage, it provides the feature map's height and width
                    height = xs[nlayer].shape[2]
                    width = xs[nlayer].shape[3]
                    # get this box's actual (cx, cy, w, h) in original-image coordinates
                    gtx, gty, gtw, gth = (np_gtx[b, m, 0], np_gty[b, m, 0],
                                          np_gtw[b, m, 0], np_gth[b, m, 0])
                    # compute the location of the gt centers
                    # map the gt center cx, cy onto the matched stage's feature-map grid
                    loc_x = int(gtx / orig_width * width)
                    loc_y = int(gty / orig_height * height)
                    # write back to targets
                    # index of the grid cell the box center falls into
                    index = _offsets[nlayer] + loc_y * width + loc_x
                    # batch targets are laid out as (B, Cell, Anchor, x), where x depends on the target type, e.g. 2 for the center coordinates
                    # cx, cy targets, each in the range [0, 1]
                    center_targets[b, index, match,
                                   0] = gtx / orig_width * width - loc_x  # tx
                    center_targets[
                        b, index, match,
                        1] = gty / orig_height * height - loc_y  # ty
                    # w, h targets
                    scale_targets[b, index, match, 0] = np.log(
                        max(gtw, 1) / np_anchors[match, 0])
                    scale_targets[b, index, match, 1] = np.log(
                        max(gth, 1) / np_anchors[match, 1])
                    # reduce the influence of box size on the loss; YOLOv1 predicted sqrt(w) instead, while here a size-dependent weight is used
                    weights[
                        b, index,
                        match, :] = 2.0 - gtw * gth / orig_width / orig_height
                    # the objectness target is normally set to 1
                    # so without mixup, this target generator splits anchors into two classes: the best-IOU match gets 1, everything else stays 0
                    objectness[b, index, match,
                               0] = (np_gt_mixratios[b, m, 0]
                                     if np_gt_mixratios is not None else 1)
                    class_targets[b, index, match, :] = 0
                    class_targets[b, index, match, int(np_gt_ids[b, m, 0])] = 1
            # each output stage only sees its own subset of anchors, so we have to slice out the matching targets
            # this final slice converts every target into the per-stage format B * (Cell*Anchor) * x
            objectness = self._slice(objectness, num_anchors, num_offsets)
            center_targets = self._slice(center_targets, num_anchors,
                                         num_offsets)
            scale_targets = self._slice(scale_targets, num_anchors,
                                        num_offsets)
            weights = self._slice(weights, num_anchors, num_offsets)
            class_targets = self._slice(class_targets, num_anchors,
                                        num_offsets)
        return objectness, center_targets, scale_targets, weights, class_targets
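To make the flat cell-index arithmetic above concrete, here is a small worked sketch (the 416*416 shapes come from the comments above; the values of match, loc_x and loc_y are made up for illustration):

import numpy as np

# cumulative anchor / offset counts for a 416*416 input (see comments above)
num_anchors = np.cumsum([3, 3, 3])               # [3, 6, 9]
num_offsets = np.cumsum([13*13, 26*26, 52*52])   # [169, 845, 3549]
_offsets = [0] + num_offsets.tolist()            # [0, 169, 845, 3549]

match = 4                                        # hypothetical best-anchor index
nlayer = np.nonzero(num_anchors > match)[0][0]   # anchor 4 lives in stage 1 (the 26*26 map)
width = 26
loc_x, loc_y = 7, 12                             # hypothetical grid cell of the gt center
index = _offsets[nlayer] + loc_y * width + loc_x # flat cell index: 169 + 12*26 + 7 = 488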
Example #28
    def forward(self, img, xs, anchors, offsets, gt_boxes, gt_ids, gt_mixratio=None):
        """Generating training targets that do not require network predictions.

        Parameters
        ----------
        img : mxnet.nd.NDArray
            Original image tensor.
        xs : list of mxnet.nd.NDArray
            List of feature maps.
        anchors : mxnet.nd.NDArray
            YOLO3 anchors.
        offsets : mxnet.nd.NDArray
            Pre-generated x and y offsets for YOLO3.
        gt_boxes : mxnet.nd.NDArray
            Ground-truth boxes.
        gt_ids : mxnet.nd.NDArray
            Ground-truth IDs.
        gt_mixratio : mxnet.nd.NDArray, optional
            Mixup ratio from 0 to 1.

        Returns
        -------
        (tuple of) mxnet.nd.NDArray
            objectness: 0 for negative, 1 for positive, -1 for ignore.
            center_targets: regression target for center x and y.
            scale_targets: regression target for scale x and y.
            weights: element-wise gradient weights for center_targets and scale_targets.
            class_targets: a one-hot vector for classification.

        """
        assert isinstance(anchors, (list, tuple))
        all_anchors = nd.concat(*[a.reshape(-1, 2) for a in anchors], dim=0)
        assert isinstance(offsets, (list, tuple))
        all_offsets = nd.concat(*[o.reshape(-1, 2) for o in offsets], dim=0)
        num_anchors = np.cumsum([a.size // 2 for a in anchors])
        num_offsets = np.cumsum([o.size // 2 for o in offsets])
        _offsets = [0] + num_offsets.tolist()
        assert isinstance(xs, (list, tuple))
        assert len(xs) == len(anchors) == len(offsets)

        # orig image size
        orig_height = img.shape[2]
        orig_width = img.shape[3]
        with autograd.pause():
            # outputs
            shape_like = all_anchors.reshape((1, -1, 2)) * all_offsets.reshape(
                (-1, 1, 2)).expand_dims(0).repeat(repeats=gt_ids.shape[0], axis=0)
            center_targets = nd.zeros_like(shape_like)
            scale_targets = nd.zeros_like(center_targets)
            weights = nd.zeros_like(center_targets)
            objectness = nd.zeros_like(weights.split(axis=-1, num_outputs=2)[0])
            class_targets = nd.one_hot(objectness.squeeze(axis=-1), depth=self._num_class)
            class_targets[:] = -1  # prefill -1 for ignores

            # for each ground-truth, find the best matching anchor within the particular grid
            # for instance, if the center of object 1 resides in grid (3, 4) of a (16, 16) feature map,
            # then only the anchors at grid (3, 4) are going to be matched
            gtx, gty, gtw, gth = self.bbox2center(gt_boxes)
            shift_gt_boxes = nd.concat(-0.5 * gtw, -0.5 * gth, 0.5 * gtw, 0.5 * gth, dim=-1)
            anchor_boxes = nd.concat(0 * all_anchors, all_anchors, dim=-1)  # zero center anchors
            shift_anchor_boxes = self.bbox2corner(anchor_boxes)
            ious = nd.contrib.box_iou(shift_anchor_boxes, shift_gt_boxes).transpose((1, 0, 2))
            # real values are required for processing, so convert to NumPy
            matches = ious.argmax(axis=1).asnumpy()  # (B, M)
            valid_gts = (gt_boxes >= 0).asnumpy().prod(axis=-1)  # (B, M)
            np_gtx, np_gty, np_gtw, np_gth = [x.asnumpy() for x in [gtx, gty, gtw, gth]]
            np_anchors = all_anchors.asnumpy()
            np_gt_ids = gt_ids.asnumpy()
            np_gt_mixratios = gt_mixratio.asnumpy() if gt_mixratio is not None else None
            # TODO(zhreshold): the number of valid gts is not large, so a for loop
            # is not a problem right now; switching to a better solution may be needed later
            for b in range(matches.shape[0]):
                for m in range(matches.shape[1]):
                    if valid_gts[b, m] < 1:
                        break
                    match = int(matches[b, m])
                    nlayer = np.nonzero(num_anchors > match)[0][0]
                    height = xs[nlayer].shape[2]
                    width = xs[nlayer].shape[3]
                    gtx, gty, gtw, gth = (np_gtx[b, m, 0], np_gty[b, m, 0],
                                          np_gtw[b, m, 0], np_gth[b, m, 0])
                    # compute the location of the gt centers
                    loc_x = int(gtx / orig_width * width)
                    loc_y = int(gty / orig_height * height)
                    # write back to targets
                    index = _offsets[nlayer] + loc_y * width + loc_x
                    center_targets[b, index, match, 0] = gtx / orig_width * width - loc_x  # tx
                    center_targets[b, index, match, 1] = gty / orig_height * height - loc_y  # ty
                    scale_targets[b, index, match, 0] = np.log(gtw / np_anchors[match, 0])
                    scale_targets[b, index, match, 1] = np.log(gth / np_anchors[match, 1])
                    weights[b, index, match, :] = 2.0 - gtw * gth / orig_width / orig_height
                    objectness[b, index, match, 0] = (
                        np_gt_mixratios[b, m, 0] if np_gt_mixratios is not None else 1)
                    class_targets[b, index, match, :] = 0
                    class_targets[b, index, match, int(np_gt_ids[b, m, 0])] = 1
            # each output stage only sees its own subset of anchors, so we have to slice out the matching targets
            objectness = self._slice(objectness, num_anchors, num_offsets)
            center_targets = self._slice(center_targets, num_anchors, num_offsets)
            scale_targets = self._slice(scale_targets, num_anchors, num_offsets)
            weights = self._slice(weights, num_anchors, num_offsets)
            class_targets = self._slice(class_targets, num_anchors, num_offsets)
        return objectness, center_targets, scale_targets, weights, class_targets
Example #29
        numerator += nd.sum(predictions == label)
        denominator += data.shape[0]
    return (numerator / denominator).asscalar()


# Defining some variables for training the model
epochs = 5
learning_rate = .01
smoothing_constant = .01

# Training loop
for e in range(epochs):
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        label_one_hot = nd.one_hot(label, num_outputs)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label_one_hot)
        loss.backward()
        SGD(params, learning_rate)
        # Keep a moving average of the loss
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0)) else
                       (1 - smoothing_constant) * moving_loss +
                       (smoothing_constant) * curr_loss)

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, moving_loss, train_accuracy, test_accuracy))
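softmax_cross_entropy and SGD are called above but not defined in this snippet; the sketch below is one minimal implementation consistent with how they are used (an assumption, not necessarily the snippet author's exact code):

from mxnet import nd

def softmax_cross_entropy(yhat_linear, y_one_hot):
    # per-sample cross-entropy between softmax(yhat_linear) and the one-hot targets
    return -nd.nansum(y_one_hot * nd.log_softmax(yhat_linear), axis=0, exclude=True)

def SGD(params, lr):
    # in-place vanilla gradient step over all parameters
    for param in params:
        param[:] = param - lr * param.grad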
 def forward(self, inputs, state):
     X = nd.one_hot(inputs.T, self.vocab_size)
     Y, state = self.rnn(X, state)
     output = self.dense(Y.reshape((-1, Y.shape[-1])))
     return output, state
Example #31
  def train(self, input_word_idx, input_len, input_seg, target_word_idx, target_len,
            target_seg, pm_error_idx, pm_add_idx, pm_remove_idx,
            inputs_text, targets_text, devices, batch_size, trainer):

    num_device = len(devices)
    # per-device buffers, one slot per device
    seq_encoding = [None] * num_device
    cls_encoding = [None] * num_device
    decoder_state = [None] * num_device
    target_word_emb = [None] * num_device
    predict_word_emb = [None] * num_device
    predict_word_logit = [None] * num_device
    target_word_logit = [None] * num_device
    input_word_logit = [None] * num_device
    loss = [None] * num_device
    loss_review = [None] * num_device
    loss_pm = [None] * num_device
    
  
    for i in range(num_device):
      with autograd.record():
        seq_encoding[i], cls_encoding[i] = self.encoder(input_word_idx[i], input_seg[i], input_len[i])
        
        if self.config['use_encoder_constraint']:
          max_target_len = int(max(input_len[i].asnumpy()))
      
          _predict_pm_error_logit = nd.softmax(self.fc_pm_error(seq_encoding[i]))
          _predict_pm_add_logit = nd.softmax(self.fc_pm_add(seq_encoding[i]))
          _predict_pm_remove_logit = nd.softmax(self.fc_pm_remove(seq_encoding[i]))
          
          # _target_start_logit = nd.one_hot(start_idx[i], 2).reshape_like(_predict_start_logit)
          # _target_end_logit = nd.one_hot(end_idx[i], 2).reshape_like(_predict_end_logit)
          _target_pm_error_logit = nd.one_hot(pm_error_idx[i], 2).reshape_like(_predict_pm_error_logit)
          _target_pm_add_logit = nd.one_hot(pm_add_idx[i], 2).reshape_like(_predict_pm_add_logit)
          _target_pm_remove_logit = nd.one_hot(pm_remove_idx[i], 2).reshape_like(_predict_pm_remove_logit)
          
          # print('predcit logit sum : ',( _predict_error_logit.argmax(-1) > 0).sum())
          
          pm_error_balance_mask = self.balance_class(_predict_pm_error_logit[:, : max_target_len], pm_error_idx[i][:, : max_target_len]).detach()
          pm_add_balance_mask = self.balance_class(_predict_pm_add_logit[:, : max_target_len], pm_add_idx[i][:, : max_target_len]).detach()
          pm_remove_balance_mask = self.balance_class(_predict_pm_remove_logit[:, : max_target_len], pm_remove_idx[i][:, : max_target_len]).detach()
          
          # _loss_start = self.ce(_predict_start_logit, _target_start_logit)
          # _loss_end = self.ce(_predict_end_logit, _target_end_logit)
          loss_pm_error = self.ce(_predict_pm_error_logit[:, : max_target_len], _target_pm_error_logit[:, : max_target_len]) * pm_error_balance_mask
          loss_pm_add = self.ce(_predict_pm_add_logit[:, : max_target_len], _target_pm_add_logit[:, : max_target_len]) * pm_add_balance_mask
          loss_pm_remove = self.ce(_predict_pm_remove_logit[:, : max_target_len], _target_pm_remove_logit[:, : max_target_len]) * pm_remove_balance_mask
          
          loss_pm[i] = (loss_pm_error + loss_pm_add + loss_pm_remove) / 3
          
          # print(loss_pm[i])
    
    # nd.waitall()
    for i in range(num_device):
      with autograd.record():
        # Decoder over target word embeddings
        # seq_encoding[i], cls_encoding[i] = self.encoder(input_word_idx[i], input_seg[i], input_len[i])
        decoder_state[i] = self.decoder.init_state_from_encoder(seq_encoding[i], input_len[i])
        target_word_emb[i] = self.emb_tgt(target_word_idx[i])
        predict_word_emb[i], _, _ = self.decoder.decode_seq(target_word_emb[i], decoder_state[i])#, valid_len)
        
        
        # target_word_logit_train = nd.softmax(self.fc_proj(target_word_emb[i]))
        
        # print(target_word_logit_train.shape)
        
        # print(target_word_logit[i].shape)
        
        # raise
        
        predict_word_logit[i] = nd.softmax(self.fc_proj(predict_word_emb[i]))
        target_word_logit[i] = nd.one_hot(target_word_idx[i], len(self.vocab_tgt))
        input_word_logit[i] = nd.one_hot(input_word_idx[i], len(self.vocab_src))
        
        max_target_len = int(max(target_len[i].asnumpy()))
        loss_review[i] = self.ce(predict_word_logit[i][:, : max_target_len - 1], target_word_logit[i][:, 1 : max_target_len])
        
        if self.config['use_encoder_constraint']:
          # loss[i] = (loss_review[i].mean() + loss_pm[i].mean()) / 2
          loss[i] = loss_review[i].mean() + loss_pm[i].mean()
        else:
          loss[i] = loss_review[i].mean()  
        
        #loss[i] = loss[i].mean([1]) + (((predict_word_emb[i][:, : max_target_len - 1]) - target_word_emb[i][:, 1 : max_target_len]) ** 2).mean([1, 2])
        
        #+ self.ce(target_word_logit_train[:, 1 : max_target_len], target_word_logit[i][:, 1 : max_target_len])


      # """ Decoder """
      # decoder_pm_state = self.decoder_pm.init_state_from_encoder(seq_encoding, valid_len)
      # decoder_action_state = self.decoder_action.init_state_from_encoder(seq_encoding, valid_len)
      
      # targets_action_embs = self.emb_actions(targets_action)
      # targets_pm_embs = self.emb_pms(targets_pm)
      
      # max_valid_len = int(valid_len.max().asnumpy())
      
      # action_output_embs, _, _ = self.decoder_action.decode_seq(targets_action_embs[ : , : max_valid_len], decoder_action_state)#, valid_len)
      # pm_output_embs, _, _ = self.decoder_pm.decode_seq(targets_pm_embs[ : , : max_valid_len], decoder_pm_state)#, valid_len)                                                                        
      
      # action_output = nd.softmax(self.fc_actions(self.dropout(action_output_embs)))
      # pm_output = nd.softmax(self.fc_pms(self.dropout(pm_output_embs)))
      
      # action_idx = action_output.argmax(-1)
      # pm_idx = pm_output.argmax(-1)
      
      # action_mask, pm_mask = self.balance_multi_objective(action_idx, pm_idx, targets_action, targets_pm, 3)
      
      # targets_action_logits = nd.one_hot(targets_action, len(self.actions))
      # targets_pm_logits = nd.one_hot(targets_pm, len(self.pms))
      
      # action_loss = self.ce(action_output  * action_mask, targets_action_logits[:, 1 : max_valid_len + 1] * action_mask)
      # pm_loss = self.ce(pm_output * pm_mask, targets_pm_logits[:,1 : max_valid_len + 1] * pm_mask)
      
      # loss = action_loss / action_mask.sum().detach() + pm_loss / pm_mask.sum().detach()
      
      
      # """ Decoder End """
      
      # """ Encoder Start """
      
      
      # targets_action_logits = nd.one_hot(targets_action, len(self.actions))
      # targets_pm_logits = nd.one_hot(targets_pm, len(self.pms))
      
      # action_output = nd.softmax(self.fc_actions(self.dropout(seq_encoding)))
      # pm_output = nd.softmax(self.fc_pms(self.dropout(seq_encoding)))
      
      # max_valid_len = int(valid_len.max().asnumpy())
      
      # action_idx = action_output.argmax(-1)
      # pm_idx = pm_output.argmax(-1)
      
      # action_mask, pm_mask = self.balance_multi_objective(action_idx, pm_idx, targets_action, targets_pm, 3)
      
      # action_loss = self.ce(action_output[:, :max_valid_len ] * action_mask[:, :max_valid_len],
      #               targets_action_logits[:, :max_valid_len] * action_mask[:, :max_valid_len])
                    
      # pm_loss = self.ce(pm_output[:, :max_valid_len ] * pm_mask[:, :max_valid_len],
      # targets_pm_logits[:, :max_valid_len] * pm_mask[:, :max_valid_len])
      
      # loss = action_loss.sum() / action_mask.sum() + pm_loss.sum() / pm_mask.sum()
      
      # """ Encoder End """
      
    # debug_action_loss = self.ce((action_output  * action_mask) [0:,  : max_valid_len], targets_action_logits[:, 1 : max_valid_len + 1] * action_mask)
    # debug_pm_loss = self.ce(pm_output[:, : max_valid_len] * pm_mask, targets_pm_logits[0:,1:max_valid_len + 1] * pm_mask)
      
    # print('action loss : ', (action_loss / action_mask.sum()).sum())
    # print('pm_loss : ', (pm_loss / pm_mask.sum()).sum())
    # nd.waitall()

    for _loss in loss:
      _loss.backward()
      # _loss_pm.backward()
    # nd.waitall()

    
    nd.waitall()
    # decode_text = self.decode(inputs_text[0], action_output[0], pm_output[0])
    
    # decode_text_debug = self.decode(inputs_text[0], targets_action_logits[0, 1 : ], targets_pm_logits[0, 1:])
    
    # print('debug => ', decode_text_debug)
    #self.decode_beamsearch(decoder_state[0], int(batch_size / len(devices)), devices[0])
    
    # trainer.step(batch_size, ignore_stale_grad = True)
    trainer.step(1, ignore_stale_grad=True)
    
    if self.config['use_encoder_constraint']:
    
      loss_review = sum([_loss.mean().asnumpy() for _loss in loss_review])
      loss_pm = sum([_loss.mean().asnumpy() for _loss in loss_pm])
      
      return loss_review, loss_pm #, self.decode_greedy(predict_word_logit[0][0]).replace('[PAD]', '')
    else:
      loss_review = sum([_loss.mean().asnumpy() for _loss in loss_review])
      return loss_review, None
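balance_class is called above but never shown. Below is a hypothetical sketch of such a class-balancing mask (the helper name matches the call sites, but the body and the roughly 1:1 positive/negative keep ratio are assumptions): keep every positive position and randomly keep about as many negatives, so the dominant negative class does not drown out the loss. The real method may instead use predict_logit for hard-negative mining; this sketch balances by count only.

from mxnet import nd

def balance_class(predict_logit, target_idx):
    # hypothetical helper: returns a weight mask with the same shape as target_idx
    pos = (target_idx > 0).astype('float32')                   # positive positions
    num_pos = float(pos.sum().asscalar())
    num_neg = float((1 - pos).sum().asscalar())
    keep_prob = min(1.0, (num_pos + 1.0) / max(num_neg, 1.0))  # assumed ~1:1 ratio
    neg_keep = nd.random.uniform(shape=pos.shape, ctx=pos.context) < keep_prob
    return pos + (1 - pos) * neg_keep.astype('float32')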
Example #32
File: jay_lstm.py Project: xcszbdnl/Toy
def get_inputs(data, vocab_size):
    return [nd.one_hot(X, vocab_size) for X in data.T]
def to_onehot(X, size):
    return [nd.one_hot(x, size) for x in X.T]  # in X, columns are features and rows are samples
Example #34
File: func.py Project: chr5tphr/ecGAN
def fuzzy_one_hot(arr, size):
    x = arr.reshape((-1, ))
    return nd.where(
        nd.one_hot(x, size),
        nd.uniform(low=0.7, high=1.2, shape=(x.shape[0], size), ctx=x.context),
        nd.uniform(low=0.0, high=0.3, shape=(x.shape[0], size), ctx=x.context))
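A quick usage example (the label values are arbitrary): hot entries are drawn uniformly from [0.7, 1.2) and cold entries from [0.0, 0.3), a common label-smoothing trick for GAN discriminators:

from mxnet import nd

labels = nd.array([0, 2, 1])
smooth = fuzzy_one_hot(labels, 3)   # shape (3, 3)
# each row has one entry drawn near 1 (at the label index) and the rest near 0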
Example #35
File: utils.py Project: tsintian/d2l-zh
 def forward(self, inputs, state):
     X = nd.one_hot(inputs.T, self.vocab_size)
     Y, state = self.rnn(X, state)
     output = self.dense(Y.reshape((-1, Y.shape[-1])))
     return output, state
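A minimal usage sketch, assuming the method above belongs to a d2l-style wrapper class (here called RNNModel; the name, constructor, and begin_state delegation to the inner rnn layer are all assumptions, since only forward is shown):

from mxnet import nd
from mxnet.gluon import rnn

model = RNNModel(rnn_layer=rnn.GRU(256), vocab_size=1027)  # hypothetical constructor
model.initialize()
state = model.begin_state(batch_size=2)
inputs = nd.array([[3, 5, 7], [8, 1, 4]])                  # (batch_size=2, num_steps=3)
output, state = model(inputs, state)
print(output.shape)                                        # (num_steps*batch_size, vocab_size) = (6, 1027)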
def test_one_hot():
    # default dtype of ndarray is float32 which cannot index elements over 2^32
    a = nd.array([1, (VLARGE_X - 1)], dtype=np.int64)
    b = nd.one_hot(a, VLARGE_X)
    assert b[0][1] == 1
    assert b[1][-1] == 1
Example #37
def to_onehot(X, size):
    """Represent inputs with one-hot encoding."""
    return [nd.one_hot(x, size) for x in X.T]
Example #38
File: train.py Project: tsintian/d2l-en
def to_onehot(X, size):
    return [nd.one_hot(x, size) for x in X.T]
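A quick shape check for to_onehot (values arbitrary): a (batch_size, num_steps) index matrix becomes a list of num_steps matrices, one per time step, each of shape (batch_size, size):

from mxnet import nd

X = nd.arange(10).reshape((2, 5))    # batch_size=2, num_steps=5
inputs = to_onehot(X, 10)
print(len(inputs), inputs[0].shape)  # 5 (2, 10)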