Example #1
    def compute_loss(self, input_vocab, output_vocab, window_words,
                     hidden_states):
        g, rnn_distribution, a = self.decode_one_step(input_vocab,
                                                      window_words,
                                                      hidden_states)
        # define p_vocab as 0 if output word is not in vocab
        p_vocab = F.select_item(
            rnn_distribution,
            xp.array(
                [self.vocab[output_vocab]],
                dtype=xp.int32)) if output_vocab in self.vocab else Variable(
                    xp.array([0.0], dtype=xp.float32))

        # compute cross entropy
        indexes = [i for i, x in enumerate(window_words) if x == output_vocab]
        exist_var = Variable(xp.array([0], dtype=xp.float32))
        for idx in indexes:
            exist_var += F.select_item(a, xp.array([idx], dtype=xp.int32))
        p_ptr = F.cast(exist_var, xp.float32) if indexes else Variable(
            xp.array([0.0], dtype=xp.float32))
        cross_entropy = -F.log(
            F.linear_interpolate(g, p_vocab, p_ptr) +
            Variable(xp.array([0.01], dtype=xp.float32)))

        # compute attention loss
        attention_loss = F.cast(-F.log(g + exist_var),
                                xp.float32) if indexes else Variable(
                                    xp.array([0.0], dtype=xp.float32))
        return cross_entropy + attention_loss
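
The loss above mixes the RNN vocabulary probability and the pointer (copy) probability with F.linear_interpolate, which computes p * x + (1 - p) * y elementwise. A minimal check of that mixing with hypothetical scalar values, not taken from the example:

import numpy as np
import chainer.functions as F

p = np.array([0.7], dtype=np.float32)   # generation probability g
x = np.array([0.2], dtype=np.float32)   # p_vocab
y = np.array([0.9], dtype=np.float32)   # p_ptr
out = F.linear_interpolate(p, x, y)
assert np.isclose(out.array, 0.7 * 0.2 + 0.3 * 0.9)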
Example #2
 def compute_double_q_learning_loss(self, l_obs, l_act, l_rew, l_next_obs,
                                    l_done):
     """
     :param l_obs: A chainer variable holding a list of observations. Should be of shape N * |S|.
     :param l_act: A chainer variable holding a list of actions. Should be of shape N.
     :param l_rew: A chainer variable holding a list of rewards. Should be of shape N.
     :param l_next_obs: A chainer variable holding a list of observations at the next time step. Should be of
     shape N * |S|.
     :param l_done: A chainer variable holding a list of binary values (indicating whether the episode ended after this
     time step). Should be of shape N.
     :return: A chainer variable holding a scalar loss.
     """
     # Hint: You may want to make use of the following fields: self._discount, self._q, self._qt
     # Hint2: Q-function can be called by self._q.forward(argument)
     # Hint3: You might also find https://docs.chainer.org/en/stable/reference/generated/chainer.functions.select_item.html useful
     "*** YOUR CODE HERE ***"
     reward = F.cast(l_rew, np.float32)
     q_forwarded = self._q.forward(l_next_obs)
     qt_forwarded = self._qt.forward(l_next_obs)
     y_non_terminal = reward + self._discount * F.select_item(
         qt_forwarded, F.argmax(q_forwarded, axis=1))
     y_terminal = reward
     y = F.select_item(F.stack([y_non_terminal, y_terminal], axis=1),
                       F.cast(l_done, np.int32))
     Q = F.select_item(self._q.forward(l_obs), l_act)
     return F.mean(F.square(y - Q))
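
The F.stack / F.select_item pair above is a branch on the done flag: rows with l_done == 1 take y_terminal and rows with l_done == 0 take y_non_terminal, which is the usual double-DQN target. A small NumPy sketch of the same selection, using hypothetical values rather than anything from the example:

import numpy as np

rew = np.array([1.0, 0.5], dtype=np.float32)   # hypothetical rewards
done = np.array([0, 1], dtype=np.int32)        # 1 where the episode ended
q_next = np.array([[0.2, 0.9], [0.4, 0.1]])    # online network Q(s', .)
qt_next = np.array([[0.3, 0.8], [0.6, 0.2]])   # target network Q(s', .)
discount = 0.99

best_act = q_next.argmax(axis=1)               # action picked by the online network
y_non_terminal = rew + discount * qt_next[np.arange(2), best_act]
y = np.where(done == 1, rew, y_non_terminal)   # what stack + select_item computes
print(y)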
Example #3
    def compute_double_q_learning_loss(self, l_obs, l_act, l_rew, l_next_obs, l_done):
        """
        :param l_obs: A chainer variable holding a list of observations. Should be of shape N * |S|.
        :param l_act: A chainer variable holding a list of actions. Should be of shape N.
        :param l_rew: A chainer variable holding a list of rewards. Should be of shape N.
        :param l_next_obs: A chainer variable holding a list of observations at the next time step. Should be of
        shape N * |S|.
        :param l_done: A chainer variable holding a list of binary values (indicating whether the episode ended after this
        time step). Should be of shape N.
        :return: A chainer variable holding a scalar loss.
        """
        # Hint: You may want to make use of the following fields: self._discount, self._q, self._qt
        # Hint2: Q-function can be called by self._q.forward(argument)
        # Hint3: You might also find https://docs.chainer.org/en/stable/reference/generated/chainer.functions.select_item.html useful
        # TODO: replace this line

        feed_forward_learner = self._q.forward(l_obs)
        q_learner = F.select_item(feed_forward_learner, l_act)

        action_q_values = self._q.forward(l_next_obs) 
        best_action = F.argmax(action_q_values, axis=1)
        feed_forward_target = self._qt.forward(l_next_obs)
        q_target = F.select_item(feed_forward_target, best_action)

        terminate = F.cast(l_done, bool)
        l_rew = F.cast(l_rew, "float32")
        final_target = F.where(terminate, l_rew, l_rew + self._discount * q_target).data
        loss = F.mean_squared_error(final_target, q_learner)

        return loss
Example #4
    def compute_double_q_learning_loss(self, l_obs, l_act, l_rew, l_next_obs,
                                       l_done):
        """
        :param l_obs: A chainer variable holding a list of observations. Should be of shape N * |S|.
        :param l_act: A chainer variable holding a list of actions. Should be of shape N.
        :param l_rew: A chainer variable holding a list of rewards. Should be of shape N.
        :param l_next_obs: A chainer variable holding a list of observations at the next time step. Should be of
        shape N * |S|.
        :param l_done: A chainer variable holding a list of binary values (indicating whether the episode ended after this
        time step). Should be of shape N.
        :return: A chainer variable holding a scalar loss.
        """
        # Hint: You may want to make use of the following fields: self._discount, self._q, self._qt
        # Hint2: Q-function can be called by self._q.forward(argument)
        # Hint3: You might also find https://docs.chainer.org/en/stable/reference/generated/chainer.functions.select_item.html useful
        obs_q_value = F.select_item(self._q.forward(l_obs), l_act)
        target_q_value = np.zeros(l_done.shape[0])
        for i in range(l_done.shape[0]):
            # l_done and l_rew are chainer variables; index their underlying arrays
            if l_done.data[i]:
                target_q_value[i] = l_rew.data[i]
            else:
                q_value_next = self._q.forward(
                    F.expand_dims(l_next_obs[i], axis=0))
                max_idx = F.argmax(q_value_next)
                target_value = self._qt.forward(
                    F.expand_dims(l_next_obs[i], axis=0))
                max_value = F.select_item(target_value,
                                          np.array([max_idx.data]))
                target_q_value[i] = l_rew.data[i] + self._discount * max_value.data

        loss = F.mean_squared_error(F.cast(target_q_value, np.float32),
                                    F.cast(obs_q_value, np.float32))
        return loss
Example #5
 def compute_double_q_learning_loss(self, l_obs, l_act, l_rew, l_next_obs,
                                    l_done):
     """
     :param l_obs: A chainer variable holding a list of observations. Should be of shape N * |S|.
     :param l_act: A chainer variable holding a list of actions. Should be of shape N.
     :param l_rew: A chainer variable holding a list of rewards. Should be of shape N.
     :param l_next_obs: A chainer variable holding a list of observations at the next time step. Should be of
     shape N * |S|.
     :param l_done: A chainer variable holding a list of binary values (indicating whether the episode ended after this
     time step). Should be of shape N.
     :return: A chainer variable holding a scalar loss.
     """
     # Hint: You may want to make use of the following fields: self._discount, self._q, self._qt
     # Hint2: Q-function can be called by self._q.forward(argument)
     # Hint3: You might also find https://docs.chainer.org/en/stable/reference/generated/chainer.functions.select_item.html useful
     # loss = C.Variable(np.array([0.]))  # TODO: replace this line
     l_rew = F.cast(l_rew, np.float32)
     q_future = self._q.forward(l_next_obs)
     qt_future = self._qt.forward(l_next_obs)
     future_rew = l_rew + self._discount * F.select_item(
         qt_future, F.argmax(q_future, axis=1))
     target = F.select_item(F.stack([future_rew, l_rew], axis=1),
                            F.cast(l_done, np.int32))
     y = F.select_item(self._q.forward(l_obs), l_act)
     return F.mean(F.square(y - target))
Example #6
    def compute_G(self, in_data, out_grad_data):
        gy = out_grad_data[0]
        xp = cuda.get_array_module(gy)

        gy = gy.transpose(0, 2, 3, 1)  # NCHW -> NHWC
        n, ho, wo, _ = gy.shape

        gy = gy.reshape(n * ho * wo, -1)

        gy_scale = n
        if self._loss_scale is not None:
            gy_scale *= 1.0 / self._loss_scale

        if self.diagonalize:
            if gy.dtype == xp.float16:
                gy = gy_scale * cast(gy, xp.float32).data
            else:
                gy = gy_scale * gy
            G = xp.diag((gy * gy).mean(axis=0))
        else:
            G_scale = 1 / (n * ho * wo) * (gy_scale ** 2)
            if gy.dtype == xp.float16:
                gy = cast(gy, xp.float32).data
            G = gy.T.dot(gy) * G_scale

            diag = getattr(self.link, 'diag', False)
            if diag:
                G = xp.diag(xp.diag(G))

        return G
Example #7
def align_speaker(ys, ts):
    """Match shape as num_speaker reported can be more or less

    Args:
     ys: B-length list of predictions
     ts: B-length list of predictions

    Returns:
     ys: Aligned B-length list of predictions
     ts: Aligned B-length list of predictions
    """
    num_speakers = [max(y.shape[1], t.shape[1]) for y, t in zip(ys, ts)]
    ys = [
        F.pad(y, ((0, 0), (0, n_spk - y.shape[1])),
              'constant',
              constant_values=0) for y, n_spk in zip(ys, num_speakers)
    ]
    ts = [
        F.cast(
            F.pad(F.cast(t, 'f'), ((0, 0), (0, n_spk - t.shape[1])),
                  'constant',
                  constant_values=0), 'i').array
        for t, n_spk in zip(ts, num_speakers)
    ]
    return ys, ts
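
A hypothetical usage sketch of align_speaker as defined above: one prediction with two speakers and one label matrix with three speakers are padded to a common three-speaker shape (the values below are made up):

import numpy as np
import chainer

ys = [chainer.Variable(np.random.rand(10, 2).astype(np.float32))]   # (T, n_spk) prediction
ts = [np.random.randint(0, 2, size=(10, 3)).astype(np.int32)]       # (T, n_spk) labels

ys, ts = align_speaker(ys, ts)
print(ys[0].shape, ts[0].shape)   # both (10, 3)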
Example #8
    def __call__(self, x, t):

        x = chainer.Variable(self.xp.asarray(x))
        t = chainer.Variable(self.xp.asarray(t))
        #print(x.shape)
        #print(t.shape)
        batchsize = x.data.shape[0]
        self.reset_state()

        # initial l
        l = np.random.uniform(-1, 1, size=(batchsize, 2)).astype(np.float32)
        l = chainer.Variable(self.xp.asarray(l))

        sum_ln_pi = Variable((self.xp.zeros((batchsize, 1))))
        sum_ln_pi = F.cast(sum_ln_pi, 'float32')
        l, ln_pi, y, b = self.forward(
            l.shape[0],
            x,
            l,
            first=True,
        )
        for i in range(1, self.n_steps):
            l, ln_pi, y, b = self.forward(l.shape[0], x, l)
            ln_pi = F.cast(ln_pi, 'float32')
            sum_ln_pi += ln_pi
        y = y + 0.00000001
        self.loss_action = F.softmax_cross_entropy(y, t)
        self.loss = self.loss_action
        self.accuracy = F.accuracy(y, t)
        reporter.report({'accuracy': self.accuracy}, self)
        reporter.report({'cross_entropy_loss': self.loss}, self)
        #print(y.shape)
        self.y = F.argmax(y, axis=1)
        #print(self.y)
        #self.t = F.argmax(t, axis=1)
        #reporter.report({'actual': t}, self)
        #reporter.report({'predicted': self.y}, self)
        if chainer.global_config.train:

            conditions = self.xp.argmax(y.data, axis=1) == t.data
            r = self.xp.where(conditions, 1., 0.).astype(self.xp.float32)
            r = self.xp.expand_dims(r, 1)
            # squared error between reward and baseline
            self.loss_baseline = F.mean_squared_error(r, b)
            self.loss += self.loss_baseline
            # loss with reinforce rule
            mean_ln_pi = sum_ln_pi / (self.n_steps - 1)
            a = F.sum(-mean_ln_pi * (r - b)) / batchsize
            self.reinforce_loss = F.sum(-mean_ln_pi * (r - b)) / batchsize
            self.loss += self.reinforce_loss
            reporter.report({'cross_entropy_loss': self.loss_action}, self)
            #reporter.report({'reinforce_loss': self.reinforce_loss}, self)
            #reporter.report({'total_loss': self.loss}, self)
            reporter.report({'train_accuracy': self.accuracy}, self)

        #print(self.loss)
        return self.loss
Example #9
    def compute_F(self, in_data, out_grad_data):
        x = in_data[0]
        gy = out_grad_data[0]
        ndim = len(x.shape)
        if ndim not in (2, 4):
            raise RuntimeError(
                'len(x.shape) must be 2 or 4, not {}.'.format(ndim))

        xp = cuda.get_array_module(x)
        n = x.shape[0]
        gy_scale = n
        if self._loss_scale is not None:
            gy_scale *= 1.0 / self._loss_scale

        # Re-compute BN forward with gamma=1 and beta=0
        avg_mean = self.link.avg_mean
        _gamma = xp.ones(avg_mean.shape, dtype=x.dtype)
        _beta = xp.zeros(avg_mean.shape, dtype=x.dtype)
        h = batch_normalization(x, _gamma, _beta, eps=self.link.eps).data

        if ndim == 2:
            gy = gy_scale * gy
            gyh = gy * h
        elif ndim == 4:
            # data layout of gy: NCHW
            h = h.transpose(0, 2, 3, 1)
            gy = gy.transpose(0, 2, 3, 1)

            # data layout of gy: NHWC
            gy = gy * gy_scale  # copy
            gyh = gy * h

            gyh = gyh.sum(axis=(1, 2))
            gy = gy.sum(axis=(1, 2))
            # data layout of gy: NC

        if self.link.beta is None:
            grad = gyh
        elif self.link.gamma is None:
            grad = gy
        else:
            grad = xp.hstack((gyh, gy))

        if self.diagonalize:
            if grad.dtype == xp.float16:
                grad = cast(grad, xp.float32).data
            F = xp.diag((grad * grad).mean(axis=0))
        else:
            F_scale = 1 / n
            if grad.dtype == xp.float16:
                grad = cast(grad, xp.float32).data
            F = grad.T.dot(grad) * F_scale

        return F
Example #10
 def test_forward_no_cast_grad(self):
     # This test would fail if F.cast does not create new function nodes for
     # no-op casts
     x = chainer.Variable(self.x)
     y1 = functions.cast(x, self.dtype)
     y2 = functions.cast(x, self.dtype)
     z = y1 + y2
     gy1, gy2 = chainer.grad([z], [y1, y2], [numpy.ones_like(z.data)])
     assert gy1.dtype == self.dtype
     assert gy2.dtype == self.dtype
     numpy.testing.assert_array_equal(gy1.data, numpy.ones_like(y1.data))
     numpy.testing.assert_array_equal(gy2.data, numpy.ones_like(y2.data))
Example #11
 def test_forward_no_cast_grad(self):
     # This test would fail if F.cast does not create new function nodes for
     # no-op casts
     x = chainer.Variable(self.x)
     y1 = functions.cast(x, self.dtype)
     y2 = functions.cast(x, self.dtype)
     z = y1 + y2
     gy1, gy2 = chainer.grad([z], [y1, y2], [numpy.ones_like(z.data)])
     assert gy1.dtype == self.dtype
     assert gy2.dtype == self.dtype
     numpy.testing.assert_array_equal(gy1.data, numpy.ones_like(y1.data))
     numpy.testing.assert_array_equal(gy2.data, numpy.ones_like(y2.data))
Example #12
def bger(x, y):
    """ Batch outer product

    :param x:
    :param y:
    :return:
    """
    if x.dtype == 'int' and y.dtype == 'int':
        x_float = F.cast(x, 'float32')
        y_float = F.cast(y, 'float32')
        res_float = F.expand_dims(x_float, 2) @ F.expand_dims(y_float, 1)
        return F.cast(res_float, 'int')
    return F.expand_dims(x, 2) @ F.expand_dims(y, 1)
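
A shape check for bger with hypothetical sizes: a batch of 4 length-3 vectors and a batch of 4 length-5 vectors give a (4, 3, 5) batch of outer products. F.matmul is used here; it is equivalent to the `@` operator in the float branch above:

import numpy as np
import chainer.functions as F

x = np.random.rand(4, 3).astype(np.float32)
y = np.random.rand(4, 5).astype(np.float32)
out = F.matmul(F.expand_dims(x, 2), F.expand_dims(y, 1))
print(out.shape)   # (4, 3, 5)
assert np.allclose(out.array, np.einsum('bn,bm->bnm', x, y))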
Example #13
 def forward_chainer(self, inputs):
     if len(inputs) == 2:
         (x, w), b = inputs, None
     else:
         x, w, b = inputs
     if x.dtype.kind != 'f':
         x = F.cast(x, 'float64')
     if w.dtype.kind != 'f':
         w = F.cast(w, 'float64')
     if b is not None and b.dtype.kind != 'f':
         b = F.cast(b, 'float64')
     y = F.convolution_nd(x, w, b, self.stride, self.pad, self.cover_all)
     y = F.cast(y, self.out_dtype)
     return y,
Example #14
    def forward(self, x):
        """Normalize input and scale it.

        Args:
            x (chainer.Variable): A variable holding 4-dimensional array.
                Its :obj:`dtype` is :obj:`numpy.float32`.

        Returns:
            chainer.Variable:
            The shape and :obj:`dtype` are same as those of input.
        """
        x = F.cast(x, 'float32')
        x = F.normalize(x, eps=self.eps, axis=1)
        scale = F.broadcast_to(self.scale[:, np.newaxis, np.newaxis], x.shape)
        return F.cast(x * scale, chainer.get_dtype())
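
What the F.normalize call above does along the channel axis, shown on a hypothetical (1, 2, 1, 1) input whose single channel vector is (3, 4): each spatial position's channel vector is divided by its L2 norm (plus eps):

import numpy as np
import chainer.functions as F

x = np.array([[[[3.0]], [[4.0]]]], dtype=np.float32)   # shape (1, 2, 1, 1), channel norm 5
y = F.normalize(x, axis=1)
print(y.array.ravel())   # approximately [0.6 0.8]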
Example #15
 def __call__(self, x):
     x = F.cast(x, "int")
     if self.id_trans_fn is None:
         words = self.embed(x)
     else:
         words = self.embed(self.id_trans_fn(x))
     return words
Example #16
    def predict(self, imgs):
        """Conduct semantic segmentations from images.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their values is :math:`[0, 255]`.

        Returns:
            list of numpy.ndarray:

            List of integer labels predicted from each image in the input \
            list.

        """
        labels = []
        for img in imgs:
            C, H, W = img.shape
            with chainer.using_config('train', False), \
                    chainer.function.no_backprop_mode():

                x = F.cast(self.xp.asarray(img[np.newaxis]), self.dtype)
                score = self.forward(x)[0].array.astype(np.float32)

            score = chainer.backends.cuda.to_cpu(score)
            if score.shape != (C, H, W):
                dtype = score.dtype
                score = resize(score, (H, W)).astype(dtype)

            label = np.argmax(score, axis=0).astype(np.int32)
            labels.append(label)
        return labels
Example #17
File: model.py  Project: liuxingyuxx/ppn
 def _forward(self, x):
     h = F.cast(x, self.dtype)
     h = self.feature_layer(h)
     # self.last_activation = F.sigmoid
     h = self.feature_layer.last_activation(self.lastconv(h))
     #  x.shape, h.shape = (None, 3, 224, 224), (None, 440, 7, 7)
     return h
Example #18
 def compute_features(self, obs):
     obs = F.cast(obs, np.float32)
     obs = F.transpose(obs, (0, 3, 1, 2))
     h1 = F.relu(self.conv1(obs))
     h2 = F.relu(self.conv2(h1))
     h3 = F.relu(self.fc(h2))
     return h3
Example #19
def recalculate_bn_statistics(model, batchsize, dtype='float32'):
    print(
        '==> Recalculating BN statistics (batchsize={}) ...'.format(batchsize))
    train = CamVidDataset(split='train')
    it = chainer.iterators.SerialIterator(train,
                                          batchsize,
                                          repeat=False,
                                          shuffle=False)
    bn_avg_mean = defaultdict(np.float32)
    bn_avg_var = defaultdict(np.float32)

    if dtype == 'mixed16':
        dtype = 'float16'

    n_iter = 0
    for batch in it:
        imgs, _ = concat_examples(batch)

        model(F.cast(model.xp.array(imgs), dtype))
        for name, link in model.namedlinks():
            if name.endswith('_bn'):
                bn_avg_mean[name] += link.avg_mean
                bn_avg_var[name] += link.avg_var
        n_iter += 1

    for name, link in model.namedlinks():
        if name.endswith('_bn'):
            link.avg_mean = bn_avg_mean[name] / n_iter
            link.avg_var = bn_avg_var[name] / n_iter

    return model
Example #20
    def _decode_multiple(self, s, z=None, decode_num=10):
        if z is None:
            xp = chainer.backend.get_array_module(s)
            z = chainer.Variable(
                xp.random.normal(0,
                                 1,
                                 size=(s.shape[0], decode_num,
                                       self._latent_dim)))
            z = F.cast(z, typ=xp.float32)
            z = F.clip(z, -0.5, 0.5)

        s = F.expand_dims(s, axis=0)
        s = F.repeat(s, repeats=decode_num, axis=0)
        s = F.transpose(s, axes=(1, 0, 2))

        x = F.concat((s, z), axis=2)
        x = F.reshape(x, shape=(-1, x.shape[-1]))
        h = self._linear3(x)
        h = F.relu(h)
        h = self._linear4(h)
        h = F.relu(h)
        h = self._linear5(h)
        h = F.reshape(h, shape=(-1, decode_num, h.shape[-1]))

        return F.tanh(h), h
Example #21
 def __call__(self, x):
     heatmap = x
     vector_dim = 2
     batch = heatmap.shape[0]
     channels = heatmap.shape[1]
     in_size = x.shape[2:]
     heatmap_vector = F.reshape(heatmap, shape=(batch, channels, -1))
     indices = F.cast(
         F.expand_dims(F.argmax(heatmap_vector, axis=vector_dim),
                       axis=vector_dim), np.float32)
     scores = F.max(heatmap_vector, axis=vector_dim, keepdims=True)
     scores_mask = (scores.array > 0.0).astype(np.float32)
     pts_x = (indices.array % in_size[1]) * scores_mask
     pts_y = (indices.array // in_size[1]) * scores_mask
     pts = F.concat((pts_x, pts_y, scores), axis=vector_dim).array
     for b in range(batch):
         for k in range(channels):
             hm = heatmap[b, k, :, :].array
             px = int(pts_x[b, k])
             py = int(pts_y[b, k])
             if (0 < px < in_size[1] - 1) and (0 < py < in_size[0] - 1):
                 pts[b, k,
                     0] += np.sign(hm[py, px + 1] - hm[py, px - 1]) * 0.25
                 pts[b, k,
                     1] += np.sign(hm[py + 1, px] - hm[py - 1, px]) * 0.25
     return pts
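
The flat argmax index above is decoded into pixel coordinates with index % W and index // W. A tiny NumPy illustration on a hypothetical 3x4 heatmap:

import numpy as np

heatmap = np.zeros((3, 4), dtype=np.float32)
heatmap[2, 1] = 1.0                  # peak at row y=2, column x=1
idx = heatmap.reshape(-1).argmax()   # flat index, like F.argmax over the flattened heatmap
W = heatmap.shape[1]
x, y = idx % W, idx // W
print(x, y)                          # 1 2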
Example #22
 def test_forward_no_cast_variable(self):
     # If backprop is disabled, it's safe to simply return the input
     # variable for no-op casts.
     x = chainer.Variable(self.x)
     with chainer.using_config('enable_backprop', False):
         y = functions.cast(x, self.dtype)
     assert y is x
Example #23
 def test_forward_no_cast_variable(self):
     # If backprop is disabled, it's safe to simply return the input
     # variable for no-op casts.
     x = chainer.Variable(self.x)
     with chainer.using_config('enable_backprop', False):
         y = functions.cast(x, self.dtype)
     assert y is x
Example #24
 def compute_features(self, obs):
     obs = F.cast(obs, np.float32)
     obs = F.transpose(obs, (0, 3, 1, 2))
     h1 = F.relu(self.conv1(obs))
     h2 = F.relu(self.conv2(h1))
     h3 = F.relu(self.fc(h2))
     return h3
Example #25
    def intersect_DIFF(self, ro, rd, t0, t1):
        xp = chainer.backend.get_array_module(ro)
        BB, _, H, W = ro.shape[:4]

        p0 = F.broadcast_to(self.p0.reshape((1, 3, 1, 1)), (BB, 3, H, W))
        p1 = F.broadcast_to(self.p1.reshape((1, 3, 1, 1)), (BB, 3, H, W))
        p2 = F.broadcast_to(self.p2.reshape((1, 3, 1, 1)), (BB, 3, H, W))
        fn = F.broadcast_to(self.fn.reshape((1, 3, 1, 1)), (BB, 3, H, W))
        face_id = F.broadcast_to(self.id, (BB, 1, H, W))
        eps = F.broadcast_to(self.eps.reshape((1, 1, 1, 1)), (BB, 1, H, W))

        aa = p0 - ro

        A = vdot(aa, fn)
        B = vdot(rd, fn)
        B = F.where(xp.abs(B.data) < eps.data, eps, B)

        tx = A / B
        p = ro + tx * rd
        n01 = vcross(p0 - p, p1 - p)
        n12 = vcross(p1 - p, p2 - p)
        n20 = vcross(p2 - p, p0 - p)

        MASK_P = is_positive(vdot(n01, n12))
        MASK_Q = is_positive(vdot(n12, n20))
        MASK_R = is_positive(vdot(n20, n01))

        # is_positive(F.absolute(B).reshape((B, 1, H, W)))
        MASK_B = is_positive(F.absolute(B))
        # print(MASK_B.shape)

        MASK_T0 = is_positive(tx - t0)
        MASK_T1 = is_positive(t1 - tx)

        b = F.cast(MASK_P * MASK_Q * MASK_R * MASK_B * MASK_T0 * MASK_T1,
                   'bool')
        #print("MASK_B", MASK_B.shape)
        #print("b", b.shape)
        t = F.where(b, tx, t1)
        p = ro + t * rd

        #print(p.shape, p.dtype)
        bn = F.cast(is_positive(vdot(rd, fn)), 'bool')
        n = F.where(bn, -fn, fn)

        return {'b': b, 't': t, 'p': p, 'n': n, 'face_id': face_id}
Example #26
    def logli(self, a):
        a = F.cast(a, np.float32)
        # transform back to standard normal
        zs = (a - self.means) * F.exp(-self.log_stds)

        # density of standard normal: f(z) = (2*pi*det|Σ|)^(-n/2) * exp(-|x|^2/2)
        # the return value should be log f(z)
        return - F.sum(self.log_stds, axis=-1) - \
            0.5 * F.sum(F.square(zs), axis=-1) - \
            0.5 * self.means.shape[-1] * np.log(2 * np.pi)
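
A small NumPy/SciPy check (with hypothetical values) of the diagonal-Gaussian log-density formula used above: with z = (a - mean) / std, log f(a) = -sum(log_std) - 0.5 * sum(z^2) - 0.5 * n * log(2 * pi):

import numpy as np
from scipy.stats import multivariate_normal

means = np.array([0.5, -1.0])
log_stds = np.array([0.1, -0.2])
a = np.array([0.3, -0.8])

zs = (a - means) * np.exp(-log_stds)
logli = (-np.sum(log_stds)
         - 0.5 * np.sum(np.square(zs))
         - 0.5 * means.shape[-1] * np.log(2 * np.pi))

ref = multivariate_normal(mean=means, cov=np.diag(np.exp(2 * log_stds))).logpdf(a)
assert np.isclose(logli, ref)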
Example #27
 def __call__(self, x):
     h = F.cast(x, np.float16)
     h = F.relu(self.bn2(self.conv1(h)))
     h = self.res3(h)
     h = self.res4(h)
     h = self.res5(h)
     h = self.res6(h)
     h = F.average_pooling_2d(h, h.shape[2:])
     h = self.fc7(h)
     return h
Example #28
    def logli(self, a):
        a = F.cast(a, np.float32)
        # transform back to standard normal
        zs = (a - self.means) * F.exp(-self.log_stds)

        # density of standard normal: f(z) = (2*pi*det|Σ|)^(-n/2) * exp(-|x|^2/2)
        # the return value should be log f(z)
        return - F.sum(self.log_stds, axis=-1) - \
            0.5 * F.sum(F.square(zs), axis=-1) - \
            0.5 * self.means.shape[-1] * np.log(2 * np.pi)
Example #29
    def backward(self, indexes, grad_outputs):
        """ The gradient for the output will be scaled """
        x, W = self.get_retained_inputs()
        gy, = grad_outputs

        s_gy, u_gy = self.ada_loss.loss_scaling(gy, W)

        # Actual gradient calculation
        ret = []
        with chainer.using_config('use_ideep', self._config_use_ideep):
            if 0 in indexes:
                gx, = linear.LinearGradData().apply((W, s_gy))
                ret.append(F.cast(gx, x.dtype))
            if 1 in indexes:
                gW, = linear.LinearGradWeight(W.dtype).apply((x, u_gy))
                ret.append(F.cast(gW, W.dtype))
            if 2 in indexes:
                gb = chainer.functions.sum(u_gy, axis=0)
                ret.append(gb)

        return ret
Example #30
    def sampling(self, dist: ArrayLike, maximum=True):
        xp = self.xp

        if maximum:
            sampled = xp.argmax(F.softmax(dist, axis=1).data, axis=1)
        else:
            dist = F.cast(dist, xp.float64)
            prob = F.softmax(dist, axis=1).data
            sampled = xp.argmax(xp.log(prob) +
                                xp.random.gumbel(size=prob.shape),
                                axis=1)
        return sampled
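
The non-greedy branch above uses the Gumbel-max trick: adding Gumbel noise to log-probabilities and taking the argmax draws a sample from the categorical distribution. A quick empirical NumPy check with a hypothetical distribution:

import numpy as np

prob = np.array([0.2, 0.5, 0.3])
n = 100000
noise = np.random.gumbel(size=(n, 3))
samples = np.argmax(np.log(prob) + noise, axis=1)
print(np.bincount(samples) / n)   # approximately [0.2, 0.5, 0.3]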
Example #31
    def __call__(self, p, p_mask=None):
        xp.cuda.Device(self._device_id).use()

        p_len = p.shape[1]
        p_aug_i = F.tile(F.expand_dims(p, 2), (1, 1, p_len, 1))
        p_aug_j = F.tile(F.expand_dims(p, 1), (1, p_len, 1, 1))

        if p_mask is None:
            self_mask = None
        else:
            p_mask_aug_i = F.tile(F.expand_dims(p_mask, 2), (1, 1, p_len, 1))
            p_mask_aug_i = xp.any(F.cast(p_mask_aug_i, 'bool').data, axis=3)
            p_mask_aug_j = F.tile(F.expand_dims(p_mask, 1), (1, p_len, 1, 1))
            p_mask_aug_j = xp.any(F.cast(p_mask_aug_j, 'bool').data, axis=3)
            self_mask = p_mask_aug_i & p_mask_aug_j

        h_logits = get_logits(self.logits_linear, [p_aug_i, p_aug_j],
                              self_mask)  # ->(N,48,48)
        self_att = softsel(p_aug_j, h_logits)  # ->(N,48,448)
        out = self.fuse_gate(p, self_att)

        return out
Example #32
    def compute_kfgrads(self, W, b, invs):
        xp = cuda.get_array_module(W.data)
        A_inv, G_inv = invs
        grad = W.grad
        if b is not None:
            grad = xp.column_stack([grad, b.grad])

        out_dtype = grad.dtype
        if A_inv.dtype != grad.dtype:
            grad = cast(grad, A_inv.dtype).data

        kfgrad = xp.dot(xp.dot(G_inv, grad), A_inv)

        return kfgrad.astype(out_dtype)
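
A shape sketch (hypothetical sizes) of the Kronecker-factored update above: for a linear layer with 2 outputs and 3 inputs plus a bias column, G_inv is (2, 2), A_inv is (4, 4), the stacked gradient is (2, 4), and G_inv.dot(grad).dot(A_inv) keeps that shape:

import numpy as np

G_inv = np.eye(2, dtype=np.float32)              # output-side factor inverse
A_inv = np.eye(4, dtype=np.float32)              # input-side factor inverse (bias column included)
grad = np.random.rand(2, 4).astype(np.float32)   # [dW, db] stacked column-wise
kfgrad = G_inv.dot(grad).dot(A_inv)
print(kfgrad.shape)                              # (2, 4)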
Example #33
    def compute_A(self, in_data):
        x = in_data[0]
        xp = cuda.get_array_module(x)
        n, _ = x.shape
        if self.link.b is not None:
            ones = xp.ones(n, dtype=x.dtype)
            x = xp.column_stack((x, ones))

        if x.dtype == xp.float16:
            x = cast(x, xp.float32).data

        A = (x * x).mean(axis=0) if self.diagonalize else x.T.dot(x) * (1 / n)

        return A
Example #34
    def backward(self, indexes, grad_outputs):
        """ The gradient for the output will be scaled """
        x, W = self.get_retained_inputs()
        gy, = grad_outputs
        gy_, prev_scale = self.ada_loss.loss_scaling(gy, W)

        ret = []
        with chainer.using_config('use_ideep', self._config_use_ideep):
            if 0 in indexes:
                gx, = linear.LinearGradData().apply((W, gy_))
                self.ada_loss.set_loss_scale(
                    gx, self.ada_loss.grad_loss_scale(gy_))
                ret.append(F.cast(gx, x.dtype))
            if 1 in indexes:
                gW, = linear.LinearGradWeight(W.dtype).apply((x, gy))
                gW_ = self.ada_loss.get_unscaled_gradient(gW, prev_scale)
                ret.append(F.cast(gW_, W.dtype))
            if 2 in indexes:
                gb = chainer.functions.sum(gy, axis=0)
                gb_ = self.ada_loss.get_unscaled_gradient(gb, prev_scale)
                ret.append(gb_)

        return ret
Example #35
 def test_forward_no_cast_variable(self):
     x = chainer.Variable(self.x)
     y = functions.cast(x, self.dtype)
     self.assertIs(y, x)
Example #36
 def test_forward_no_cast_array(self):
     y = functions.cast(self.x, self.dtype)
     self.assertIsInstance(y, chainer.Variable)
     self.assertIs(y.data, self.x)
Example #37
 def check_forward_no_cast(self, x_data):
     y = functions.cast(x_data, self.dtype)
     self.assertIsInstance(y, chainer.Variable)
     self.assertIs(y.data, x_data)
Example #38
 def func(x):
     return functions.cast(x, self.out_type)
Example #39
 def check_forward(self, x_data):
     x = chainer.Variable(x_data)
     y = functions.cast(x, self.out_type)
     self.assertEqual(y.data.shape, x.data.shape)
     self.assertEqual(y.data.dtype, self.out_type)
Example #40
 def check_forward(self, x_data):
     x = chainer.Variable(x_data)
     y = functions.cast(x, self.out_type)
     assert y.data.shape == x.data.shape
     assert y.data.dtype == self.out_type
Example #41
 def compute_features(self, obs):
     obs = F.cast(obs, np.float32)
     h = obs
     for link in self.feature_links().values():
         h = self.hidden_nonlinearity(link(h))
     return h
Example #42
File: alex.py  Project: Fhrozen/chainer
 def __call__(self, x, t):
     return Alex.__call__(self, F.cast(x, self.dtype), t)
Example #43
    def __call__(self, from_tensor, to_tensor,
                 attention_mask=None,
                 do_return_2d_tensor=False):
        """
        Args:
          from_tensor: float Tensor of shape [batch_size, from_seq_length, from_width].
          to_tensor: float Tensor of shape [batch_size, to_seq_length, to_width].
          attention_mask: (optional) int32 Tensor of shape [batch_size,
            from_seq_length, to_seq_length]. The values should be 1 or 0. The
            attention scores will effectively be set to -infinity for any positions in
            the mask that are 0, and will be unchanged for positions that are 1.
          do_return_2d_tensor: bool. If True, the output will be of shape [batch_size
            * from_seq_length, num_attention_heads * size_per_head]. If False, the
            output will be of shape [batch_size, from_seq_length, num_attention_heads
            * size_per_head].

        Returns:
          float Tensor of shape [batch_size, from_seq_length,
            num_attention_heads * size_per_head]. (If `do_return_2d_tensor` is
            true, this will be of shape [batch_size * from_seq_length,
            num_attention_heads * size_per_head]).
        """
        def transpose_for_scores(input_tensor, batch_size, num_attention_heads,
                                 seq_length, width):
            """
            output_tensor = F.stack(
                F.split_axis(input_tensor, num_attention_heads, axis=1),
                axis=1)
            # batch_size * seq_length, num_attention_heads, width

            output_tensor = F.stack(
                F.split_axis(output_tensor, seq_length, axis=0),
                axis=2)
            batch_size, num_attention_heads, seq_length, width
            """
            output_tensor = F.reshape(
                input_tensor,
                (batch_size, seq_length, num_attention_heads, width))
            output_tensor = F.transpose(output_tensor, [0, 2, 1, 3])
            return output_tensor

        from_shape = from_tensor.shape
        to_shape = to_tensor.shape

        if len(from_shape) != len(to_shape):
            raise ValueError(
                "The rank of `from_tensor` must match the rank of `to_tensor`.")

        if len(from_shape) == 3:
            batch_size = from_shape[0]
            from_seq_length = from_shape[1]
            to_seq_length = to_shape[1]
        elif len(from_shape) == 2:
            # TODO right?
            assert attention_mask is not None
            batch_size = attention_mask.shape[0]
            from_seq_length = attention_mask.shape[1]
            to_seq_length = attention_mask.shape[2]
            if (batch_size is None or from_seq_length is None or to_seq_length is None):
                raise ValueError(
                    "When passing in rank 2 tensors to attention_layer, the values "
                    "for `batch_size`, `from_seq_length`, and `to_seq_length` "
                    "must all be specified.")

        # Scalar dimensions referenced here:
        #   B = batch size (number of sequences)
        #   F = `from_tensor` sequence length
        #   T = `to_tensor` sequence length
        #   N = `num_attention_heads`
        #   H = `size_per_head`

        from_tensor_2d = reshape_to_matrix(from_tensor)
        to_tensor_2d = reshape_to_matrix(to_tensor)

        # `query_layer` = [B*F, N*H]
        query_layer = self.query(from_tensor_2d)
        # `key_layer` = [B*T, N*H]
        key_layer = self.key(to_tensor_2d)
        # `value_layer` = [B*T, N*H]
        value_layer = self.value(to_tensor_2d)

        # `query_layer` = [B, N, F, H]
        query_layer = transpose_for_scores(
            query_layer, batch_size,
            self.num_attention_heads, from_seq_length, self.size_per_head)

        # `key_layer` = [B, N, T, H]
        key_layer = transpose_for_scores(
            key_layer, batch_size,
            self.num_attention_heads, to_seq_length, self.size_per_head)

        # Take the dot product between "query" and "key" to get the raw
        # attention scores.
        # `attention_scores` = [B, N, F, T]
        attention_scores = F.matmul(query_layer, key_layer, transb=True)
        attention_scores *= 1.0 / np.sqrt(self.size_per_head)

        if attention_mask is not None:
            # `attention_mask` = [B, 1, F, T]
            # attention_mask = F.expand_dims(attention_mask, axis=1)
            attention_mask = attention_mask[:, None]

            # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
            # masked positions, this operation will create a tensor which is 0.0 for
            # positions we want to attend and -10000.0 for masked positions.
            # adder = (1.0 - tf.cast(attention_mask, tf.float32)) * -10000.0
            adder = F.cast(1.0 - attention_mask, 'float32') * -10000.0

            # Since we are adding it to the raw scores before the softmax, this is
            # effectively the same as removing these entirely.
            attention_scores += F.broadcast_to(adder, attention_scores.shape)

        # Normalize the attention scores to probabilities.
        # `attention_probs` = [B, N, F, T]
        # (default softmax's axis is -1 in tf while 1 in chainer)
        # attention_probs = tf.nn.softmax(attention_scores)  # tf original
        attention_probs = F.softmax(attention_scores, axis=3)

        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.
        attention_probs = F.dropout(
            attention_probs, self.attention_probs_dropout_prob)

        # `value_layer` = [B, T, N, H]
        value_layer = F.reshape(
            value_layer,
            [batch_size, to_seq_length, self.num_attention_heads, self.size_per_head])

        # `value_layer` = [B, N, T, H]
        value_layer = F.transpose(value_layer, [0, 2, 1, 3])

        # `context_layer` = [B, N, F, H]
        context_layer = F.matmul(attention_probs, value_layer)  # right?

        # `context_layer` = [B, F, N, H]
        context_layer = F.transpose(context_layer, [0, 2, 1, 3])

        if do_return_2d_tensor:
            # `context_layer` = [B*F, N*V]
            context_layer = F.reshape(
                context_layer,
                [batch_size * from_seq_length, self.num_attention_heads * self.size_per_head])
            # right?
        else:
            # `context_layer` = [B, F, N*V]
            context_layer = F.reshape(
                context_layer,
                [batch_size, from_seq_length, self.num_attention_heads * self.size_per_head])

        return context_layer
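
The masking step above in isolation: positions where the attention mask is 0 get -10000.0 added to their raw scores, so their softmax probability is effectively zero. A minimal sketch with a hypothetical 1x1x1x4 score tensor:

import numpy as np
import chainer.functions as F

scores = np.array([[[[2.0, 1.0, 0.5, 3.0]]]], dtype=np.float32)
mask = np.array([[[[1, 1, 0, 1]]]], dtype=np.float32)
adder = (1.0 - mask) * -10000.0
probs = F.softmax(scores + adder, axis=3)
print(probs.array.round(3))   # the third position gets ~0 probability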
Example #44
 def forward(self, x, t):
     return GoogLeNetBN.forward(self, F.cast(x, self.dtype), t)
Example #45
File: alex.py  Project: jnishi/chainer
 def forward(self, x, t):
     return Alex.forward(self, F.cast(x, self.dtype), t)
Example #46
 def forward(self, inputs, device):
     x, = inputs
     function = getattr(functions, self.function_name)
     y = function(x, axis=self.axis)
     y = functions.cast(y, numpy.int64)
     return y,
Example #47
 def forward(self, inputs, device):
     p, x, y = inputs
     ret = functions.linear_interpolate(p, x, y)
     ret = functions.cast(ret, numpy.float64)
     return ret,
Example #48
 def __call__(self, x, t):
     return GoogLeNetBN.__call__(self, F.cast(x, self.dtype), t)
Example #49
 def check_forward_no_cast(self, x_data):
     y = functions.cast(x_data, self.dtype)
     assert isinstance(y, chainer.Variable)
     assert y.data is x_data
Example #50
 def test_forward_no_cast_array(self):
     y = functions.cast(self.x, self.dtype)
     assert isinstance(y, chainer.Variable)
     assert y.data is self.x