Exemplo n.º 1
0
def ssd_separate_conf_pos_neg(_ssd_conf):
    """Split SSD confidences along the last axis into two parts.

    Args:
        _ssd_conf (nn.Variable): Confidences with shape
            (batch_size, default boxes, pos num + neg num).

    Returns:
        tuple of nn.Variable: ``(ssd_pos_conf, ssd_neg_conf)``.
        ``ssd_pos_conf`` holds every entry of the last axis except the final
        one; ``ssd_neg_conf`` holds only the final entry of the last axis.
    """
    full_shape = _ssd_conf.shape
    n_batch, n_boxes = full_shape[0], full_shape[1]
    # Position on the last axis where the positive part ends and the
    # negative part begins.
    split_at = full_shape[2] - 1
    unit_step = (1, 1, 1)

    ssd_pos_conf = F.slice(_ssd_conf,
                           start=(0, 0, 0),
                           stop=(n_batch, n_boxes, split_at),
                           step=unit_step)
    ssd_neg_conf = F.slice(_ssd_conf,
                           start=(0, 0, split_at),
                           stop=(n_batch, n_boxes, full_shape[2]),
                           step=unit_step)
    return ssd_pos_conf, ssd_neg_conf
Exemplo n.º 2
0
def celu_backward(inputs, alpha=1.0, axis=1):
    """Build the gradient of CELU with respect to its input.

    Args:
      inputs (list of nn.Variable): ``inputs[0]`` is the incoming gradient
        ``dy`` and ``inputs[1]`` is the forward input ``x0``.
      alpha (float): Scale applied to the exponential terms.
      axis (int): Axis handed to ``create_slice`` when splitting ``dy``.

    Return:
      nn.Variable: The gradient with respect to ``x0``.
    """
    dy, x0 = inputs[0], inputs[1]

    # create_slice is defined elsewhere; presumably it yields the
    # (start, stop, step) triples selecting the two halves of dy along `axis`.
    front = create_slice(dy.shape, axis, True)
    back = create_slice(dy.shape, axis, False)
    dy_front = F.slice(dy, *front)
    dy_back = F.slice(dy, *back)

    scaled_exp_pos = alpha * F.exp(x0)
    scaled_exp_neg = alpha * F.exp(-x0)

    # Complementary masks for x0 > 0 and x0 <= 0, detached via no_grad.
    positive = F.greater_scalar(x0, 0)
    non_positive = 1 - positive
    positive = no_grad(positive)
    non_positive = no_grad(non_positive)

    grad_front = dy_front * (positive + scaled_exp_pos * non_positive)
    grad_back = dy_back * (non_positive + scaled_exp_neg * positive)
    return grad_front - grad_back
Exemplo n.º 3
0
def yolov2_activate(x, anchors, biases):
    """Apply the YOLOv2 output activations to a raw prediction map.

    The channel axis of ``x`` is split per anchor, and each anchor's
    features are cut along axis 2 into four groups that are activated
    separately: entries 0-1 (sigmoid), entries 2-3 (exp), entry 4 (sigmoid)
    and the remaining entries (softmax over axis 2).

    Args:
        x (nn.Variable): 4-D prediction map; ``x.shape[1]`` must be a
            multiple of ``anchors``.
        anchors (int): Number of anchors per cell.
        biases: Forwarded unchanged to ``yolov2_image_coordinate``.

    Returns:
        nn.Variable: Activated predictions reshaped to
        ``(x.shape[0], -1, x.shape[1] // anchors)``.
    """
    shape = x.shape
    y = F.reshape(x, (
        shape[0],
        anchors,
        -1,
    ) + shape[2:])
    # `stop` is reused for every slice; only the bound on axis 2 changes.
    stop = list(y.shape)
    stop[2] = 2
    t_xy = F.slice(y, (0, 0, 0, 0, 0), stop)
    stop[2] = 4
    t_wh = F.slice(y, (0, 0, 2, 0, 0), stop)
    stop[2] = 5
    t_o = F.slice(y, (0, 0, 4, 0, 0), stop)
    stop[2] = y.shape[2]
    t_p = F.slice(y, (0, 0, 5, 0, 0), stop)
    t_xy = F.sigmoid(t_xy)
    t_wh = F.exp(t_wh)
    t_o = F.sigmoid(t_o)
    t_p = F.softmax(t_p, axis=2)
    t_x, t_y, t_wh = yolov2_image_coordinate(t_xy, t_wh, biases)
    y = F.concatenate(t_x, t_y, t_wh, t_o, t_p, axis=2)
    # Floor division keeps the target dimension an integer; true division
    # would put a float into the shape tuple under Python 3.
    y = F.transpose(y, (0, 1, 3, 4, 2)).reshape(
        (shape[0], -1, shape[1] // anchors))
    return y
Exemplo n.º 4
0
def lstm_cell(x, c, h):
    """Run one LSTM step.

    Args:
        x (nn.Variable): Step input, concatenated with ``h`` along axis 1.
        c (nn.Variable): Previous cell state of shape (batch_size, units).
        h (nn.Variable): Previous hidden state.

    Returns:
        tuple of nn.Variable: ``(cell, hidden)`` for this step.
    """
    batch_size, units = c.shape
    # A single affine layer produces all four gate pre-activations at once.
    gates = PF.affine(F.concatenate(x, h, axis=1), 4*units)

    def gate_block(index):
        # The index-th block of `units` columns of the affine output.
        return F.slice(gates,
                       start=(0, units * index),
                       stop=(batch_size, units * (index + 1)))

    candidate = F.tanh(gate_block(0))
    input_gate = F.sigmoid(gate_block(1))
    forget_gate = F.sigmoid(gate_block(2))
    output_gate = F.sigmoid(gate_block(3))

    cell = input_gate * candidate + forget_gate * c
    hidden = output_gate * F.tanh(cell)
    return cell, hidden
Exemplo n.º 5
0
 def jacobian(self, coordinates):
     """Build the Jacobian of ``warp_coordinates`` w.r.t. ``coordinates``.

     The gradient of the summed first and of the summed second component
     (last axis) of the warped coordinates is taken with ``nn.grad``; both
     gradients get a new axis inserted before their last axis and are
     concatenated along that new axis.
     """
     warped = self.warp_coordinates(coordinates)
     leading = warped.shape[:2]

     def component_grad(index):
         # Gradient of the sum of one output component w.r.t. the input.
         component = F.slice(warped,
                             start=(0, 0, index),
                             stop=leading + (index + 1,))
         return nn.grad([F.sum(component)], [coordinates])[0]

     def add_row_axis(grad):
         # Insert a singleton axis just before the last axis.
         return F.reshape(grad, grad.shape[:-1] + (1,) + grad.shape[-1:])

     grad_x = component_grad(0)
     grad_y = component_grad(1)
     gx = add_row_axis(grad_x)
     gy = add_row_axis(grad_y)
     return F.concatenate(gx, gy, axis=gy.ndim-2)
Exemplo n.º 6
0
    def backward_impl(self, inputs, outputs, prop_down, accum):
        """Propagate the gradient of this backward function to ``inputs[1]``.

        ``inputs`` is [inputs_fwd_graph] + [inputs_bwd_graph] or
        [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph].
        Only the gradient with respect to ``inputs[1]`` is handled here: the
        gradient of ``outputs[0]`` is sliced with the forward function's
        ``start``/``stop``/``step`` and accumulated into, or copied over,
        the gradient of ``inputs[1]``.
        """
        # Slice arguments recorded on the forward function.
        fwd_args = self.forward_func.info.args
        start = fwd_args["start"]
        stop = fwd_args["stop"]
        step = fwd_args["step"]

        grad_dy = inputs[1].grad
        grad_dx0 = outputs[0].grad

        if not prop_down[1]:
            return

        sliced = F.slice(grad_dx0, start, stop, step)
        if accum[1]:
            grad_dy += sliced
        else:
            grad_dy.copy_from(sliced)
Exemplo n.º 7
0
def factorized_reduction(x, output_filter, scope, test):
    """Halve the spatial resolution of ``x`` using two parallel paths.

    Path 1 average-pools ``x`` with stride 2 and applies a 1x1 convolution.
    Path 2 zero-pads ``x`` by one on the right and bottom, drops the first
    row and column (a shift by one that keeps the original shape), then
    pools and convolves the same way. Each path yields
    ``output_filter // 2`` maps; the two are concatenated on the channel
    axis and batch-normalized.

    Args:
        x (nn.Variable): Input variable.
        output_filter (int): Total number of output maps.
        scope (str): Parameter scope holding all created parameters.
        test (bool): When True, batch normalization does not use batch
            statistics.

    Returns:
        nn.Variable: The reduced variable.
    """
    half_filters = output_filter // 2
    with nn.parameter_scope(scope):
        # Path 1: plain strided pooling.
        pooled = F.average_pooling(x, (1, 1), (2, 2))
        with nn.parameter_scope("path1_conv"):
            path1 = PF.convolution(
                pooled, half_filters, (1, 1), with_bias=False)

        # Path 2: same pooling on the input shifted by one pixel.
        shifted = F.slice(F.pad(x, (0, 1, 0, 1), mode='constant'),
                          (0, 0, 1, 1))
        shifted_pooled = F.average_pooling(shifted, (1, 1), (2, 2))
        with nn.parameter_scope("path2_conv"):
            path2 = PF.convolution(
                shifted_pooled, half_filters, (1, 1), with_bias=False)

        merged = F.concatenate(path1, path2, axis=1)
        with nn.parameter_scope("reduction_bn"):
            merged = PF.batch_normalization(merged, batch_stat=not test)

    return merged
Exemplo n.º 8
0
def _in_projection_packed(q, k, v, w, b):
    if k is v:
        if q is k:
            # self-attention
            w = F.transpose(w, (1, 0))
            to_ret = F.affine(q, w, b, base_axis=2)
            ind = -(-to_ret.size_from_axis(2) // 3)
            a, b, c = to_ret.shape
            return F.slice(to_ret, (0, 0, 0), (a, b, ind)), F.slice(
                to_ret, (0, 0, ind),
                (a, b, ind * 2)), F.slice(to_ret, (0, 0, ind * 2), (a, b, c))
        else:
            # encoder-decoder attention
            raise NotImplementedError()
    else:
        raise NotImplementedError()
Exemplo n.º 9
0
def factorized_reduction(x, output_filter, scope, test, is_search):
    """Halve the spatial resolution of ``x`` with two strided 1x1 convs.

    After a ReLU, one convolution sees ``x`` directly and the other sees
    ``x`` zero-padded by one on the right/bottom with its first row and
    column dropped. Both use stride 2 and produce ``output_filter // 2``
    maps; the results are concatenated on axis 1 and batch-normalized.

    Args:
        x (nn.Variable): Input variable.
        output_filter (int): Total number of output maps; must be even.
        scope (str): Parameter scope holding all created parameters.
        test (bool): When True, batch normalization does not use batch
            statistics.
        is_search (bool): Passed as ``fix_parameters`` to batch
            normalization.

    Returns:
        nn.Variable: The reduced variable.
    """
    assert output_filter % 2 == 0
    half_filters = output_filter // 2
    activated = F.relu(x)

    with nn.parameter_scope(scope):
        with nn.parameter_scope("conv_1"):
            branch_1 = PF.convolution(activated,
                                      half_filters, (1, 1),
                                      pad=None,
                                      stride=(2, 2),
                                      with_bias=False)

        # Shift the input by one pixel while keeping its shape.
        shifted = F.pad(activated, (0, 1, 0, 1), mode='constant')
        shifted = F.slice(shifted, (0, 0, 1, 1))

        with nn.parameter_scope("conv_2"):
            branch_2 = PF.convolution(shifted,
                                      half_filters, (1, 1),
                                      pad=None,
                                      stride=(2, 2),
                                      with_bias=False)

        merged = F.concatenate(branch_1, branch_2, axis=1)

        with nn.parameter_scope("reduction_bn"):
            merged = PF.batch_normalization(merged,
                                            batch_stat=not test,
                                            fix_parameters=is_search)
    return merged
Exemplo n.º 10
0
def build_model():
    """Build the training graph of the attention-based seq2seq model.

    Sizes (``batch_size``, ``sentence_length_source``,
    ``sentence_length_target``, ``vocab_size_source``, ``vocab_size_target``,
    ``embedding_size``, ``hidden``) are read from module-level names.

    Returns:
        tuple: ``(x, y, loss)`` where ``x`` and ``y`` are the source/target
        input variables and ``loss`` is the mean over the batch of the
        per-sentence cross entropy averaged over non-pad target positions.
    """
    x = nn.Variable((batch_size, sentence_length_source))
    # Built but not used below; multiplying the encoder embeddings by it is
    # currently disabled.
    input_mask = F.sign(
        F.reshape(F.slice(x), (batch_size, sentence_length_source, 1)))
    y = nn.Variable((batch_size, sentence_length_target))

    # -> (batch_size, sentence_length_source, embedding_size)
    enc_input = time_distributed(PF.embed)(x,
                                           vocab_size_source,
                                           embedding_size,
                                           name='enc_embeddings')
    # -> (batch_size, sentence_length_target, embedding_size)
    dec_input = time_distributed(PF.embed)(y,
                                           vocab_size_target,
                                           embedding_size,
                                           name='dec_embeddings')

    # encoder
    # -> (batch_size, sentence_length_source, hidden),
    #    (batch_size, hidden), (batch_size, hidden)
    with nn.parameter_scope('encoder'):
        encoder_output, c, h = LSTMEncoder(enc_input,
                                           hidden,
                                           return_sequences=True,
                                           return_state=True)

    # decoder -> (batch_size, sentence_length_target, hidden)
    decoder_output = LSTMAttentionDecoder(dec_input,
                                          encoder_output,
                                          initial_state=(c, h),
                                          return_sequences=True,
                                          name='decoder')
    # -> (batch_size, sentence_length_target, vocab_size_target)
    logits = time_distributed(PF.affine)(decoder_output,
                                         vocab_size_target,
                                         name='output')

    targets = F.reshape(F.slice(y),
                        (batch_size, sentence_length_target, 1))
    entropy = time_distributed_softmax_cross_entropy(logits, targets)

    # do not predict 'pad'.
    not_pad = F.sum(F.sign(targets), axis=2)
    tokens_per_sentence = F.sum(not_pad, axis=1)

    entropy *= not_pad
    loss = F.mean(F.sum(entropy, axis=1) / tokens_per_sentence)
    return x, y, loss
Exemplo n.º 11
0
def vision_transformer(x, input_res, patch_size, v_width, v_layers, v_heads,
                       embed_dim):
    """Build the visual transformer encoder.

    The image is cut into patches by a strided convolution, a class token
    and positional embeddings are added, the sequence goes through
    ``transformer`` and the class-token output is normalized and, when a
    ``proj`` parameter exists in the ``visual`` scope, projected.

    The class and positional embeddings are reshaped with a leading
    dimension of 1 and broadcast over the batch, so any batch size works
    (reshaping them to the batch dimension only succeeds for batch size 1).

    Args:
        x (nn.Variable): Input images with 3 channels.
        input_res (int): Input resolution; with ``patch_size`` it fixes the
            number of positional embeddings.
        patch_size (int): Patch size, used as convolution kernel and stride.
        v_width (int): Transformer width.
        v_layers (int): Number of layers, forwarded to ``transformer``.
        v_heads (int): Number of heads, forwarded to ``transformer``.
        embed_dim (int): Size of the last axis of the returned variable.

    Returns:
        nn.Variable: Features reshaped to ``(-1, embed_dim)``.
    """
    with nn.parameter_scope("visual"):
        con1_w = nn.parameter.get_parameter_or_create(name="conv1/W",
                                                      shape=(v_width, 3,
                                                             patch_size,
                                                             patch_size))
        x = F.convolution(
            x, con1_w, bias=None,
            stride=(patch_size, patch_size))  # shape = [*, width, grid, grid]

        # shape = [*, width, grid ** 2]
        x = F.reshape(x, (x.shape[0], x.shape[1], -1))
        x = F.transpose(x, (0, 2, 1))  # shape = [*, grid ** 2, width]

        # Adding the (1, 1, width) class embedding to a batch-sized zero
        # tensor broadcasts it to one class token per sample.
        z = np.zeros((x.shape[0], 1, x.shape[-1]))
        zeros = nn.Variable.from_numpy_array(z)
        class_embed = nn.parameter.get_parameter_or_create(
            name="class_embedding", shape=(v_width, )).reshape(
                (1, 1, v_width))
        # shape = [*, grid ** 2 + 1, width]
        x = F.concatenate(class_embed + zeros, x, axis=1)

        # Leading dimension 1: broadcast over the batch in the addition.
        positional_embedding = nn.parameter.get_parameter_or_create(
            name='positional_embedding',
            shape=((input_res // patch_size)**2 + 1, v_width)).reshape(
                (1, x.shape[1], v_width))
        x = x + positional_embedding

        ln_pre_w = nn.parameter.get_parameter_or_create(
            name="ln_pre/W", shape=(v_width, )).reshape((1, 1, v_width))
        ln_pre_b = nn.parameter.get_parameter_or_create(
            name="ln_pre/b", shape=(v_width, )).reshape((1, 1, v_width))
        x = F.layer_normalization(x, ln_pre_b, ln_pre_w, batch_axis=(0, 1))

        x = F.transpose(x, (1, 0, 2))  # NLD -> LND

        x = transformer(x, v_width, v_layers, v_heads)

        x = F.transpose(x, (1, 0, 2))  # LND -> NLD

        ln_post_w = nn.parameter.get_parameter_or_create(
            name="ln_post/W", shape=(v_width, )).reshape((1, 1, v_width))
        ln_post_b = nn.parameter.get_parameter_or_create(
            name="ln_post/b", shape=(v_width, )).reshape((1, 1, v_width))
        # Keep only the first sequence position (the class token).
        x = F.slice(x, stop=(x.shape[0], 1, x.shape[2]))
        x = F.layer_normalization(x, ln_post_b, ln_post_w)

        if 'proj' in nn.get_parameters():
            visual_proj = nn.parameter.get_parameter_or_create(
                name="proj", shape=(v_width, embed_dim)).reshape(
                    (1, v_width, -1))
            x = F.batch_matmul(x, visual_proj)

        x = x.reshape((-1, embed_dim))

    return x
 def crop(tensor, target_times):
     """Center-crop axis 2 of ``tensor`` to ``target_times`` entries.

     When the length already matches, ``tensor`` itself is returned. For an
     odd surplus the extra entry is removed from the end.
     """
     length = tensor.shape[2]
     surplus = length - target_times
     if surplus != 0:
         trim_front = surplus // 2
         trim_back = surplus - trim_front
         begin = (0, 0, trim_front)
         end = (tensor.shape[0], tensor.shape[1], length - trim_back)
         return F.slice(tensor, start=begin, stop=end, step=(1, 1, 1))
     return tensor
Exemplo n.º 13
0
def test_slice_arguments(indices):
    """Check that ``F.slice`` and ``Variable`` indexing record equal arguments.

    ``indices`` is a ``(init, start, stop, step)`` tuple; the reference
    indexing expression is chosen by the sign of ``step[0]``.
    """
    import nnabla.functions as F
    init, start, stop, step = indices
    x = nn.Variable(init)
    via_function = F.slice(x, start, stop, step)
    via_indexing = x[:, :5] if step[0] > 0 else x[::-1, :-6:-1]

    assert via_function.parent.arguments == via_indexing.parent.arguments
 def downsampling_block(x, i):
     """Build the ``i``-th downsampling block.

     Applies ``conv`` followed by the activation ``af`` and returns both
     the full-resolution features and a copy decimated by 2 along the last
     axis. ``num_initial_filters`` and ``filter_size`` are read from the
     enclosing scope.
     """
     scope_name = 'ds_block-%2d' % i
     n_filters = num_initial_filters + num_initial_filters * i
     with nn.parameter_scope(scope_name):
         convolved = conv(x, n_filters, (filter_size, ), (7, ), name='conv')
         ds = af(convolved)
         # Decimate by factor of 2: keep every second entry of the last axis.
         ds_slice = F.slice(ds,
                            start=(0, 0, 0),
                            stop=ds.shape,
                            step=(1, 1, 2))
         return ds, ds_slice
Exemplo n.º 15
0
 def network(self, x_in, name='LSTM', n_hidden=32):
     """Unroll the LSTM over axis 1 of ``x_in`` and return the last outputs.

     Each step updates ``self._h`` and ``self._c`` through
     ``self._lstm_cell`` and maps the hidden state to ``self._cols_size``
     values with an affine layer. The per-step outputs are stacked on
     axis 1 and only the final ``self._x_output_length`` steps are returned.
     """
     affine_scope = name + '_Affine_2'
     step_outputs = []
     for x_step in F.split(x_in, axis=1):
         self._h, self._c = self._lstm_cell(name, n_hidden, x_step, self._h, self._c)
         with nn.parameter_scope(affine_scope):
             self._h = PF.affine(self._h, (self._cols_size,))
         step_outputs.append(self._h)

     stacked = F.stack(*step_outputs, axis=1)
     n_steps = stacked.shape[1]
     first_kept = n_steps - self._x_output_length
     return F.slice(stacked,
                    start=[0, first_kept, 0],
                    stop=[self._batch_size, n_steps, self._cols_size],
                    step=[1, 1, 1])
Exemplo n.º 16
0
def slice_data_grad_backward(inputs, start=None, stop=None, step=None):
    """Slice the incoming gradient with the given slice arguments.

    Args:
      inputs (list of nn.Variable): Only ``inputs[0]`` is used.
      start: Start indices forwarded to ``F.slice``.
      stop: Stop indices forwarded to ``F.slice``.
      step: Step sizes forwarded to ``F.slice``.

    Return:
      nn.Variable: ``inputs[0]`` sliced with ``start``, ``stop``, ``step``.
    """
    return F.slice(inputs[0], start, stop, step)
Exemplo n.º 17
0
def test_slice_forward_special(seed, inshape, start, stop, step, ctx, fname):
    """Compare ``F.slice`` forward output against the ``ref_slice`` reference.

    The input data is drawn from a generator seeded with ``seed`` so that a
    failing case can be reproduced.
    """
    rng = np.random.RandomState(seed)
    x_data = rng.rand(*inshape)
    # Numpy
    x_data_key = ref_slice(x_data, start, stop, step)

    # NNabla
    with nn.context_scope(ctx):
        x = nn.Variable.from_numpy_array(x_data)
        x_key = F.slice(x, start, stop, step)
        x_key.forward()

    assert_allclose(x_data_key, x_key.d)
Exemplo n.º 18
0
def network(x, d1, c1, d2, c2, test=False):
    """Build the two-layer LSTM network over a one-hot encoded input.

    Args:
        x (nn.Variable): Input indices, one-hot encoded to 687 classes.
        d1, c1: Forwarded to ``network_LSTM`` for the ``LSTM1`` scope.
        d2, c2: Forwarded to ``network_LSTM`` for the ``LSTM2`` scope.
        test (bool): Forwarded to ``network_LSTM``.

    Returns:
        tuple of nn.Variable: ``(cell_out, delay_out, cell_out_2,
        delay_out_2, softmax)`` in that order.
    """
    # Input:x -> 1, OneHot -> 687
    one_hot = F.one_hot(x, (687, ))

    # LSTM1 -> 200
    with nn.parameter_scope('LSTM1'):
        lstm1 = network_LSTM(one_hot, d1, c1, 687, 100, test)

    # First 100 entries (Slice) and last 100 entries (CellOut) of LSTM1.
    slice_1 = F.slice(lstm1, (0, ), (100, ), (1, ))
    cell_out = F.slice(lstm1, (100, ), (200, ), (1, ))

    # LSTM2 -> 128
    with nn.parameter_scope('LSTM2'):
        lstm2 = network_LSTM(slice_1, d2, c2, 100, 64, test)

    # DelayOut
    delay_out = F.identity(slice_1)

    # First 64 entries (Slice_2) and last 64 entries (CellOut_2) of LSTM2.
    slice_2 = F.slice(lstm2, (0, ), (64, ), (1, ))
    cell_out_2 = F.slice(lstm2, (64, ), (128, ), (1, ))

    # Affine_2 -> 687
    affine_2 = PF.affine(slice_2, (687, ), name='Affine_2')

    # DelayOut_2
    delay_out_2 = F.identity(slice_2)
    # Softmax
    softmax = F.softmax(affine_2)
    return cell_out, delay_out, cell_out_2, delay_out_2, softmax
Exemplo n.º 19
0
def chunk(x, num_chunk, axis):
    """
    Split `x` to `num_chunk` arrays along specified axis.

    Each chunk requests ``ceil(x.shape[axis] / num_chunk)`` entries along
    `axis` and spans every other axis completely. The chunks are returned
    as a list in order.
    """
    dims = x.shape
    n_dims = len(dims)
    # Ceiling division so that num_chunk chunks cover the whole axis.
    per_chunk = (dims[axis] + num_chunk - 1) // num_chunk

    pieces = []
    for index in range(num_chunk):
        begin = [0] * n_dims
        end = list(dims)
        begin[axis] = index * per_chunk
        end[axis] = begin[axis] + per_chunk

        piece = F.slice(x, start=begin, stop=end)
        assert len(piece.shape) == len(x.shape)
        pieces.append(piece)

    return pieces
Exemplo n.º 20
0
def test_slice_forward_special_case(seed, inshape, start, stop, step,
                                    empty_case, ctx, func_name):
    """Compare ``F.slice`` forward output against the ``ref_slice`` reference.

    Cases flagged as ``empty_case`` are skipped. The input data is drawn
    from a generator seeded with ``seed`` so that a failing case can be
    reproduced.
    """
    if empty_case:
        pytest.skip("Empty-NdArray raises error as NNabla specification")

    rng = np.random.RandomState(seed)
    x_data = rng.rand(*inshape)
    # Numpy
    x_data_key = ref_slice(x_data, start, stop, step)

    # NNabla
    with nn.context_scope(ctx):
        x = nn.Variable.from_numpy_array(x_data)
        x_key = F.slice(x, start, stop, step)
        x_key.forward()

    assert np.allclose(x_data_key, x_key.d)
Exemplo n.º 21
0
def image_preprocess(image, img_size=224, data_size=320, test=False):
    """Scale pixel values and apply the train/test image pipeline.

    Pixel values are divided by 255 and shifted by -0.5 at the end. In test
    mode the image is center-cropped to 87.5% of ``data_size`` and rescaled
    with a fixed factor; otherwise random augmentation is applied.

    Args:
        image (nn.Variable): Images whose last two axes are height and
            width (``h, w = image.shape[2:]``).
        img_size (int): Side length of the output image.
        data_size (int): Reference size used to derive the test-time crop.
        test (bool): Selects the deterministic test pipeline.

    Returns:
        nn.Variable: The preprocessed images.
    """
    h, w = image.shape[2:]
    image = image / 255.0
    if test:
        # Ratio of size is 87.5%. The crop bounds must be integers to be
        # usable as slice indices.
        crop_size = int(data_size * 0.875)
        hs = (h - crop_size) // 2
        ws = (w - crop_size) // 2
        he = hs + crop_size
        we = ws + crop_size
        # The three indices address (channel, height, width), so the height
        # bounds come before the width bounds.
        image = F.slice(image, (0, hs, ws), (3, he, we), (1, 1, 1))
        image = F.image_augmentation(image, (3, img_size, img_size),
                                     min_scale=0.8,
                                     max_scale=0.8)
    else:
        size = min(h, w)
        min_size = img_size * 1.1
        max_size = min_size * 2
        min_scale = min_size / size
        max_scale = max_size / size
        image = F.image_augmentation(image, (3, img_size, img_size),
                                     pad=(0, 0),
                                     min_scale=min_scale,
                                     max_scale=max_scale,
                                     angle=0.5,
                                     aspect_ratio=1.3,
                                     distortion=0.2,
                                     flip_lr=True,
                                     flip_ud=False,
                                     brightness=0.0,
                                     brightness_each=True,
                                     contrast=1.1,
                                     contrast_center=0.5,
                                     contrast_each=True,
                                     noise=0.0)
    image = image - 0.5
    return image
Exemplo n.º 22
0
def augment(batch, aug_list, p_aug=1.0):
    """Apply the augmentations named in ``aug_list`` to ``batch``.

    Supported names are ``"flip"``, ``"lrflip"``, ``"translation"``,
    ``"color"`` and ``"cutout"``. Except for cutout, each augmentation is
    computed for the whole batch and then kept per sample only where a
    uniform random draw is below ``p_aug``.

    Args:
        batch (nn.Variable): Image batch with four axes.
        aug_list: Container of augmentation names.
        p_aug (int, float or nn.Variable): Augmentation probability. Plain
            numbers are wrapped into a one-element Variable.

    Returns:
        nn.Variable: The augmented batch.
    """
    if isinstance(p_aug, (int, float)):
        p_aug = nn.Variable.from_numpy_array(p_aug * np.ones((1,)))

    def keep_with_probability(rnd, augmented, original):
        # Per sample: take the augmented version where p_aug > rnd.
        selected = F.greater(F.tile(p_aug, batch.shape[0]), rnd)
        return F.where(selected, augmented, original)

    if "flip" in aug_list:
        rnd = F.rand(shape=[batch.shape[0], ])
        batch_aug = F.random_flip(batch, axes=(2, 3))
        batch = keep_with_probability(rnd, batch_aug, batch)

    if "lrflip" in aug_list:
        rnd = F.rand(shape=[batch.shape[0], ])
        batch_aug = F.random_flip(batch, axes=(3,))
        batch = keep_with_probability(rnd, batch_aug, batch)

    if "translation" in aug_list and batch.shape[2] >= 8:
        rnd = F.rand(shape=[batch.shape[0], ])
        # Currently nnabla does not support random_shift with border_mode="noise"
        # A mask with a zero border is appended to the batch as one extra
        # sample and shifted together with it. The mask uses the batch's
        # own channel count so the concatenation works for any number of
        # channels.
        mask = np.ones((1, batch.shape[1], batch.shape[2], batch.shape[3]))
        mask[:, :, :, 0] = 0
        mask[:, :, :, -1] = 0
        mask[:, :, 0, :] = 0
        mask[:, :, -1, :] = 0
        batch_int = F.concatenate(
            batch, nn.Variable.from_numpy_array(mask), axis=0)
        batch_int_aug = F.random_shift(batch_int, shifts=(
            batch.shape[2]//8, batch.shape[3]//8), border_mode="nearest")
        # Split the shifted tensor back into the images and the mask.
        batch_aug = F.slice(batch_int_aug, start=(
            0, 0, 0, 0), stop=batch.shape)
        mask_var = F.slice(batch_int_aug, start=(
            batch.shape[0], 0, 0, 0), stop=batch_int_aug.shape)
        batch_aug = batch_aug * F.broadcast(mask_var, batch_aug.shape)
        batch = keep_with_probability(rnd, batch_aug, batch)

    if "color" in aug_list:
        rnd = F.rand(shape=[batch.shape[0], ])
        rnd_contrast = 1.0 + 0.5 * \
            (2.0 * F.rand(shape=[batch.shape[0], 1, 1, 1]
                          ) - 1.0)  # from 0.5 to 1.5
        rnd_brightness = 0.5 * \
            (2.0 * F.rand(shape=[batch.shape[0], 1, 1, 1]
                          ) - 1.0)  # from -0.5 to 0.5
        rnd_saturation = 2.0 * \
            F.rand(shape=[batch.shape[0], 1, 1, 1])  # from 0.0 to 2.0
        # Brightness
        batch_aug = batch + rnd_brightness
        # Saturation
        mean_s = F.mean(batch_aug, axis=1, keepdims=True)
        batch_aug = rnd_saturation * (batch_aug - mean_s) + mean_s
        # Contrast
        mean_c = F.mean(batch_aug, axis=(1, 2, 3), keepdims=True)
        batch_aug = rnd_contrast * (batch_aug - mean_c) + mean_c
        batch = keep_with_probability(rnd, batch_aug, batch)

    if "cutout" in aug_list and batch.shape[2] >= 16:
        batch = F.random_erase(batch, prob=p_aug.d[0], replacements=(0.0, 0.0))

    return batch
Exemplo n.º 23
0
def decoder(target_action,
            target_action_type,
            target_node_type,
            target_parent_rule,
            target_parent_index,
            query_embed,
            query_embed_mask,
            rule_num,
            token_num,
            node_type_num,
            embedding_size,
            node_type_embedding_size,
            state_size,
            hidden_size,
            previous_action_embed=None,
            initial_state=None,
            initial_cell=None,
            hist=None,
            dropout=0.0,
            train=True):
    """
    Build the decoder graph: embed the target actions, shift them by one
    step, combine them with node-type and parent-rule embeddings and run
    the result through ``cond_att_lstm``.

    target_action: (batch_size, max_action_length, 3)
    target_action_type: (batch_size, max_action_length, 3)
    target_node_type: (batch_size, max_action_length)
    target_parent_rule: (batch_size, max_action_length)
    target_parent_index: (batch_size, max_action_length)

    ``previous_action_embed`` defaults to an all-zero
    (batch_size, 1, embedding_size) variable. ``query_embed``,
    ``query_embed_mask``, ``state_size``, ``hidden_size``,
    ``initial_state``, ``initial_cell``, ``hist``, ``dropout`` and ``train``
    are forwarded unchanged to ``cond_att_lstm``.

    Returns the tuple (target_action_embed, decoder_hidden_states,
    decoder_cells, ctx_vectors, target_action_mask, new_hist).
    """
    batch_size, max_action_length, _ = target_action.shape

    # Node type embedding
    with nn.parameter_scope("node_type_embedding"):
        target_node_type_embed = embedding(target_node_type,
                                           node_type_num,
                                           node_type_embedding_size,
                                           mask_zero=False,
                                           init=I.NormalInitializer(0.01))

    # Previous action embedding
    ## (batch_size, max_action_length)
    target_apply_rule, target_gen_token, target_copy_token = split(
        target_action, axis=2)
    with nn.parameter_scope("rule_embedding"):
        # (batch_size, max_action_length, embedding_size)
        target_apply_rule_embed = embedding(target_apply_rule,
                                            rule_num,
                                            embedding_size,
                                            mask_zero=False,
                                            init=I.NormalInitializer(0.01))
        target_apply_rule_embed = F.reshape(
            target_apply_rule_embed,
            (batch_size, max_action_length, 1, embedding_size))
    with nn.parameter_scope("token_embedding"):
        # (batch_size, max_action_length, embedding_size)
        target_gen_token_embed = embedding(target_gen_token,
                                           token_num,
                                           embedding_size,
                                           mask_zero=False,
                                           init=I.NormalInitializer(0.01))
        target_gen_token_embed = F.reshape(
            target_gen_token_embed,
            (batch_size, max_action_length, 1, embedding_size))
    # The copy-token slot has no embedding: it is broadcast to the embedding
    # shape and zeroed so that it contributes nothing to the sum below.
    target_copy_token = F.reshape(target_copy_token,
                                  (batch_size, max_action_length, 1, 1))
    target_copy_token = F.broadcast(
        target_copy_token, (batch_size, max_action_length, 1, embedding_size))
    target_copy_token *= 0
    # (batch_size, max_action_length, 3, embedding_size)
    target_action_embed = concatenate(target_apply_rule_embed,
                                      target_gen_token_embed,
                                      target_copy_token,
                                      axis=2)
    # Weight each of the three slots by its action-type entry.
    target_action_type2 = F.reshape(target_action_type,
                                    (batch_size, max_action_length, 3, 1))
    target_action_type2 = F.broadcast(
        target_action_type2,
        (batch_size, max_action_length, 3, embedding_size))
    # (batch_size, max_action_length, 3, embedding_size)
    target_action_embed = target_action_embed * target_action_type2
    # (batch_size, max_action_length, embedding_size)
    target_action_embed = F.sum(target_action_embed, axis=2)

    # Shift action
    if previous_action_embed is None:
        previous_action_embed = nn.Variable((batch_size, 1, embedding_size),
                                            need_grad=False)
        previous_action_embed.data.zero()
    # (batch_size, max_action_length + 1, embedding_size)
    target_action_embed = concatenate(previous_action_embed,
                                      target_action_embed,
                                      axis=1)
    # Drop the last step so each position sees the previous action.
    # (batch_size, max_action_length, embedding_size)
    target_action_embed = F.slice(
        target_action_embed,
        start=[0, 0, 0],
        stop=[batch_size, max_action_length, embedding_size])

    # Parent action embedding
    # The mask is 0 where the parent rule index is 0 and 1 elsewhere.
    parent_rule_mask = 1 - F.equal_scalar(target_parent_rule,
                                          0)  # (batch_size, max_action_length)
    parent_rule_mask = F.reshape(parent_rule_mask,
                                 (batch_size, max_action_length, 1))
    parent_rule_mask = F.broadcast(
        parent_rule_mask, (batch_size, max_action_length, embedding_size))
    # Same scope name as the rule embedding above.
    # NOTE(review): presumably this shares the embedding weights — confirm
    # against the `embedding` helper.
    with nn.parameter_scope("rule_embedding"):
        target_parent_rule_embed = embedding(target_parent_rule,
                                             rule_num,
                                             embedding_size,
                                             mask_zero=False)
    target_parent_rule_embed = parent_rule_mask * target_parent_rule_embed

    # (batch_size, max_action_length, embedding_size * 2 + node_type_embedding_size)
    decoder_input = concatenate(target_action_embed,
                                target_node_type_embed,
                                target_parent_rule_embed,
                                axis=2)
    # 1 where at least one action-type entry is non-zero, 0 otherwise.
    target_action_mask = 1 - F.equal_scalar(F.sum(
        target_action_type, axis=2), 0)  # (batch_size, max_action_length)
    with nn.parameter_scope("decoder"):
        decoder_hidden_states, decoder_cells, ctx_vectors, new_hist = cond_att_lstm(
            decoder_input,
            target_parent_index,
            target_action_mask,
            query_embed,
            query_embed_mask,
            state_size,
            hidden_size,
            initial_state=initial_state,
            initial_cell=initial_cell,
            hist=hist,
            dropout=dropout,
            train=train)
    return target_action_embed, decoder_hidden_states, decoder_cells, ctx_vectors, target_action_mask, new_hist
Exemplo n.º 24
0
def LSTMAttentionDecoder(inputs=None,
                         encoder_output=None,
                         initial_state=None,
                         return_sequences=False,
                         return_state=False,
                         inference_params=None,
                         name='lstm'):
    """LSTM decoder with global attention over the encoder outputs.

    Training mode (``inference_params is None``): the decoder is fed a pad
    embedding followed by ``inputs`` without its last time step, and the
    attention-combined hidden state of every step is collected.

    Inference mode (``inference_params`` given): starting from the pad
    embedding, the most likely word is fed back greedily until the word
    '。' is produced or 20 steps have run.

    Args:
        inputs (nn.Variable): Embedded target sequence; axis 1 is time.
            May be None only in inference mode.
        encoder_output (nn.Variable): Encoder outputs handed to
            ``GlobalAttention``.
        initial_state (tuple or list): ``(c0, h0)`` with equal 2-D shapes.
        return_sequences (bool): Training mode only. Return all steps
            stacked on axis 1 instead of only the last step.
        return_state (bool): Training mode only. Also return the final
            cell and hidden state.
        inference_params (tuple): ``(embed_weight, output_weight,
            output_bias)``; enables inference mode.
        name (str): Parameter scope of the recurrent step.

    Returns:
        Training mode: the output variable, or ``(output, cell, hidden)``
        when ``return_state`` is True. Inference mode: list of predicted
        word indices.
    """
    if inputs is None:
        assert inference_params is not None, 'if inputs is None, inference_params must not be None.'
    else:
        sentence_length = inputs.shape[1]

    assert isinstance(initial_state, (tuple, list)), \
           'initial_state must be a tuple or a list.'
    assert len(initial_state) == 2, \
           'initial_state must have only two states.'

    c0, h0 = initial_state

    assert c0.shape == h0.shape, 'shapes of initial_state must be same.'
    batch_size, units = c0.shape

    cell = c0
    hidden = h0

    hs = []

    if inference_params is None:
        # Drop the last time step: the decoder input is the target sequence
        # shifted right by one, with a pad embedding prepended below.
        # NOTE(review): the feature bound of the slice is `units` rather
        # than the embedding width of `inputs` — confirm this is intended
        # when the two sizes differ.
        xs = F.split(F.slice(inputs,
                             stop=(batch_size, sentence_length - 1, units)),
                     axis=1)
        pad = nn.Variable.from_numpy_array(
            np.array([w2i_source['pad']] * batch_size))
        xs = [
            PF.embed(
                pad, vocab_size_source, embedding_size, name='enc_embeddings')
        ] + list(xs)

        compute_context = GlobalAttention(encoder_output, 1024)

        for x in xs:
            with nn.parameter_scope(name):
                cell, hidden = lstm_cell(x, cell, hidden)
                context = compute_context(hidden)
                h_t = F.tanh(
                    PF.affine(F.concatenate(context, hidden, axis=1),
                              1024,
                              with_bias=False,
                              name='Wc'))
            hs.append(h_t)
    else:
        assert batch_size == 1, 'batch size of inference mode must be 1.'
        embed_weight, output_weight, output_bias = inference_params
        pad = nn.Variable.from_numpy_array(
            np.array([w2i_source['pad']] * batch_size))
        x = PF.embed(pad,
                     vocab_size_source,
                     embedding_size,
                     name='enc_embeddings')

        compute_context = GlobalAttention(encoder_output, 1024)

        word_index = 0
        ret = []
        i = 0
        # Greedy decoding, capped at 20 steps.
        while i2w_target[word_index] != '。' and i < 20:
            with nn.parameter_scope(name):
                cell, hidden = lstm_cell(x, cell, hidden)
                context = compute_context(hidden)
                h_t = F.tanh(
                    PF.affine(F.concatenate(context, hidden, axis=1),
                              1024,
                              with_bias=False,
                              name='Wc'))
            output = F.affine(h_t, output_weight, bias=output_bias)
            word_index = np.argmax(output.d[0])
            ret.append(word_index)
            # Feed the predicted word back in as the next input.
            x = nn.Variable.from_numpy_array(
                np.array([word_index], dtype=np.int32))
            x = F.embed(x, embed_weight)

            i += 1
        return ret

    if return_sequences:
        ret = F.stack(*hs, axis=1)
    else:
        ret = hs[-1]

    if return_state:
        return ret, cell, hidden
    else:
        return ret
Exemplo n.º 25
0
    def synthesis(self, w_mixed, constant_bc, seed=-1, noises_in=None):
        """Build the synthesis graph and return the accumulated RGB output.

        The first block runs a styled convolution ("Conv") on ``constant_bc``
        followed by a 1x1 "ToRGB" convolution.  Every further entry of
        ``self.resolutions`` then adds an upsampling styled convolution
        ("Conv0_up"), a second styled convolution ("Conv1") and another
        "ToRGB" whose output is summed with the upsampled RGB result of the
        previous level.

        Args:
            w_mixed: Style codes, indexed here as (batch, layer, feature).
                Layer 0 styles the first block; layers ``2*i - 1``, ``2*i``
                and ``2*i + 1`` style level ``i`` of the loop.
            constant_bc: Input handed to the first styled convolution.
            seed: Seed passed to every ``F.randn`` call when noise is sampled
                inside this method.
            noises_in: Optional indexable collection of ready-made noise
                inputs.  Entry 0 feeds the first block, entries ``2*i - 1``
                and ``2*i`` feed level ``i``.  When None, noise is sampled
                with ``F.randn``.

        Returns:
            The summed ToRGB output after the last resolution.
        """
        batch_size = w_mixed.shape[0]
        style_dim = w_mixed.shape[2]

        def pick_style(layer):
            # Cut a single layer out of w_mixed and drop the layer axis.
            row = F.slice(w_mixed,
                          start=(0, layer, 0),
                          stop=(batch_size, layer + 1, style_dim),
                          step=(1, 1, 1))
            return F.reshape(row, (batch_size, style_dim), inplace=False)

        def pick_noise(slot, size):
            # Prefer the caller's noise; otherwise sample a square map.
            if noises_in is not None:
                return noises_in[slot]
            return F.randn(shape=(batch_size, 1, size, size), seed=seed)

        # First block: the sampled noise map is hard-coded to 4 x 4.
        first_res = self.resolutions[0]
        noise = pick_noise(0, 4)
        w = pick_style(0)
        h = styled_conv_block(constant_bc,
                              w,
                              noise,
                              res=first_res,
                              outmaps=self.feature_map_dim,
                              namescope="Conv")
        torgb = styled_conv_block(h,
                                  w,
                                  noise=None,
                                  res=first_res,
                                  outmaps=3,
                                  inmaps=self.feature_map_dim,
                                  kernel_size=1,
                                  pad_size=0,
                                  demodulate=False,
                                  namescope="ToRGB",
                                  act=F.identity)

        # Channel counts start at the base width and are halved on the
        # levels beyond `downsize_index`.
        outmaps = inmaps = self.feature_map_dim
        if self.resolutions[-1] in (512, 1024):
            downsize_index = 4
        else:
            downsize_index = 3

        for level in range(1, len(self.resolutions)):
            res = self.resolutions[level]
            halve = level > downsize_index
            up_layer = 2 * level - 1
            conv_layer = 2 * level
            rgb_layer = 2 * level + 1

            # Upsampling convolution: output width shrinks first, the input
            # width still matches the previous level.
            style = pick_style(up_layer)
            if halve:
                outmaps //= 2
            noise = pick_noise(up_layer, res)
            h = styled_conv_block(h,
                                  style,
                                  noise,
                                  res=res,
                                  outmaps=outmaps,
                                  inmaps=inmaps,
                                  kernel_size=3,
                                  up=True,
                                  namescope="Conv0_up")

            # Second convolution: the input width now catches up.
            style = pick_style(conv_layer)
            if halve:
                inmaps //= 2
            noise = pick_noise(conv_layer, res)
            h = styled_conv_block(h,
                                  style,
                                  noise,
                                  res=res,
                                  outmaps=outmaps,
                                  inmaps=inmaps,
                                  kernel_size=3,
                                  pad_size=1,
                                  namescope="Conv1")

            # Project to RGB and add the upsampled image of the level below.
            style = pick_style(rgb_layer)
            curr_torgb = styled_conv_block(h,
                                           style,
                                           noise=None,
                                           res=res,
                                           outmaps=3,
                                           inmaps=inmaps,
                                           kernel_size=1,
                                           pad_size=0,
                                           demodulate=False,
                                           namescope="ToRGB",
                                           act=F.identity)
            torgb = F.add2(curr_torgb, upsample_2d(torgb, k=[1, 3, 3, 1]))

        return torgb
Exemplo n.º 26
0
def LSTMDecoder(inputs=None,
                initial_state=None,
                return_sequences=False,
                return_state=False,
                inference_params=None,
                name='lstm'):
    """Run an LSTM decoder, teacher-forced or as a greedy word sampler.

    Training mode (``inference_params is None``): ``inputs`` is split along
    axis 1 into per-step inputs, the last step is dropped and an all-ones
    input is placed in front, so the cell at step ``t`` receives the input of
    step ``t - 1``.

    Inference mode (``inference_params`` given): decoding starts from an
    all-ones input and feeds back the embedding of the arg-max word until the
    module-level ``i2w_target`` maps the current word to ``period`` or 20
    steps have been taken.

    Args:
        inputs: Step inputs, indexed as (batch, step, feature) by the slicing
            below.  May be None in inference mode.
        initial_state: Tuple or list ``(c0, h0)`` of the initial cell and
            hidden state; both must share one 2-D shape
            ``(batch_size, units)``.
        return_sequences: Training mode only.  If True, return all hidden
            states stacked along axis 1, otherwise only the last one.
        return_state: Training mode only.  If True, additionally return the
            final cell and hidden state.
        inference_params: ``(embed_weight, output_weight, output_bias)``;
            switches the function into inference mode when not None.
        name: Parameter scope under which ``lstm_cell`` is invoked.

    Returns:
        Inference mode: list of the word indices chosen by ``np.argmax``.
        Training mode: ``ret`` or ``(ret, cell, hidden)`` depending on
        ``return_state``.

    Raises:
        AssertionError: if both ``inputs`` and ``inference_params`` are None,
            if ``initial_state`` is not a two-element tuple/list of equally
            shaped states, or if inference mode is used with a batch size
            other than 1.
    """
    assert inputs is not None or inference_params is not None, \
        'if inputs is None, inference_params must not be None.'

    # isinstance (instead of an exact type match) also admits tuple/list
    # subclasses such as namedtuples.
    assert isinstance(initial_state, (tuple, list)), \
        'initial_state must be a tuple or a list.'
    assert len(initial_state) == 2, \
        'initial_state must have only two states.'

    c0, h0 = initial_state

    assert c0.shape == h0.shape, 'shapes of initial_state must be same.'
    batch_size, units = c0.shape

    cell = c0
    hidden = h0

    if inference_params is not None:
        assert batch_size == 1, 'batch size of inference mode must be 1.'
        embed_weight, output_weight, output_bias = inference_params
        x = nn.Variable.from_numpy_array(np.ones((1, embed_weight.shape[1])))

        max_steps = 20  # hard upper bound on the generated sentence length
        word_index = 0
        ret = []
        step = 0
        while i2w_target[word_index] != period and step < max_steps:
            with nn.parameter_scope(name):
                cell, hidden = lstm_cell(x, cell, hidden)
            output = F.affine(hidden, output_weight, bias=output_bias)
            # Greedy choice; reading `.d` requires the graph to have been
            # evaluated already (presumably auto-forward mode -- TODO confirm).
            word_index = np.argmax(output.d[0])
            ret.append(word_index)
            x = nn.Variable.from_numpy_array(
                np.array([word_index], dtype=np.int32))
            x = F.embed(x, embed_weight)

            step += 1
        return ret

    sentence_length = inputs.shape[1]
    # Drop the last step; it is never fed back as an input.
    # NOTE(review): the feature axis is cut at `units`, which looks like it
    # assumes the input feature size equals the state size -- confirm.
    xs = F.split(F.slice(inputs,
                         stop=(batch_size, sentence_length - 1, units)),
                 axis=1)
    # An all-ones input takes the place of the first step.
    xs = [nn.Variable.from_numpy_array(np.ones(xs[0].shape))] + list(xs)

    hs = []
    for x in xs:
        with nn.parameter_scope(name):
            cell, hidden = lstm_cell(x, cell, hidden)
        hs.append(hidden)

    ret = F.stack(*hs, axis=1) if return_sequences else hs[-1]

    if return_state:
        return ret, cell, hidden
    return ret
Exemplo n.º 27
0
def ssd_loss(_ssd_confs, _ssd_locs, _label, _alpha=1):
    """Combine a confidence term and a localisation term into one SSD loss.

    Shapes below are the ones stated by the original author.

    Args:
        _ssd_confs: nn.Variable, class predictions,
            (batch_size, default boxes, class num + 1).
        _ssd_locs: nn.Variable, location predictions,
            (batch_size, default boxes, 4).
        _label: nn.Variable, (batch_size, default boxes, class num + 1 + 4).
            Channels 0..3 are read as the box location, the remaining
            channels as the class label, the last of which is the channel
            that ``ssd_separate_conf_pos_neg`` splits off as "neg".
        _alpha: float, weight of the localisation term.

    Returns:
        nn.Variable holding the mean of the per-sample weighted losses.
    """
    batch_dim = _label.shape[0]
    box_dim = _label.shape[1]
    unit_step = (1, 1, 1)

    # Split the label into its location part and its class part.
    label_loc = F.slice(_label,
                        start=(0, 0, 0),
                        stop=(batch_dim, box_dim, 4),
                        step=unit_step)
    label_conf = F.slice(_label,
                         start=(0, 0, 4),
                         stop=_label.shape,
                         step=unit_step)

    # Confidence term: softmax over the positive channels weighted by the
    # positive label, minus the log of the negative channel, summed per box
    # and then over all boxes.
    pred_pos, pred_neg = ssd_separate_conf_pos_neg(_ssd_confs)
    label_pos, _unused = ssd_separate_conf_pos_neg(label_conf)
    pos_term = F.sum(F.mul2(F.softmax(pred_pos, axis=2), label_pos), axis=2)
    neg_term = F.sum(F.log(pred_neg), axis=2)
    conf_loss = F.sum(F.sub2(pos_term, neg_term), axis=1)

    # Localisation term: half the Huber loss (named smooth_L1 by the original
    # helper), summed per box and kept only where the positive label is set.
    box_is_matched = F.sum(label_pos, axis=2)
    per_coord = F.mul_scalar(F.huber_loss(_ssd_locs, label_loc), 0.5)
    per_box = F.sum(per_coord, axis=2)
    loc_loss = F.sum(F.mul2(per_box, box_is_matched), axis=1)

    # Per-sample weight: number of default boxes minus the sum of the last
    # label channel.
    # NOTE(review): the original comment described this as the *inverse*
    # number of matched default boxes, yet no reciprocal is taken and the
    # loss is multiplied by the value -- confirm what was intended.
    last_channel = F.slice(_label,
                           start=(0, 0, _label.shape[2] - 1),
                           stop=_label.shape,
                           step=unit_step)
    sample_weight = F.sum(last_channel, axis=1)
    sample_weight = F.r_sub_scalar(sample_weight, box_dim)
    sample_weight = F.reshape(sample_weight,
                              (sample_weight.shape[0],),
                              inplace=False)

    combined = F.add2(conf_loss, F.mul_scalar(loc_loss, _alpha))
    return F.mean(F.mul2(combined, sample_weight))