Example 1
 def _lstm_cell(self, name, n_hidden, x_in, h=None, c=None):
     if h is None:
         h = nn.Variable.from_numpy_array(
             np.zeros((self._batch_size, self._cols_size)))
     if c is None:
         c = nn.Variable.from_numpy_array(
             np.zeros((self._batch_size, n_hidden)))
     h = F.concatenate(h, x_in, axis=1)  # LSTM_Concatenate -> cols_size * 2
     with nn.parameter_scope(name + '_Affine'):  # LSTM_Affine -> n_hidden
         h1 = PF.affine(h, (n_hidden, ), base_axis=1)
     with nn.parameter_scope(name + '_IGate'):  # LSTM_IGate -> n_hidden
         h2 = PF.affine(h, (n_hidden, ), base_axis=1)
     with nn.parameter_scope(name + '_FGate'):  # LSTM_FGate -> n_hidden
         h3 = PF.affine(h, (n_hidden, ), base_axis=1)
     with nn.parameter_scope(name + '_OGate'):  # LSTM_OGate -> n_hidden
         h4 = PF.affine(h, (n_hidden, ), base_axis=1)
     h1 = F.tanh(h1)  # LSTM_Tanh
     h2 = F.sigmoid(h2)  # LSTM_Sigmoid
     h3 = F.sigmoid(h3)  # LSTM_Sigmoid_2
     h4 = F.sigmoid(h4)  # LSTM_Sigmoid_3
     h5 = F.mul2(h2, h1)  # LSTM_Mul2 -> n_hidden
     h6 = F.mul2(h3, c)  # LSTM_Mul2_2 -> n_hidden
     h7 = F.add2(h5, h6, inplace=True)  # LSTM_Add2 -> n_hidden
     h8 = F.tanh(h7)  # LSTM_Tanh_2 -> n_hidden
     h9 = F.mul2(h4, h8)  # LSTM_Mul2_3 -> n_hidden
     c = h7  # LSTM_C
     h = h9  # LSTM_H
     return (h, c)
Example 2
def LSTMCell(x, h2, h1):

    units = h1.shape[1]

    # first stack: h2 = hidden state, h1 = cell state
    h2 = F.concatenate(h2, x, axis=1)

    h3 = PF.affine(h2, (units), name='Affine')

    h4 = PF.affine(h2, (units), name='InputGate')

    h5 = PF.affine(h2, (units), name='ForgetGate')

    h6 = PF.affine(h2, (units), name='OutputGate')

    h3 = F.tanh(h3)

    h4 = F.sigmoid(h4)

    h5 = F.sigmoid(h5)

    h6 = F.sigmoid(h6)

    h4 = F.mul2(h4, h3)

    h5 = F.mul2(h5, h1)

    h4 = F.add2(h4, h5, True)

    h7 = F.tanh(h4)

    h6 = F.mul2(h6, h7)

    return h6, h4  # hidden, cell
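
A minimal usage sketch (the shapes, the scope name "lstm", and the random input data below are assumptions, not part of the original snippet): unrolling LSTMCell over a short sequence while sharing the gate weights across time steps.

import numpy as np
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF

batch_size, units, seq_len = 4, 8, 3
h = nn.Variable.from_numpy_array(np.zeros((batch_size, units), dtype=np.float32))
c = nn.Variable.from_numpy_array(np.zeros((batch_size, units), dtype=np.float32))
for t in range(seq_len):
    x_t = nn.Variable.from_numpy_array(
        np.random.randn(batch_size, units).astype(np.float32))
    with nn.parameter_scope("lstm"):  # reuse the same gate parameters at every step
        h, c = LSTMCell(x_t, h, c)
h.forward()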
Example 3
 def graph(x1):
     x1 = F.identity(x1).apply(recompute=True)
     x2 = F.randn(shape=x1.shape, seed=123).apply(recompute=True)
     x3 = F.rand(shape=x1.shape, seed=456).apply(recompute=True)
     y = F.mul2(x1, x2).apply(recompute=True)
     y = F.mul2(y, x3).apply(recompute=True)
     y = F.identity(y)
     return y
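
For reference, the recompute flag used above asks nnabla to clear the flagged variable's data after the forward pass and recompute it on demand during backward, trading extra computation for lower memory use (a later example in this list verifies that the recomputed values match the originals).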
Example 4
def network_LSTM(x, D, C, InputShape, HiddenSize, test=False):
    # Input_2:x -> 687
    # Delay_in:D -> 100
    # Cell_in:C -> 100

    # Concatenate -> 787
    h = F.concatenate(D, x, axis=1)

    # Affine -> 100
    h1 = PF.affine(h, HiddenSize, name='Affine')

    # InputGate -> 100
    h2 = PF.affine(h, HiddenSize, name='InputGate')

    # OutputGate -> 100
    h3 = PF.affine(h, HiddenSize, name='OutputGate')

    # ForgetGate -> 100
    h4 = PF.affine(h, HiddenSize, name='ForgetGate')
    # Sigmoid
    h1 = F.sigmoid(h1)
    # Sigmoid_2
    h2 = F.sigmoid(h2)

    # Sigmoid_3
    h3 = F.sigmoid(h3)
    # Sigmoid_4
    h4 = F.sigmoid(h4)

    # Mul2 -> 100
    h1 = F.mul2(h1, h2)

    # Mul2_3 -> 100
    h4 = F.mul2(h4, C)

    # Add2 -> 100
    h1 = F.add2(h1, h4, True)

    # Tanh
    h5 = F.tanh(h1)

    # Cell_out
    h6 = F.identity(h1)

    # Mul2_2 -> 100
    h5 = F.mul2(h5, h3)
    # Dropout
    if not test:
        h5 = F.dropout(h5)

    # Output
    h5 = F.identity(h5)

    # Concatenate_2 -> 200
    h5 = F.concatenate(h5, h6, axis=1)
    return h5
Example 5
def propagate(h,
              edges,
              state_size=None,
              w_initializer=None,
              u_initializer1=None,
              u_initializer2=None,
              bias_initializer=None,
              edge_initializers=None):
    """
    Propagate vertex representations

    Arguments:

    h                 -- the input vertex representations (nnabla.Variable with shape (|V|, D))
    edges             -- the dictionary that represents the graph edge ({label, [in, out]})
    state_size        -- (optional) the size of hidden state (h.shape[1] is used if this argument is None)
    w_initializer     -- (optional)
    u_initializer1    -- (optional)
    u_initializer2    -- (optional)
    bias_initializer  -- (optional)
    edge_initializers -- (optional)

    Return value

    - Return a variable with shape (|V|, D)
    """
    if state_size is None:
        state_size = h.shape[1]
    h_size = h.shape[1]
    with nn.parameter_scope("activate"):
        a = activate(h,
                     edges,
                     state_size,
                     bias_initializer=bias_initializer,
                     edge_initializers=edge_initializers)
    with nn.parameter_scope("W_zr"):
        ws = PF.affine(a, (3, h_size), with_bias=False, w_init=w_initializer)
    (z1, r1, h_hat1) = split(ws, axis=1)
    with nn.parameter_scope("U_zr"):
        us = PF.affine(h, (2, state_size),
                       with_bias=False,
                       w_init=u_initializer1)
    (z2, r2) = split(us, axis=1)
    z = F.sigmoid(F.add2(z1, z2))
    r = F.sigmoid(F.add2(r1, r2))
    with nn.parameter_scope("U"):
        h_hat2 = PF.affine(F.mul2(r, h),
                           state_size,
                           with_bias=False,
                           w_init=u_initializer2)
    h_hat = F.tanh(F.add2(h_hat1, h_hat2))
    return F.add2(F.sub2(h, F.mul2(z, h)), F.mul2(z, h_hat))
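
For reference, the final line is the standard GRU state update written with elementwise functions: h_new = (1 - z) * h + z * h_hat, i.e. F.add2(F.sub2(h, F.mul2(z, h)), F.mul2(z, h_hat)).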
Example 6
def disparityregression(x, maxdisp):
    disp = nn.Variable((x.shape), need_grad=False)
    for i in range(0, maxdisp):
        disp.d[:, :, i, :, :] = i
    dispx = F.mul2(disp, x)
    out = F.sum(dispx, axis=2)
    return out
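
A possibly equivalent sketch (assuming x has shape (batch, channels, maxdisp, height, width), as the indexing above implies): the per-disparity constant can also be built with numpy instead of a Python loop.

import numpy as np
import nnabla as nn
import nnabla.functions as F

def disparityregression_np(x, maxdisp):
    # Weight each disparity bin by its index, then sum over the disparity axis.
    disp_np = np.arange(maxdisp, dtype=np.float32).reshape(1, 1, maxdisp, 1, 1)
    disp = nn.Variable.from_numpy_array(np.broadcast_to(disp_np, x.shape))
    return F.sum(F.mul2(disp, x), axis=2)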
Example 7
def test_imperative_i2_o1():
    import nnabla.functions as F
    x0 = nn.NdArray([2, 3, 4])
    x1 = nn.NdArray([2, 1, 1])
    x0.fill(3)
    x1.fill(0.5)
    y = F.mul2(x0, x1)
    assert np.allclose(y.data, 1.5)
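
A small companion sketch (assumed, not part of the test suite): the same broadcasted multiplication with nnabla.Variable under auto-forward.

import numpy as np
import nnabla as nn
import nnabla.functions as F

with nn.auto_forward(True):
    v0 = nn.Variable.from_numpy_array(np.full((2, 3, 4), 3.0))
    v1 = nn.Variable.from_numpy_array(np.full((2, 1, 1), 0.5))
    v = F.mul2(v0, v1)  # the (2, 1, 1) input broadcasts against (2, 3, 4)
assert np.allclose(v.d, 1.5)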
Example 8
 def call(self, input):
     if self._drop_prob == 0:
         return input
     mask = F.rand(shape=(input.shape[0], 1, 1, 1))
     mask = F.greater_equal_scalar(mask, self._drop_prob)
     out = F.mul_scalar(input, 1. / (1 - self._drop_prob))
     out = F.mul2(out, mask)
     return out
Example 9
    def call(self, input):
        if self._mode == 'full':
            out = F.stack(*[op(input) for op in self._ops], axis=0)
            out = F.mul2(out, F.softmax(self._alpha, axis=0))
            return F.sum(out, axis=0)

        # update active index
        self._update_active_index()

        return self._ops[self._active](input)
Example 10
def test_large_transform_binary(fname, ctx, func_name):
    if not func_name.endswith('Cuda'):
        pytest.skip('Grid-strided loop is tested only for CUDA backend')

    with nn.context_scope(ctx), nn.auto_forward(True):
        a = nn.Variable.from_numpy_array(np.random.randn(
            1024, 64, 1)).apply(need_grad=True)
        b = nn.Variable.from_numpy_array(np.random.randn(
            1024, 64, 3)).apply(need_grad=True)
        c = F.mul2(a, b)
        c.backward()
Example 11
    def call(self, *input):
        if self._mode == 'concat' and len(input) > 1:
            return F.concatenate(*input, axis=self._axis)

        out = input[0]
        if self._mode == 'add':
            for i in range(1, len(input)):
                out = F.add2(out, input[i])

        if self._mode == 'mul':
            for i in range(1, len(input)):
                out = F.mul2(out, input[i])

        return out
Example 12
def drop_path(x):
    """
        The same implementation as the PyTorch version.
        drop_rate: Variable. If the random value drawn from a
                uniform distribution is less than drop_rate, the
                corresponding element becomes 0.
    """
    drop_prob = nn.parameter.get_parameter_or_create("drop_rate",
                                                     shape=(1, 1, 1, 1), need_grad=False)
    mask = F.rand(shape=(x.shape[0], 1, 1, 1))
    mask = F.greater_equal(mask, drop_prob)
    x = F.div2(x, 1 - drop_prob)
    x = F.mul2(x, mask)
    return x
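
A hedged usage sketch (the parameter value, input shape, and data below are assumptions): create and set the shared "drop_rate" parameter once, then apply drop_path to a batch.

import numpy as np
import nnabla as nn

nn.parameter.get_parameter_or_create(
    "drop_rate", shape=(1, 1, 1, 1), need_grad=False).d = 0.2
x = nn.Variable.from_numpy_array(
    np.random.randn(8, 3, 32, 32).astype(np.float32))
y = drop_path(x)
y.forward()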
Example 13
    def test_recomputed_data_value(self, seed):
        rng = np.random.RandomState(seed)
        a0 = nn.Variable((2, 3), need_grad=True)
        b0 = nn.Variable((2, 3), need_grad=True)
        a0.d = rng.randn(*a0.shape)
        b0.d = rng.randn(*b0.shape)

        a1 = F.sin(a0).apply(recompute=True)
        a2 = F.sin(a1)
        a3 = F.sin(a2)

        b1 = F.sin(b0)
        b2 = F.sin(b1).apply(recompute=True)
        b3 = F.sin(b2)

        c0 = F.mul2(a3, b3).apply(recompute=True)
        c1 = F.sin(c0)

        # Forward

        # Get output data which will be recomputed.
        ref_data = []  # data of a1, b2 and c0 will be stored.

        def get_output_data(nnabla_func):
            outputs = nnabla_func.outputs
            for output in outputs:
                if output.recompute:
                    ref_data.append(copy.deepcopy(output.d))

        c1.forward(function_post_hook=get_output_data)

        # Backward

        # Get recomputed data
        act_data = []

        def get_recomputed_data(nnabla_func):
            inputs = nnabla_func.inputs
            for input in inputs:
                if input.recompute:
                    act_data.append(copy.deepcopy(input.d))

        c1.backward(function_pre_hook=get_recomputed_data)
        # Make the order the same as `ref_data`.
        act_data.reverse()

        # Check recomputed data
        for act, ref in zip(act_data, ref_data):
            assert_allclose(act, ref, rtol=0, atol=0)
Example 14
def context_preserving_loss(xa, yb):
    def mask_weight(a, b):
        # much different from definition in the paper
        merged_mask = F.concatenate(a, b, axis=1)
        summed_mask = F.sum((merged_mask + 1) / 2, axis=1, keepdims=True)
        clipped = F.clip_by_value(summed_mask,
                                  F.constant(0, shape=summed_mask.shape),
                                  F.constant(1, shape=summed_mask.shape))
        z = clipped * 2 - 1
        mask = (1 - z) / 2
        return mask

    x = xa[:, :3, :, :]
    a = xa[:, 3:, :, :]
    y = yb[:, :3, :, :]
    b = yb[:, 3:, :, :]

    assert x.shape == y.shape and a.shape == b.shape
    W = mask_weight(a, b)
    return F.mean(F.mul2(F.absolute_error(x, y), W))
Example 15
    def __mul__(self, other):
        """
        Element-wise multiplication.
        Implements the multiplication operator expression ``A * B``, together with :func:`~nnabla.variable.__rmul__` .
        When a scalar is specified for ``other``, this function performs an
        element-wise operation for all elements in ``self``.

        Args:
            other (float or ~nnabla.Variable): Internally calling
                :func:`~nnabla.functions.mul2` or
                :func:`~nnabla.functions.mul_scalar` according to the
                type.

        Returns: :class:`nnabla.Variable`

        """
        import nnabla.functions as F
        if isinstance(other, Variable):
            return F.mul2(self, other)
        return F.mul_scalar(self, other)
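
A short usage sketch (values are assumptions) showing how the overloaded operator dispatches to the two functions named in the docstring.

import numpy as np
import nnabla as nn

a = nn.Variable.from_numpy_array(np.full((2, 3), 3.0))
b = nn.Variable.from_numpy_array(np.full((2, 3), 0.5))

y1 = a * b    # Variable * Variable -> F.mul2
y2 = a * 2.0  # Variable * scalar   -> F.mul_scalar
y1.forward()
y2.forward()
print(y1.d)  # all elements 1.5
print(y2.d)  # all elements 6.0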
Example 16
    def test_clear_input_if_no_need_grad_branch1(self):
        x1 = nn.Variable([1, 5], need_grad=True)
        x2 = nn.Variable([1, 5], need_grad=True)
        x3 = nn.Variable([1, 5], need_grad=True)

        xx1 = F.identity(x1)
        xx2 = F.identity(x2)
        y1 = F.mul2(xx1, xx2)  # (1)
        xx3 = F.identity(x3)
        y2 = F.add2(xx2, xx3)  # (2)
        y3 = F.add2(y1, y2)  # (3)

        answer = []
        answer.append([False])
        answer.append([False])
        answer.append([False, False])  # (1)
        answer.append([False])
        answer.append([False, True])  # (2) use xx2 in backward
        answer.append([True, True])  # (3)

        y3.forward(clear_no_need_grad=True)
        self.check_input_data_clear_called_flags(answer)
Example 17
    def test_unnecessary_traverse_1(self):
        a0 = nn.Variable((2, 3), need_grad=False)
        # `a1` will not be recomputed since `a2` will not be cleared.
        a1 = F.sin(a0).apply(recompute=True)
        a2 = F.cos(a1)
    a3 = F.sin(a2).apply(recompute=True)  # `a3` will be recomputed.

        b0 = nn.Variable((2, 3), need_grad=True).apply(recompute=True)
        b1 = F.identity(b0).apply(recompute=True)

        c = F.mul2(a3, b1).apply(recompute=True)

        # Check recomputation recursion stops when `a3.data` is calculated.

        c.forward(clear_buffer=False)
        # `a1.data` is cleared because `recompute` flag is `true`.
        assert(a1.data.clear_called == True)
        # `a2.data` is not cleared because `recompute` flag is `false`.
        assert(a2.data.clear_called == False)
        c.backward(clear_buffer=False)
        # If the recursive call reached to `a1`, `a1.data` should be set by recomputation.
        # However, the recursive call stops at `a2` whose data is not cleared.
        assert(a1.data.clear_called == True)
Example 18
def graph_representation(h, x, n_outmaps, w_init=None, b_init=None):
    """
    Outputs graph representation model

    Arguments:

    h                 -- the input vertex representations (nnabla.Variable with shape (|V|, H))
    x                 -- the input vertex annotation (nnabla.Variable with shape (|V|, X))
    n_outmaps         -- the size of node representation
    w_init            -- (optional)
    b_init            -- (optional)

    Return value

    - Return a variable with shape (n_outmaps)
    """
    with nn.parameter_scope("graph_representation"):
        output = F.concatenate(h, x)
        output = PF.affine(output, (2, n_outmaps),
                           w_init=w_init,
                           b_init=b_init)
        (s, t) = F.split(output, axis=1)
        return F.sum(F.mul2(F.sigmoid(s), F.tanh(t)), axis=0, keepdims=True)
Example 19
def train(args):
    if args.c_dim != len(args.selected_attrs):
        print("c_dim must be the same as the num of selected attributes. Modified c_dim.")
        args.c_dim = len(args.selected_attrs)

    # Dump the config information.
    config = dict()
    print("Used config:")
    for k in args.__dir__():
        if not k.startswith("_"):
            config[k] = getattr(args, k)
            print("'{}' : {}".format(k, getattr(args, k)))

    # Prepare Generator and Discriminator based on user config.
    generator = functools.partial(
        model.generator, conv_dim=args.g_conv_dim, c_dim=args.c_dim, num_downsample=args.num_downsample, num_upsample=args.num_upsample, repeat_num=args.g_repeat_num)
    discriminator = functools.partial(model.discriminator, image_size=args.image_size,
                                      conv_dim=args.d_conv_dim, c_dim=args.c_dim, repeat_num=args.d_repeat_num)

    x_real = nn.Variable(
        [args.batch_size, 3, args.image_size, args.image_size])
    label_org = nn.Variable([args.batch_size, args.c_dim, 1, 1])
    label_trg = nn.Variable([args.batch_size, args.c_dim, 1, 1])

    with nn.parameter_scope("dis"):
        dis_real_img, dis_real_cls = discriminator(x_real)

    with nn.parameter_scope("gen"):
        x_fake = generator(x_real, label_trg)
    x_fake.persistent = True  # to retain its value during computation.

    # get an unlinked_variable of x_fake
    x_fake_unlinked = x_fake.get_unlinked_variable()

    with nn.parameter_scope("dis"):
        dis_fake_img, dis_fake_cls = discriminator(x_fake_unlinked)

    # ---------------- Define Loss for Discriminator -----------------
    d_loss_real = (-1) * loss.gan_loss(dis_real_img)
    d_loss_fake = loss.gan_loss(dis_fake_img)
    d_loss_cls = loss.classification_loss(dis_real_cls, label_org)
    d_loss_cls.persistent = True

    # Gradient Penalty.
    alpha = F.rand(shape=(args.batch_size, 1, 1, 1))
    x_hat = F.mul2(alpha, x_real) + \
        F.mul2(F.r_sub_scalar(alpha, 1), x_fake_unlinked)

    with nn.parameter_scope("dis"):
        dis_for_gp, _ = discriminator(x_hat)
    grads = nn.grad([dis_for_gp], [x_hat])

    l2norm = F.sum(grads[0] ** 2.0, axis=(1, 2, 3)) ** 0.5
    d_loss_gp = F.mean((l2norm - 1.0) ** 2.0)

    # total discriminator loss.
    d_loss = d_loss_real + d_loss_fake + args.lambda_cls * \
        d_loss_cls + args.lambda_gp * d_loss_gp

    # ---------------- Define Loss for Generator -----------------
    g_loss_fake = (-1) * loss.gan_loss(dis_fake_img)
    g_loss_cls = loss.classification_loss(dis_fake_cls, label_trg)
    g_loss_cls.persistent = True

    # Reconstruct Images.
    with nn.parameter_scope("gen"):
        x_recon = generator(x_fake_unlinked, label_org)
    x_recon.persistent = True

    g_loss_rec = loss.recon_loss(x_real, x_recon)
    g_loss_rec.persistent = True

    # total generator loss.
    g_loss = g_loss_fake + args.lambda_rec * \
        g_loss_rec + args.lambda_cls * g_loss_cls

    # -------------------- Solver Setup ---------------------
    d_lr = args.d_lr  # initial learning rate for Discriminator
    g_lr = args.g_lr  # initial learning rate for Generator
    solver_dis = S.Adam(alpha=args.d_lr, beta1=args.beta1, beta2=args.beta2)
    solver_gen = S.Adam(alpha=args.g_lr, beta1=args.beta1, beta2=args.beta2)

    # register parameters to each solver.
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())

    # -------------------- Create Monitors --------------------
    monitor = Monitor(args.monitor_path)
    monitor_d_cls_loss = MonitorSeries(
        'real_classification_loss', monitor, args.log_step)
    monitor_g_cls_loss = MonitorSeries(
        'fake_classification_loss', monitor, args.log_step)
    monitor_loss_dis = MonitorSeries(
        'discriminator_loss', monitor, args.log_step)
    monitor_recon_loss = MonitorSeries(
        'reconstruction_loss', monitor, args.log_step)
    monitor_loss_gen = MonitorSeries('generator_loss', monitor, args.log_step)
    monitor_time = MonitorTimeElapsed("Training_time", monitor, args.log_step)

    # -------------------- Prepare / Split Dataset --------------------
    using_attr = args.selected_attrs
    dataset, attr2idx, idx2attr = get_data_dict(args.attr_path, using_attr)
    random.seed(313)  # use fixed seed.
    random.shuffle(dataset)  # shuffle dataset.
    test_dataset = dataset[-2000:]  # extract 2000 images for test

    if args.num_data:
        # Use training data partially.
        training_dataset = dataset[:min(args.num_data, len(dataset) - 2000)]
    else:
        training_dataset = dataset[:-2000]
    print("Use {} images for training.".format(len(training_dataset)))

    # create data iterators.
    load_func = functools.partial(stargan_load_func, dataset=training_dataset,
                                  image_dir=args.celeba_image_dir, image_size=args.image_size, crop_size=args.celeba_crop_size)
    data_iterator = data_iterator_simple(load_func, len(
        training_dataset), args.batch_size, with_file_cache=False, with_memory_cache=False)

    load_func_test = functools.partial(stargan_load_func, dataset=test_dataset,
                                       image_dir=args.celeba_image_dir, image_size=args.image_size, crop_size=args.celeba_crop_size)
    test_data_iterator = data_iterator_simple(load_func_test, len(
        test_dataset), args.batch_size, with_file_cache=False, with_memory_cache=False)

    # Keep fixed test images for intermediate translation visualization.
    test_real_ndarray, test_label_ndarray = test_data_iterator.next()
    test_label_ndarray = test_label_ndarray.reshape(
        test_label_ndarray.shape + (1, 1))

    # -------------------- Training Loop --------------------
    one_epoch = data_iterator.size // args.batch_size
    num_max_iter = args.max_epoch * one_epoch

    for i in range(num_max_iter):
        # Get real images and labels.
        real_ndarray, label_ndarray = data_iterator.next()
        label_ndarray = label_ndarray.reshape(label_ndarray.shape + (1, 1))
        label_ndarray = label_ndarray.astype(float)
        x_real.d, label_org.d = real_ndarray, label_ndarray

        # Generate target domain labels randomly.
        rand_idx = np.random.permutation(label_org.shape[0])
        label_trg.d = label_ndarray[rand_idx]

        # ---------------- Train Discriminator -----------------
        # generate fake image.
        x_fake.forward(clear_no_need_grad=True)
        d_loss.forward(clear_no_need_grad=True)
        solver_dis.zero_grad()
        d_loss.backward(clear_buffer=True)
        solver_dis.update()

        monitor_loss_dis.add(i, d_loss.d.item())
        monitor_d_cls_loss.add(i, d_loss_cls.d.item())
        monitor_time.add(i)

        # -------------- Train Generator --------------
        if (i + 1) % args.n_critic == 0:
            g_loss.forward(clear_no_need_grad=True)
            solver_dis.zero_grad()
            solver_gen.zero_grad()
            x_fake_unlinked.grad.zero()
            g_loss.backward(clear_buffer=True)
            x_fake.backward(grad=None)
            solver_gen.update()
            monitor_loss_gen.add(i, g_loss.d.item())
            monitor_g_cls_loss.add(i, g_loss_cls.d.item())
            monitor_recon_loss.add(i, g_loss_rec.d.item())
            monitor_time.add(i)

            if (i + 1) % args.sample_step == 0:
                # save image.
                save_results(i, args, x_real, x_fake,
                             label_org, label_trg, x_recon)
                if args.test_during_training:
                    # translate images from test dataset.
                    x_real.d, label_org.d = test_real_ndarray, test_label_ndarray
                    label_trg.d = test_label_ndarray[rand_idx]
                    x_fake.forward(clear_no_need_grad=True)
                    save_results(i, args, x_real, x_fake, label_org,
                                 label_trg, None, is_training=False)

        # Learning rates get decayed
        if (i + 1) > int(0.5 * num_max_iter) and (i + 1) % args.lr_update_step == 0:
            g_lr = max(0, g_lr - (args.lr_update_step *
                                  args.g_lr / float(0.5 * num_max_iter)))
            d_lr = max(0, d_lr - (args.lr_update_step *
                                  args.d_lr / float(0.5 * num_max_iter)))
            solver_gen.set_learning_rate(g_lr)
            solver_dis.set_learning_rate(d_lr)
            print('learning rates decayed, g_lr: {}, d_lr: {}.'.format(g_lr, d_lr))

    # Save parameters and training config.
    param_name = 'trained_params_{}.h5'.format(
        datetime.datetime.today().strftime("%m%d%H%M"))
    param_path = os.path.join(args.model_save_path, param_name)
    nn.save_parameters(param_path)
    config["pretrained_params"] = param_name

    with open(os.path.join(args.model_save_path, "training_conf_{}.json".format(datetime.datetime.today().strftime("%m%d%H%M"))), "w") as f:
        json.dump(config, f)

    # -------------------- Translation on test dataset --------------------
    for i in range(args.num_test):
        real_ndarray, label_ndarray = test_data_iterator.next()
        label_ndarray = label_ndarray.reshape(label_ndarray.shape + (1, 1))
        label_ndarray = label_ndarray.astype(float)
        x_real.d, label_org.d = real_ndarray, label_ndarray

        rand_idx = np.random.permutation(label_org.shape[0])
        label_trg.d = label_ndarray[rand_idx]

        x_fake.forward(clear_no_need_grad=True)
        save_results(i, args, x_real, x_fake, label_org,
                     label_trg, None, is_training=False)
Example 20
 def graph(x1, x2):
     x1 = F.identity(x1).apply(recompute=True)
     x2 = F.identity(x2).apply(recompute=True)
     y = F.mul2(x1, x2)
     y = F.identity(y)
     return y
Example 21
def constructing_cell(args,
                      ops,
                      which_cell,
                      cell_prev_prev,
                      cell_prev,
                      output_filter,
                      is_reduced_curr,
                      is_reduced_prev,
                      test=False):
    """
        Constructing one cell.
        input:
            args: arguments set by user.
            ops: operations used in the network.
            arch_dict: a dictionary containing architecture information.
            which_cell: int. An index of cell currently constructed.
            cell_prev_prev: Variable. Output of the cell behind the previous cell.
            cell_prev: Variable. Output of the previous cell.
            output_filter: the number of filters used for this cell.
            is_reduced_curr: bool. True if the current cell is the reduction cell.
            is_reduced_prev: bool. True if the previous cell is the reduction cell.
            test: bool. True if the network is for validation.
    """

    # If True, all the parameters in batch_normalizations won't be updated.
    is_search = True

    if is_reduced_curr:
        keyname_basis = "alpha_reduction"
        output_shape = (cell_prev.shape[0], output_filter,
                        cell_prev.shape[2] // 2, cell_prev.shape[3] // 2)
    else:
        keyname_basis = "alpha_normal"
        output_shape = (cell_prev.shape[0], output_filter, cell_prev.shape[2],
                        cell_prev.shape[3])

    if is_reduced_prev:
        scope = "fr{}".format(which_cell)
        cell_prev_prev = factorized_reduction(cell_prev_prev, output_filter,
                                              scope, test, is_search)
    else:
        scope = "preprocess_cell{}_node{}".format(which_cell, 0)
        cell_prev_prev = conv1x1(cell_prev_prev, output_filter, scope, test,
                                 is_search)

    scope = "preprocess_cell{}_node{}".format(which_cell, 1)
    cell_prev = conv1x1(cell_prev, output_filter, scope, test, is_search)

    num_of_nodes = args.num_nodes

    # latter_nodes are all the intermediate nodes,
    # except for 2 input nodes and 1 output node.
    latter_nodes = [
        nn.Variable(output_shape) for _ in range(num_of_nodes - 2 - 1)
    ]
    for v in latter_nodes:
        v.d = 0  # initialize.

    num_of_ops = len(ops)

    # prepare a list to store all nodes.
    nodes = [cell_prev_prev, cell_prev] + latter_nodes
    for i in range(num_of_nodes - 2):
        successors = [_ for _ in range(i + 1, num_of_nodes - 1)]
        for j in successors:
            if j == 1:
                continue
            from_node, to_node = i, j
            scope = "cell{}/node{}_{}".format(which_cell, from_node, to_node)

            stacked_x = num_of_ops * (nodes[i], )
            stacked_x = tuple([
                op(x, output_filter, scope + "/ops{}".format(op_id), i,
                   is_reduced_curr, test, is_search) for x, op, op_id in zip(
                       stacked_x, tuple(ops.values()), tuple(ops.keys()))
            ])
            y = F.stack(*stacked_x, axis=0)

            alpha_name = keyname_basis + "_{}_{}".format(i, j)
            current_alpha = nn.parameter.get_parameter_or_create(
                alpha_name, (num_of_ops, ) + (1, 1, 1, 1))
            alpha_prob = F.softmax(current_alpha, axis=0)
            y = F.mul2(y, alpha_prob)
            if i == 0:
                nodes[j] = F.sum(y, axis=0)
            else:
                nodes[j] = F.add2(nodes[j], F.sum(y, axis=0))

    intermediate_nodes = nodes[2:num_of_nodes - 1]
    output = F.concatenate(*intermediate_nodes, axis=1)

    is_reduced_prev = is_reduced_curr
    return output, is_reduced_curr, is_reduced_prev, output_filter
Example 22
def sample_from_controller(args):
    """
        2-layer RNN (LSTM) based controller which outputs a CNN architecture,
        represented as a sequence of integers and returned as a list.
        Given the number of layers, it executes two types of computation per layer:
        one samples the operation used at that layer,
        the other samples the skip-connection pattern.
    """

    entropys = nn.Variable([1, 1], need_grad=True)
    log_probs = nn.Variable([1, 1], need_grad=True)
    skip_penaltys = nn.Variable([1, 1], need_grad=True)

    entropys.d = log_probs.d = skip_penaltys.d = 0.0  # initialize them all

    num_layers = args.num_layers
    lstm_size = args.lstm_size
    state_size = args.state_size
    lstm_num_layers = args.lstm_layers
    skip_target = args.skip_prob
    temperature = args.temperature
    tanh_constant = args.tanh_constant
    num_branch = args.num_ops

    arc_seq = []
    initializer = I.UniformInitializer((-0.1, 0.1))

    prev_h = [
        nn.Variable([1, lstm_size], need_grad=True)
        for _ in range(lstm_num_layers)
    ]
    prev_c = [
        nn.Variable([1, lstm_size], need_grad=True)
        for _ in range(lstm_num_layers)
    ]

    for i in range(len(prev_h)):
        prev_h[i].d = 0  # initialize variables in lstm layers.
        prev_c[i].d = 0

    inputs = nn.Variable([1, lstm_size])
    inputs.d = np.random.normal(0, 0.5, [1, lstm_size])

    g_emb = nn.Variable([1, lstm_size])
    g_emb.d = np.random.normal(0, 0.5, [1, lstm_size])

    skip_targets = nn.Variable([1, 2])
    skip_targets.d = np.array([[1.0 - skip_target, skip_target]])

    for layer_id in range(num_layers):
        # One-step stacked LSTM.
        with nn.parameter_scope("controller_lstm"):
            next_h, next_c = stack_lstm(inputs, prev_h, prev_c, state_size)
        prev_h, prev_c = next_h, next_c  # shape:(1, lstm_size)

        # Compute for operation.
        with nn.parameter_scope("ops"):
            logit = PF.affine(next_h[-1],
                              num_branch,
                              w_init=initializer,
                              with_bias=False)

        if temperature is not None:
            logit = F.mul_scalar(logit, (1 / temperature))

        if tanh_constant is not None:
            logit = F.mul_scalar(F.tanh(logit),
                                 tanh_constant)  # (1, num_branch)

        # normalizing logits.
        normed_logit = np.e**logit.d
        normed_logit = normed_logit / np.sum(normed_logit)

        # Sampling operation id from multinomial distribution.
        ops_id = np.random.multinomial(1, normed_logit[0], 1).nonzero()[1]
        ops_id = nn.Variable.from_numpy_array(ops_id)  # (1, )
        arc_seq.append(ops_id.d)

        # log policy for operation.
        log_prob = F.softmax_cross_entropy(logit,
                                           F.reshape(ops_id,
                                                     shape=(1, 1)))  # (1, )
        # accumulate log policy as log probs
        log_probs = F.add2(log_probs, log_prob)

        entropy = log_prob * F.exp(-log_prob)
        entropys = F.add2(entropys, entropy)  # accumulate entropy as entropys.

        w_emb = nn.parameter.get_parameter_or_create("w_emb",
                                                     [num_branch, lstm_size],
                                                     initializer,
                                                     need_grad=False)

        inputs = F.reshape(w_emb[int(ops_id.d)],
                           (1, w_emb.shape[1]))  # (1, lstm_size)

        with nn.parameter_scope("controller_lstm"):
            next_h, next_c = stack_lstm(inputs, prev_h, prev_c, lstm_size)
        prev_h, prev_c = next_h, next_c  # (1, lstm_size)

        with nn.parameter_scope("skip_affine_3"):
            adding_w_1 = PF.affine(next_h[-1],
                                   lstm_size,
                                   w_init=initializer,
                                   with_bias=False)  # (1, lstm_size)

        if layer_id == 0:
            inputs = g_emb  # (1, lstm_size)
            anchors = next_h[-1]  # (1, lstm_size)
            anchors_w_1 = adding_w_1  # then goes back to the entry point of the loop

        else:
            # (layer_id, lstm_size) this shape during the process
            query = anchors_w_1

            with nn.parameter_scope("skip_affine_1"):
                query = F.tanh(
                    F.add2(
                        query,
                        PF.affine(next_h[-1],
                                  lstm_size,
                                  w_init=initializer,
                                  with_bias=False)))
                #              (layer_id, lstm_size)   +   (1, lstm_size)
                # broadcast occurs here. resulting shape is; (layer_id, lstm_size)

            with nn.parameter_scope("skip_affine_2"):
                query = PF.affine(query,
                                  1,
                                  w_init=initializer,
                                  with_bias=False)  # (layer_id, 1)
            # note that each weight for skip_affine_X is shared across all steps of LSTM.

            # re-define logits, now its shape is;(layer_id, 2)
            logit = F.concatenate(-query, query, axis=1)

            if temperature is not None:
                logit = F.mul_scalar(logit, (1 / temperature))

            if tanh_constant is not None:
                logit = F.mul_scalar(F.tanh(logit), tanh_constant)

            skip_prob_unnormalized = F.exp(logit)  # (layer_id, 2)

            # normalizing skip_prob_unnormalized.
            summed = F.sum(skip_prob_unnormalized, axis=1,
                           keepdims=True).apply(need_grad=False)
            summed = F.concatenate(summed, summed, axis=1)

            skip_prob_normalized = F.div2(skip_prob_unnormalized,
                                          summed)  # (layer_id, 2)

            # Sampling skip_pattern from multinomial distribution.
            skip_pattern = np.random.multinomial(
                1, skip_prob_normalized.d[0],
                layer_id).nonzero()[1]  # (layer_id, 1)
            arc_seq.append(skip_pattern)
            skip = nn.Variable.from_numpy_array(skip_pattern)

            # compute skip penalty.
            # (layer_id, 2) broadcast occurs here too
            kl = F.mul2(skip_prob_normalized,
                        F.log(F.div2(skip_prob_normalized, skip_targets)))
            kl = F.sum(kl, keepdims=True)
            # get the mean value here in advance.
            kl = kl * (1.0 / (num_layers - 1))

            # accumulate kl divergence as skip penalty.
            skip_penaltys = F.add2(skip_penaltys, kl)

            # log policy for connection.
            log_prob = F.softmax_cross_entropy(
                logit, F.reshape(skip, shape=(skip.shape[0], 1)))
            log_probs = F.add2(log_probs, F.sum(log_prob, keepdims=True))

            entropy = F.sum(log_prob * F.exp(-log_prob), keepdims=True)
            # accumulate entropy as entropys.
            entropys = F.add2(entropys, entropy)

            skip = F.reshape(skip, (1, layer_id))

            inputs = F.affine(skip,
                              anchors).apply(need_grad=False)  # (1, lstm_size)
            inputs = F.mul_scalar(inputs, (1.0 / (1.0 + (np.sum(skip.d)))))

            # add new row for the next computation
            # (layer_id + 1, lstm_size)
            anchors = F.concatenate(anchors, next_h[-1], axis=0)
            # (layer_id + 1, lstm_size)
            anchors_w_1 = F.concatenate(anchors_w_1, adding_w_1, axis=0)

    return arc_seq, log_probs, entropys, skip_penaltys
Example 23
def styled_conv_block(conv_input,
                      w,
                      noise=None,
                      res=4,
                      inmaps=512,
                      outmaps=512,
                      kernel_size=3,
                      pad_size=1,
                      demodulate=True,
                      namescope="Conv",
                      up=False,
                      act=F.leaky_relu):
    """
    Conv block with skip connection for Generator
    """
    batch_size = conv_input.shape[0]

    with nn.parameter_scope(f'G_synthesis/{res}x{res}/{namescope}'):
        W, bias = weight_init_fn(shape=(w.shape[1], inmaps))
        runtime_coef = (1. / np.sqrt(512)).astype(np.float32)
        style = F.affine(w, W * runtime_coef, bias) + 1.0
    runtime_coef_for_conv = (
        1 / np.sqrt(np.prod([inmaps, kernel_size, kernel_size]))).astype(
            np.float32)

    if up:
        init_function = weight_init_fn(shape=(inmaps, outmaps, kernel_size,
                                              kernel_size),
                                       return_init=True)
        conv_weight = nn.parameter.get_parameter_or_create(
            name=f'G_synthesis/{res}x{res}/{namescope}/conv/W',
            shape=(inmaps, outmaps, kernel_size, kernel_size),
            initializer=init_function)
    else:
        init_function = weight_init_fn(shape=(outmaps, inmaps, kernel_size,
                                              kernel_size),
                                       return_init=True)
        conv_weight = nn.parameter.get_parameter_or_create(
            name=f'G_synthesis/{res}x{res}/{namescope}/conv/W',
            shape=(outmaps, inmaps, kernel_size, kernel_size),
            initializer=init_function)
    conv_weight = F.mul_scalar(conv_weight, runtime_coef_for_conv)
    if up:
        scale = F.reshape(style, (style.shape[0], style.shape[1], 1, 1, 1),
                          inplace=False)
    else:
        scale = F.reshape(style, (style.shape[0], 1, style.shape[1], 1, 1),
                          inplace=False)

    mod_w = F.mul2(
        F.reshape(conv_weight, (1, ) + conv_weight.shape, inplace=False),
        scale)

    if demodulate:
        if up:
            denom_w = F.pow_scalar(
                F.sum(F.pow_scalar(mod_w, 2.), axis=[1, 3, 4], keepdims=True) +
                1e-8, 0.5)
        else:
            denom_w = F.pow_scalar(
                F.sum(F.pow_scalar(mod_w, 2.), axis=[2, 3, 4], keepdims=True) +
                1e-8, 0.5)
        demod_w = F.div2(mod_w, denom_w)

    else:
        demod_w = mod_w

    conv_input = F.reshape(conv_input,
                           (1, -1, conv_input.shape[2], conv_input.shape[3]),
                           inplace=False)
    demod_w = F.reshape(
        demod_w, (-1, demod_w.shape[2], demod_w.shape[3], demod_w.shape[4]),
        inplace=False)

    if up:
        k = [1, 3, 3, 1]
        conv_out = upsample_conv_2d(conv_input,
                                    demod_w,
                                    k,
                                    factor=2,
                                    gain=1,
                                    group=batch_size)
    else:
        conv_out = F.convolution(conv_input,
                                 demod_w,
                                 pad=(pad_size, pad_size),
                                 group=batch_size)
        conv_out = F.reshape(
            conv_out, (batch_size, -1, conv_out.shape[2], conv_out.shape[3]),
            inplace=False)

    if noise is not None:
        noise_coeff = nn.parameter.get_parameter_or_create(
            name=f'G_synthesis/{res}x{res}/{namescope}/noise_strength',
            shape=())
        conv_out = F.add2(conv_out, noise * F.reshape(noise_coeff,
                                                      (1, 1, 1, 1)))
    else:
        conv_out = conv_out

    bias = nn.parameter.get_parameter_or_create(
        name=f'G_synthesis/{res}x{res}/{namescope}/conv/b',
        shape=(outmaps, ),
        initializer=np.random.randn(outmaps, ).astype(np.float32))
    conv_out = F.add2(conv_out,
                      F.reshape(bias, (1, outmaps, 1, 1), inplace=False))

    if act == F.leaky_relu:
        conv_out = F.mul_scalar(F.leaky_relu(conv_out,
                                             alpha=0.2,
                                             inplace=False),
                                np.sqrt(2),
                                inplace=False)
    else:
        conv_out = act(conv_out)

    return conv_out
Example 24
def conv_block(input,
               w,
               noise=None,
               res=4,
               outmaps=512,
               inmaps=512,
               kernel_size=3,
               pad_size=1,
               demodulate=True,
               namescope="Conv",
               up=False,
               act=F.leaky_relu):
    """
        Single convolution block used at each resolution.
    """

    batch_size = input.shape[0]
    with nn.parameter_scope(f"G_synthesis/{res}x{res}/{namescope}"):
        runtime_coef = 1. / np.sqrt(512)
        W, bias = weight_init_fn(shape=(w.shape[1], inmaps))
        s = F.affine(w, W * runtime_coef, bias) + 1.0

    runtime_coef_for_conv = 1 / \
        np.sqrt(np.prod([inmaps, kernel_size, kernel_size]))

    if up:
        conv_weight = nn.parameter.get_parameter_or_create(
            name=f"G_synthesis/{res}x{res}/{namescope}/conv/W",
            shape=(inmaps, outmaps, kernel_size, kernel_size))
    else:
        conv_weight = nn.parameter.get_parameter_or_create(
            name=f"G_synthesis/{res}x{res}/{namescope}/conv/W",
            shape=(outmaps, inmaps, kernel_size, kernel_size))
    conv_weight = conv_weight * runtime_coef_for_conv

    if up:
        scale = F.reshape(s, (s.shape[0], s.shape[1], 1, 1, 1), inplace=True)
    else:
        scale = F.reshape(s, (s.shape[0], 1, s.shape[1], 1, 1), inplace=True)

    mod_w = F.mul2(
        F.reshape(conv_weight, (1, ) + conv_weight.shape, inplace=True), scale)

    if demodulate:
        if up:
            denom_w = F.pow_scalar(
                F.sum(F.pow_scalar(mod_w, 2.), axis=[1, 3, 4], keepdims=True) +
                1e-8, 0.5)
        else:
            denom_w = F.pow_scalar(
                F.sum(F.pow_scalar(mod_w, 2.), axis=[2, 3, 4], keepdims=True) +
                1e-8, 0.5)
        demod_w = F.div2(mod_w, denom_w)

    else:
        demod_w = mod_w

    input = F.reshape(input, (1, -1, input.shape[2], input.shape[3]),
                      inplace=True)
    demod_w = F.reshape(
        demod_w, (-1, demod_w.shape[2], demod_w.shape[3], demod_w.shape[4]),
        inplace=True)

    if up:
        k = [1, 3, 3, 1]
        conv_out = upsample_conv_2d(input,
                                    demod_w,
                                    k,
                                    factor=2,
                                    gain=1,
                                    group=batch_size)
    else:
        conv_out = F.convolution(input,
                                 demod_w,
                                 pad=(pad_size, pad_size),
                                 group=batch_size)
        conv_out = F.reshape(
            conv_out, (batch_size, -1, conv_out.shape[2], conv_out.shape[3]),
            inplace=True)

    if noise is not None:
        noise_coeff = nn.parameter.get_parameter_or_create(
            name=f"G_synthesis/{res}x{res}/{namescope}/noise_strength",
            shape=())
        output = conv_out + noise * \
            F.reshape(noise_coeff, (1, 1, 1, 1), inplace=False)
    else:
        output = conv_out

    bias = nn.parameter.get_parameter_or_create(
        name=f"G_synthesis/{res}x{res}/{namescope}/conv/b", shape=(outmaps, ))
    output = output + F.reshape(bias, (1, outmaps, 1, 1), inplace=False)

    if act == F.leaky_relu:
        output = F.leaky_relu(output, alpha=0.2) * np.sqrt(2)
    else:
        output = act(output)
    return output
Example 25
def ssd_loss(_ssd_confs, _ssd_locs, _label, _alpha=1):
    # input
    # _ssd_confs : type=nn.Variable, prediction of class. shape=(batch_size, default boxes, class num + 1)
    # _ssd_locs : type=nn.Variable, prediction of location. shape=(batch_size, default boxes, 4)
    # _label : type=nn.Variable, shape=(batch_size, default boxes, class num + 1 + 4)
    # _alpha : type=float, hyperparameter. this is weight of loc_loss.

    # output
    # loss : type=nn.Variable

    def smooth_L1(__pred_locs, __label_locs):
        # input
        # __pred_locs : type=nn.Variable, 
        # __label_locs : type=nn.Variable, 

        # output
        # _loss : type=nn.Variable, loss of location.

        return F.mul_scalar(F.huber_loss(__pred_locs, __label_locs), 0.5)

    # _label_conf : type=nn.Variable, label of class. shape=(batch_size, default boxes, class num + 1) (after one_hot)
    # _label_loc : type=nn.Variable, label of location. shape=(batch_size, default boxes, 4)
    label_conf = F.slice(
        _label, 
        start=(0,0,4), 
        stop=_label.shape, 
        step=(1,1,1)
    )
    label_loc = F.slice(
        _label, 
        start=(0,0,0), 
        stop=(_label.shape[0], _label.shape[1], 4), 
        step=(1,1,1)
    )

    # conf
    ssd_pos_conf, ssd_neg_conf = ssd_separate_conf_pos_neg(_ssd_confs)
    label_conf_pos, _ = ssd_separate_conf_pos_neg(label_conf)
    # pos
    pos_loss = F.sum(
        F.mul2(F.softmax(ssd_pos_conf, axis=2), label_conf_pos),
        axis=2
    )
    # neg
    neg_loss = F.sum(F.log(ssd_neg_conf), axis=2)
    conf_loss = F.sum(F.sub2(pos_loss, neg_loss), axis=1)

    # loc
    pos_label = F.sum(label_conf_pos, axis=2)      # =1 (if there is something), =0 (if there is nothing)
    loc_loss = F.sum(F.mul2(F.sum(smooth_L1(_ssd_locs, label_loc), axis=2), pos_label), axis=1)

    # [2019/07/18]
    label_match_default_box_num = F.slice(
        _label, 
        start=(0,0,_label.shape[2] - 1), 
        stop=_label.shape, 
        step=(1,1,1)
    )
    label_match_default_box_num = F.sum(label_match_default_box_num, axis=1)
    label_match_default_box_num = F.r_sub_scalar(label_match_default_box_num, _label.shape[1])
    label_match_default_box_num = F.reshape(label_match_default_box_num, (label_match_default_box_num.shape[0],), inplace=False)
    # label_match_default_box_num : type=nn.Variable, inverse number of default boxes that matches with pos.

    # loss
    loss = F.mul2(F.add2(conf_loss, F.mul_scalar(loc_loss, _alpha)), label_match_default_box_num)
    loss = F.mean(loss)
    return loss