Example #1
def resnet_model(ctx, x, inmaps=64, act=F.relu, test=False):
    # Conv -> BN -> Relu
    with nn.context_scope(ctx):
        with nn.parameter_scope("conv1"):
            h = PF.convolution(x, inmaps, kernel=(3, 3), pad=(1, 1), with_bias=False)
            h = PF.batch_normalization(h, decay_rate=0.9, batch_stat=not test)
            h = act(h)
        
        h = res_unit(h, "conv2", act, False) # -> 32x32
        h = res_unit(h, "conv3", act, True)  # -> 16x16
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)
        h = res_unit(h, "conv4", act, False) # -> 16x16
        h = res_unit(h, "conv5", act, True)  # -> 8x8
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)
        h = res_unit(h, "conv6", act, False) # -> 8x8
        h = res_unit(h, "conv7", act, True)  # -> 4x4
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)
        h = res_unit(h, "conv8", act, False) # -> 4x4
        h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
        
        pred = PF.affine(h, 10)
    return pred
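A minimal usage sketch for resnet_model, assuming a CIFAR-10-sized input (the inline comments imply 32x32 feature maps and 10 output classes) and the res_unit helper shown in Example #21 below; the context setup here is illustrative only.

import nnabla as nn
from nnabla.ext_utils import get_extension_context

# Hypothetical setup: CPU context and a batch of 64 CIFAR-10-sized images.
ctx = get_extension_context('cpu')
x = nn.Variable((64, 3, 32, 32))
pred = resnet_model(ctx, x, inmaps=64, test=False)
print(pred.shape)  # (64, 10)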
Example #2
def resnet_model(ctx, x, inmaps=64, act=F.relu, test=False):
    # Conv -> BN -> Relu
    with nn.context_scope(ctx):
        with nn.parameter_scope("conv1"):
            h = PF.convolution(x,
                               inmaps,
                               kernel=(3, 3),
                               pad=(1, 1),
                               with_bias=False)
            h = PF.batch_normalization(h, decay_rate=0.9, batch_stat=not test)
            h = act(h)

        h = res_unit(h, "conv2", act, False)  # -> 32x32
        h = res_unit(h, "conv3", act, True)  # -> 16x16
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)
        h = res_unit(h, "conv4", act, False)  # -> 16x16
        h = res_unit(h, "conv5", act, True)  # -> 8x8
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)
        h = res_unit(h, "conv6", act, False)  # -> 8x8
        h = res_unit(h, "conv7", act, True)  # -> 4x4
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)
        h = res_unit(h, "conv8", act, False)  # -> 4x4
        h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1

        pred = PF.affine(h, 10)
    return pred
Example #3
def get_loss(l1,
             l2,
             x,
             t,
             w_init,
             b_init,
             num_words,
             batch_size,
             state_size,
             dropout=False,
             dropout_rate=0.5,
             embed_name='embed',
             pred_name='pred'):
    e_list = [
        PF.embed(x_elm, num_words, state_size, name=embed_name)
        for x_elm in F.split(x, axis=1)
    ]
    t_list = F.split(t, axis=1)
    loss = 0
    for i, (e_t, t_t) in enumerate(zip(e_list, t_list)):
        if dropout:
            h1 = l1(F.dropout(e_t, dropout_rate), w_init, b_init)
            h2 = l2(F.dropout(h1, dropout_rate), w_init, b_init)
            y = PF.affine(F.dropout(h2, dropout_rate),
                          num_words,
                          name=pred_name)
        else:
            h1 = l1(e_t, w_init, b_init)
            h2 = l2(h1, w_init, b_init)
            y = PF.affine(h2, num_words, name=pred_name)
        t_t = F.reshape(t_t, [batch_size, 1])
        loss += F.mean(F.softmax_cross_entropy(y, t_t))
    loss /= float(i + 1)

    return loss
Example #4
def cnn_model_003(ctx, x, act=F.relu, test=False):
    with nn.context_scope(ctx):
        # Convblock0
        h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 32 -> 16
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)

        # Convblock 1
        h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 16 -> 8
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)

        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test)  # 8 -> 6
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test)

        # Convblock 3
        h = F.average_pooling(h, (6, 6))
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        h = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))
        return h
Example #5
def cnn_model_003(ctx, x, act=F.relu, test=False):
    with nn.context_scope(ctx):
        # Convblock0
        h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 32 -> 16
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)

        # Convblock 1
        h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 16 -> 8
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)

        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act,
                      test=test)  # 8 -> 6
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test)

        return h
Example #6
File: finetuning.py Project: sony/nnabla
def construct_networks(args, images, model, num_class, test):
    try:
        pooled = model(images,
                       force_global_pooling=1,
                       use_up_to="pool",
                       training=not test)
    except:
        pooled = model(images, use_up_to="pool", training=not test)

    with nn.parameter_scope("finetuning"):
        if args.model == "VGG":
            pooled = F.relu(pooled)

            with nn.parameter_scope("additional_fc_1"):
                pooled = PF.affine(pooled, 4096)
            pooled = F.relu(pooled)
            if not test:
                pooled = F.dropout(pooled, 0.5)

            with nn.parameter_scope("additional_fc_2"):
                pooled = PF.affine(pooled, 4096)
            pooled = F.relu(pooled)
            if not test:
                pooled = F.dropout(pooled, 0.5)

        with nn.parameter_scope("last_fc"):
            pred = PF.affine(pooled, num_class)

    return pred
Example #7
def cnn_model_003(ctx, x, act=F.relu, test=False):
    with nn.context_scope(ctx):
        # Convblock0
        h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 28 -> 14
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)

        # Convblock 1
        h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 14 -> 7
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)

        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act,
                      test=test)  # 7 -> 5
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test)

        # Convblock 3
        h = F.average_pooling(h, (5, 5))
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        h = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))
        return h
Example #8
def cnn_ae_model_000(ctx, x, act=F.relu, test=False):
    with nn.parameter_scope("ae"):
        with nn.context_scope(ctx):
            # Convblock0
            h = conv_unit(x, "conv00", 32, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv01", 32, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv02", 32, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv03", 32, k=4, s=2, p=1, act=act, test=test)  # 32 -> 16
            if not test:
                h = F.dropout(h)
     
            # Convblock 1
            h = conv_unit(h, "conv10", 64, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv11", 64, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv12", 64, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv13", 64, k=4, s=2, p=1, act=act, test=test) # 16 -> 8
            if not test:
                h = F.dropout(h)
     
            # Deconvblock0
            h = deconv_unit(h, "deconv00", 64, k=4, s=2, p=1, act=act, test=test) # 8 -> 16
            h = deconv_unit(h, "deconv01", 64, k=3, s=1, p=1, act=act, test=test)
     
            h = deconv_unit(h, "deconv02", 64, k=3, s=1, p=1, act=act, test=test)
            h = deconv_unit(h, "deconv03", 64, k=3, s=1, p=1, act=act, test=test)  
     
            # Deconvblock 1
            h = deconv_unit(h, "deconv10", 32, k=4, s=2, p=1, act=act, test=test)  # 16 -> 32
            h = deconv_unit(h, "deconv11", 32, k=3, s=1, p=1, act=act, test=test)
            h = deconv_unit(h, "deconv12", 32, k=3, s=1, p=1, act=act, test=test)
            h = deconv_unit(h, "deconv13", 3, k=3, s=1, p=1, act=None, test=test)

        return h
Example #9
def cnn_model_003(ctx, x, act=F.elu, do=True, test=False):
    with nn.context_scope(ctx):
        # Convblock0
        h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 32 -> 16
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)

        # Convblock 1
        h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 16 -> 8
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)

        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act,
                      test=test)  # 8 -> 6
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        h_branch = h

        # Convblock 3
        h = conv_unit(h_branch,
                      "conv23",
                      10,
                      k=1,
                      s=1,
                      p=0,
                      act=act,
                      test=test)
        h = F.average_pooling(h, (6, 6))
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        pred = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))

        # Uncertainty
        u0 = conv_unit(h_branch, "u0", 10, k=1, s=1, p=0, act=act, test=test)
        u0 = F.average_pooling(u0, (6, 6))
        with nn.parameter_scope("u0bn"):
            u0 = PF.batch_normalization(u0, batch_stat=not test)
            log_var = F.reshape(u0, (u0.shape[0], np.prod(u0.shape[1:])))

        # Uncertainty for uncertainty
        u1 = conv_unit(h_branch, "u1", 10, k=1, s=1, p=0, act=act, test=test)
        u1 = F.average_pooling(u1, (6, 6))
        with nn.parameter_scope("u1bn"):
            u1 = PF.batch_normalization(u1, batch_stat=not test)
            log_s = F.reshape(u1, (u1.shape[0], np.prod(u1.shape[1:])))

        return pred, log_var, log_s
Example #10
def discriminator(x, maxh=256, test=False, output_hidden=False):
    """
    Builds a discriminator network that maps a (B, 1, 28, 28) input to
    a (B, 1) output.
    """

    # Define shortcut functions
    def bn(xx):
        # Batch normalization
        return PF.batch_normalization(xx, batch_stat=not test)

    def downsample2(xx, c):
        return PF.convolution(xx,
                              c, (3, 3),
                              pad=(1, 1),
                              stride=(2, 2),
                              with_bias=False)

    assert maxh / 8 > 0
    with nn.parameter_scope("dis"):
        # (1, 56, 56) --> (32, 28, 28)
        with nn.parameter_scope("conv0"):
            c0 = F.elu(bn(downsample2(x, maxh // 8)))
        if not test:
            c0 = F.dropout(c0, 0.2)
        # (32, 28, 28) --> (32, 16, 16)
        with nn.parameter_scope("conv1"):
            c1 = F.elu(
                bn(
                    PF.convolution(c0,
                                   maxh // 8, (3, 3),
                                   pad=(3, 3),
                                   stride=(2, 2),
                                   with_bias=False)))
        if not test:
            c1 = F.dropout(c1, 0.2)
        # (32, 16, 16) --> (64, 8, 8)
        with nn.parameter_scope("conv2"):
            c2 = F.elu(bn(downsample2(c1, maxh // 4)))
        # (64, 8, 8) --> (128, 4, 4)
        with nn.parameter_scope("conv3"):
            c3 = F.elu(bn(downsample2(c2, maxh // 2)))
        # (128, 4, 4) --> (256, 4, 4)
        with nn.parameter_scope("conv4"):
            c4 = bn(
                PF.convolution(c3, maxh, (3, 3), pad=(1, 1), with_bias=False))
        # (256, 4, 4) --> (1,)
        with nn.parameter_scope("fc1"):
            f = PF.affine(c4, 1)
    if output_hidden:
        return f, [c1, c2, c3, c4]
    return f
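A minimal usage sketch for the discriminator above, assuming the usual NNabla imports (nnabla.functions as F, nnabla.parametric_functions as PF) are in scope for the function and following the (B, 1, 28, 28) input shape from the docstring.

import numpy as np
import nnabla as nn

# Hypothetical usage: a batch of 8 single-channel 28x28 images.
x = nn.Variable((8, 1, 28, 28))
f = discriminator(x, maxh=256, test=False)
x.d = np.random.randn(8, 1, 28, 28).astype(np.float32)
f.forward()
print(f.shape)  # (8, 1)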
Example #11
def cnn_model_003(ctx, x, act=F.elu, do=True, test=False):
    with nn.context_scope(ctx):
        # Convblock0
        h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 32 -> 16
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)

        # Convblock 1
        h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 16 -> 8
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)

        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test)  # 8 -> 6
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        h_branch = h

        # Convblock 3
        h = conv_unit(h_branch, "conv23", 10, k=1, s=1, p=0, act=act, test=test)
        h = F.average_pooling(h, (6, 6))
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        pred = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))

        # Uncertainty
        u0 = conv_unit(h_branch, "u0", 10, k=1, s=1, p=0, act=act, test=test)
        u0 = F.average_pooling(u0, (6, 6))
        with nn.parameter_scope("u0bn"):
            u0 = PF.batch_normalization(u0, batch_stat=not test)
            log_var = F.reshape(u0, (u0.shape[0], np.prod(u0.shape[1:])))

        # Uncertainty for uncertainty
        u1 = conv_unit(h_branch, "u1", 10, k=1, s=1, p=0, act=act, test=test)
        u1 = F.average_pooling(u1, (6, 6))
        with nn.parameter_scope("u1bn"):
            u1 = PF.batch_normalization(u1, batch_stat=not test)
            log_s = F.reshape(u1, (u1.shape[0], np.prod(u1.shape[1:])))

        return pred, log_var, log_s
Example #12
def cnn_model_003(ctx, h, act=F.elu, do=True, test=False):
    with nn.context_scope(ctx):
        if not test:
            b, c, s, s = h.shape
            h = F.image_augmentation(h, (c, s, s),
                                     min_scale=1.0, max_scale=1.5,
                                     angle=0.5, aspect_ratio=1.3, distortion=0.2,
                                     flip_lr=True)
        # Convblock0
        h = conv_unit(h, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 32 -> 16
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)

        # Convblock 1
        h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 16 -> 8
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)

        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test)  # 8 -> 6
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        u = h

        # Convblock 3
        h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test)
        h = F.average_pooling(h, (6, 6))
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        pred = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))

        # Uncertainty
        u = conv_unit(u, "u0", 10, k=1, s=1, p=0, act=act, test=test)
        u = F.average_pooling(u, (6, 6))
        with nn.parameter_scope("u0bn"):
            u = PF.batch_normalization(u, batch_stat=not test)
            log_var = F.reshape(u, (u.shape[0], np.prod(u.shape[1:])))

        return pred, log_var
Example #13
def network(x, y_index, test=False):
    # Input -> 3,64,64
    # Convolution -> 16,31,31
    with nn.parameter_scope('Convolution'):
        h = PF.convolution(x, 16, (3, 3), (0, 0), (2, 2))
    # Tanh
    h = F.tanh(h)
    # MaxPooling -> 16,16,11
    h = F.max_pooling(h, (2, 3), (2, 3))
    # Dropout
    if not test:
        h = F.dropout(h)
    # Convolution_2 -> 32,6,5
    with nn.parameter_scope('Convolution_2'):
        h = PF.convolution(h, 32, (5, 3), (0, 0), (2, 2))
    # ReLU_4
    h = F.relu(h, True)
    # MaxPooling_2 -> 32,3,3
    h = F.max_pooling(h, (2, 2), (2, 2))
    # Dropout_2
    if not test:
        h = F.dropout(h)
    # Convolution_3 -> 64,1,1
    with nn.parameter_scope('Convolution_3'):
        h = PF.convolution(h, 64, (3, 3), (0, 0), (2, 2))
    # Tanh_2
    h = F.tanh(h)
    # Dropout_3
    if not test:
        h = F.dropout(h)
    # Affine -> 50
    with nn.parameter_scope('Affine'):
        h = PF.affine(h, (50, ))
    # ReLU_2
    h = F.relu(h, True)
    # Dropout_4
    if not test:
        h = F.dropout(h)
    # Affine_2 -> 5
    with nn.parameter_scope('Affine_2'):
        h = PF.affine(h, (5, ))
    # ELU
    h = F.elu(h)
    # Affine_3 -> 1
    with nn.parameter_scope('Affine_3'):
        h = PF.affine(h, (1, ))
    # SquaredError
    #h = F.squared_error(h, y_index)
    return h
Example #14
def cnn(batch_size, vocab_size, text_len, classes, features=128, train=True):
    text = nn.Variable([batch_size, text_len])

    with nn.parameter_scope("text_embed"):
        embed = PF.embed(text, n_inputs=vocab_size, n_features=features)
    print("embed", embed.shape)

    embed = F.reshape(embed, (batch_size, 1, text_len, features))
    print("embed", embed.shape)

    combined = None
    for n in range(2, 6): # 2 - 5 gram
        with nn.parameter_scope(str(n) + "_gram"):
            with nn.parameter_scope("conv"):
                conv = PF.convolution(embed, 128, kernel=(n, features))
                conv = F.relu(conv)
            with nn.parameter_scope("pool"):
                pool = F.max_pooling(conv, kernel=(conv.shape[2], 1))
                if combined is None:
                    combined = F.identity(pool)
                else:
                    combined = F.concatenate(combined, pool)

    if train:
        combined = F.dropout(combined, 0.5)

    with nn.parameter_scope("output"):
        y = PF.affine(combined, classes)

    t = nn.Variable([batch_size, 1])

    _loss = F.softmax_cross_entropy(y, t)
    loss = F.reduce_mean(_loss)

    return text, y, loss, t
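A minimal call sketch for the text-classification graph builder above; the vocabulary size, sequence length, and class count below are hypothetical.

# Hypothetical sizes for a 4-class classifier over a 20,000-word vocabulary.
text, y, loss, t = cnn(batch_size=32, vocab_size=20000, text_len=50,
                       classes=4, features=128, train=True)
# Feed `text` and `t`, then run forward/backward on `loss` to train.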
Example #15
    def __call__(self, x):
        # First conv
        h = self.conv_bn_relu6(x, int(self.init_maps * self.depth_mul),
                               stride=(2, 2), name="first-conv")

        # Inverted residual blocks
        for i, elm in enumerate(self.settings):
            t, c, n, s = elm
            # TODO: where to multiply depth_mul
            c = round(c * self.depth_mul)
            mbconv_s = partial(self.inverted_residual,
                               maps=c, stride=(s, s), ef=t)
            mbconv_1 = partial(self.inverted_residual,
                               maps=c, stride=(1, 1), ef=t)
            for j in range(n):
                name = "mbconv-{:02d}-{:02d}".format(i, j)
                h = mbconv_s(h, name=name) if j == 0 else mbconv_1(
                    h, name=name)
        # Last conv
        h = self.conv_bn_relu6(h, int(1280 * self.depth_mul),
                               kernel=(1, 1), name="last-conv")

        # Classifier
        if not self.test:
            h = F.dropout(h, 0.2)
        pool_shape = get_spatial_shape(x.shape, self.channel_last)
        h = F.average_pooling(h, pool_shape, channel_last=self.channel_last)
        h = PF.affine(h, self.num_classes,
                      w_init=I.NormalInitializer(0.01), name="linear")

        return h, {}
Example #16
    def __call__(self, x):
        # First conv
        h = self.conv_bn_act(x, int(self.maps0 * self.depth_mul),
                             stride=(2, 2), act="hswish", name="first-conv")

        # Inverted residual blocks
        for i, elm in enumerate(self.settings):
            maps, kernel, stride, ef, act, se = elm
            maps = round(maps * self.depth_mul)
            name = "mbconv-{:03d}".format(i)
            h = self.inverted_residual(
                h, maps, kernel, stride, ef, act, se, name=name)

        # Conv -> Avepool -> Conv
        h = self.conv_bn_act(h, int(self.maps1 * self.depth_mul), (1, 1), act="hswish",
                             name="last-conv-1")
        pool_shape = get_spatial_shape(x.shape, self.channel_last)
        h = F.average_pooling(h, pool_shape, channel_last=self.channel_last)
        h = self.conv_act(h, int(self.maps2 * self.depth_mul), (1, 1), act="hswish",
                          name="last-conv-2")

        # Classifier
        if not self.test:
            h = F.dropout(h, 0.2)
        h = PF.affine(h, self.num_classes,
                      w_init=I.NormalInitializer(0.01), name="linear")

        return h, {}
Example #17
def wrapper(x, *args, **kwargs):
    residual = x
    h = layer_normalization(x)
    h = layer(h, *args, **kwargs)
    if kwargs['train']:
        h = F.dropout(h, p=kwargs['dropout_ratio'])
    return residual + h
Example #18
def transformer(train=True, dropout_ratio=0.1):
    x = nn.Variable((batch_size, max_len))
    t = nn.Variable((batch_size, 1))
    mask = get_mask(x)
    with nn.parameter_scope('embedding_layer'):
        # h = time_distributed(PF.embed)(x, vocab_size, embedding_size) * mask
        h = token_embedding(x, vocab_size, embedding_size)
    h = position_encoding(h)

    if train:
        h = F.dropout(h, p=dropout_ratio)

    for i in range(hopping_num):
        with nn.parameter_scope(f'encoder_hopping_{i}'):
            h = residual_normalization_wrapper(multihead_self_attention)(
                h,
                head_num,
                mask=mask,
                train=train,
                dropout_ratio=dropout_ratio)
            h = residual_normalization_wrapper(positionwise_feed_forward)(
                h, train=train, dropout_ratio=dropout_ratio)

    with nn.parameter_scope('output_layer'):
        y = F.sigmoid(PF.affine(h[:, 0, :], 1))

    accuracy = F.mean(F.equal(F.round(y), t))
    loss = F.mean(F.binary_cross_entropy(y, t))

    return x, y, t, accuracy, loss
Example #19
    def csc(x, scope_name, dn=False):
        C = x.shape[1]
        h = x
        with nn.parameter_scope(scope_name):

            with nn.parameter_scope("conv1"):
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h, True)
                h = PF.convolution(h,
                                   C,
                                   kernel=(1, 1),
                                   pad=(0, 0),
                                   with_bias=False)

            with nn.parameter_scope("shift"):  # no meaning but semantics
                h = shift(h)

            with nn.parameter_scope("conv2"):
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h, True)
                stride = (2, 2) if dn else (1, 1)
                if p > 0:
                    h = F.dropout(h, p=0.5) if not test else h
                h = PF.convolution(h,
                                   C,
                                   kernel=(1, 1),
                                   pad=(0, 0),
                                   stride=stride,
                                   with_bias=False)
        s = F.average_pooling(x, (2, 2)) if dn else x
        return h + s
Example #20
def bert_embed(input_ids, token_type_ids=None, position_ids=None, vocab_size=30522, embed_dim=768,
               num_pos_ids=512, dropout_prob=0.1, test=True):
    """Construct the embeddings from word, position and token type."""

    batch_size = input_ids.shape[0]
    seq_len = input_ids.shape[1]
    if position_ids is None:
        position_ids = F.arange(0, seq_len)
        position_ids = F.broadcast(F.reshape(
            position_ids, (1,)+position_ids.shape), (batch_size,) + position_ids.shape)
    if token_type_ids is None:
        token_type_ids = F.constant(val=0, shape=(batch_size, seq_len))

    embeddings = PF.embed(input_ids, vocab_size,
                          embed_dim, name='word_embeddings')
    position_embeddings = PF.embed(
        position_ids, num_pos_ids, embed_dim, name='position_embeddings')
    token_type_embeddings = PF.embed(
        token_type_ids, 2, embed_dim, name='token_type_embeddings')

    embeddings += position_embeddings
    embeddings += token_type_embeddings
    embeddings = PF.layer_normalization(
        embeddings, batch_axis=(0, 1), eps=1e-12, name='embed')

    if dropout_prob > 0.0 and not test:
        embeddings = F.dropout(embeddings, dropout_prob)

    return embeddings
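A minimal usage sketch for bert_embed, assuming a hypothetical batch of 4 token sequences of length 128 and the BERT-base defaults from the signature above.

import nnabla as nn

input_ids = nn.Variable((4, 128))
emb = bert_embed(input_ids, test=False)
print(emb.shape)  # (4, 128, 768)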
Example #21
def res_unit(x, scope_name, act=F.relu, dn=False, test=False):
    C = x.shape[1]

    with nn.parameter_scope(scope_name):
        # Conv -> BN -> Relu
        with nn.parameter_scope("conv1"):
            h = PF.convolution(x, C//2, kernel=(1, 1), pad=(0, 0), with_bias=False)
            h = PF.batch_normalization(h, decay_rate=0.9, batch_stat=not test)
            h = act(h)
        # Conv -> BN -> Relu
        with nn.parameter_scope("conv2"):
            h = PF.convolution(h, C//2, kernel=(3, 3), pad=(1, 1), with_bias=False)
            h = PF.batch_normalization(h, decay_rate=0.9, batch_stat=not test)
            h = act(h)
        # Conv -> BN
        with nn.parameter_scope("conv3"): 
            h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0), with_bias=False)
            h = PF.batch_normalization(h, decay_rate=0.9, batch_stat=not test)
    # Residual -> Relu
    if not test:
        h = F.dropout(h)
    with nn.parameter_scope(scope_name):
        h = PF.batch_normalization(h, decay_rate=0.9, batch_stat=not test)
    h = F.add2(h, x)
    h = act(h)
    
    # Maxpooling
    if dn:
        h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
    
    return h
Example #22
def build_self_attention_model(train=True):
    x = nn.Variable((batch_size, max_len))
    t = nn.Variable((batch_size, 1))
    mask = get_mask(x)
    attention_mask = (F.constant(1, shape=mask.shape) - mask) * F.constant(
        np.finfo(np.float32).min, shape=mask.shape)
    with nn.parameter_scope('embedding'):
        h = time_distributed(PF.embed)(x, vocab_size, embedding_size) * mask
    with nn.parameter_scope('forward'):
        h_f = lstm(h,
                   hidden_size,
                   mask=mask,
                   return_sequences=True,
                   return_state=False)
    with nn.parameter_scope('backward'):
        h_b = lstm(h[:, ::-1, ],
                   hidden_size,
                   mask=mask,
                   return_sequences=True,
                   return_state=False)[:, ::-1, ]
    h = F.concatenate(h_f, h_b, axis=2)
    if train:
        h = F.dropout(h, p=dropout_ratio)
    with nn.parameter_scope('da'):
        a = F.tanh(time_distributed(PF.affine)(h, da))
        if train:
            a = F.dropout(a, p=dropout_ratio)
    with nn.parameter_scope('r'):
        a = time_distributed(PF.affine)(a, r)
        if train:
            a = F.dropout(a, p=dropout_ratio)
        a = F.softmax(a + attention_mask, axis=1)
    m = F.batch_matmul(a, h, transpose_a=True)
    with nn.parameter_scope('output_mlp'):
        output = F.relu(PF.affine(m, output_mlp_size))
        if train:
            output = F.dropout(output, p=dropout_ratio)
    with nn.parameter_scope('output'):
        y = F.sigmoid(PF.affine(output, 1))

    accuracy = F.mean(F.equal(F.round(y), t))
    loss = F.mean(F.binary_cross_entropy(
        y, t)) + attention_penalty_coef * frobenius(
            F.batch_matmul(a, a, transpose_a=True) - batch_eye(batch_size, r))
    return x, t, accuracy, loss
Example #23
def test_dropout_forward_backward(p, seed, ctx, func_name):
    from nbla_test_utils import cap_ignore_region
    # Note: each backward execution requires a forward execution in NNabla.

    with nn.context_scope(ctx):
        # Create inputs
        rng = np.random.RandomState(seed)
        inputs = [
            cap_ignore_region(
                rng.randn(2, 3, 4).astype(np.float32) * 2, (-1e-3, 1e-3))
        ]  # Ensure there is no zero.
        x = nn.Variable(inputs[0].shape, need_grad=True)
        x.d = inputs[0]
        init_dx = rng.randn(*x.shape).astype(x.data.dtype)
        init_dy = rng.randn(*x.shape).astype(x.data.dtype)

        # Construct graph
        y = F.dropout(x, p)

        # Reference parameter
        scale = 1. / (1. - p)

        # Test forward
        y.forward(clear_buffer=True)
        mask = (y.d != 0)
        ref_y = x.d * mask * scale
        assert_allclose(y.d, ref_y)
        assert y.parent.name == func_name

        # Test backward
        x.g[...] = init_dx
        y.backward(init_dy, clear_buffer=True)
        ref_dx = init_dy * mask * scale
        assert_allclose(x.g, init_dx + ref_dx)

        # Test accumulation
        y.forward(clear_no_need_grad=True)
        mask = (y.d != 0)
        x.g[...] = 1
        y.g = init_dy
        y.parent.backward([x], [y], [False])
        ref_dx = init_dy * mask * scale
        assert_allclose(x.g, ref_dx)

        # Test accum=False with NaN gradient
        y.forward(clear_no_need_grad=True)
        x.g = np.float32('nan')
        y.parent.backward([x], [y], [False])
        assert not np.any(np.isnan(x.g))

        # Test need_grad
        y.forward(clear_no_need_grad=True)
        x.g[...] = 0
        x.need_grad = False
        y.backward(init_dy)
        assert np.all(x.g == 0)
Example #24
def network_LSTM(x, D, C, InputShape, HiddenSize, test=False):
    # Input_2:x -> 687
    # Delay_in:D -> 100
    # Cell_in:C -> 100

    # Concatenate -> 787
    h = F.concatenate(D, x, axis=1)

    # Affine -> 100
    h1 = PF.affine(h, HiddenSize, name='Affine')

    # InputGate -> 100
    h2 = PF.affine(h, HiddenSize, name='InputGate')

    # OutputGate -> 100
    h3 = PF.affine(h, HiddenSize, name='OutputGate')

    # ForgetGate -> 100
    h4 = PF.affine(h, HiddenSize, name='ForgetGate')
    # Sigmoid
    h1 = F.sigmoid(h1)
    # Sigmoid_2
    h2 = F.sigmoid(h2)

    # Sigmoid_3
    h3 = F.sigmoid(h3)
    # Sigmoid_4
    h4 = F.sigmoid(h4)

    # Mul2 -> 100
    h1 = F.mul2(h1, h2)

    # Mul2_3 -> 100
    h4 = F.mul2(h4, C)

    # Add2 -> 100
    h1 = F.add2(h1, h4, True)

    # Tanh
    h5 = F.tanh(h1)

    # Cell_out
    h6 = F.identity(h1)

    # Mul2_2 -> 100
    h5 = F.mul2(h5, h3)
    # Dropout
    if not test:
        h5 = F.dropout(h5)

    # Output
    h5 = F.identity(h5)

    # Concatenate_2 -> 200
    h5 = F.concatenate(h5, h6, axis=1)
    return h5
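A minimal wiring sketch for the LSTM cell above, using the sizes given in its comments (687 input features, hidden and cell state of size 100); the batch size is hypothetical.

import nnabla as nn

x = nn.Variable((16, 687))
D = nn.Variable((16, 100))   # previous hidden state ("Delay_in")
C = nn.Variable((16, 100))   # previous cell state
out = network_LSTM(x, D, C, InputShape=687, HiddenSize=100, test=False)
print(out.shape)  # (16, 200): new hidden state concatenated with the new cell state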
Example #25
def cnn_model_003(ctx, x, act=F.relu, test=False):
    with nn.context_scope(ctx):
        # Convblock0
        h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)

        # Learned attention multiplication
        h = one_by_one_conv(h, "attend0")
        h = F.max_pooling(h, (2, 2))  # 32 -> 16
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)

        # Convblock 1
        h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)

        # Learned attention multiplication
        h = one_by_one_conv(h, "attend1")
        h = F.max_pooling(h, (2, 2))  # 16 -> 8
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)

        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test)  # 8 -> 6
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test)

        # Learned attention multiplication
        h = one_by_one_conv(h, "attend2")

        # Convblock 3
        h = F.average_pooling(h, (6, 6))
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        h = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))
        return h
Example #26
def cnn_model_003(ctx, x, act=F.elu, do=True, test=False):
    with nn.context_scope(ctx):
        # Convblock0
        h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 28 -> 14
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)

        # Convblock 1
        h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 14 -> 7
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)

        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test)  # 7 -> 5
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        u = h

        # Convblock 3
        h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test)
        h = F.average_pooling(h, (5, 5))
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        pred = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))

        # Uncertainty
        u = conv_unit(u, "u0", 10, k=1, s=1, p=0, act=act, test=test)
        u = F.average_pooling(u, (5, 5))
        with nn.parameter_scope("u0bn"):
            u = PF.batch_normalization(u, batch_stat=not test)
            log_var = F.reshape(u, (u.shape[0], np.prod(u.shape[1:])))

        return pred, log_var
Example #27
    def call(self, inputs):
        r"""Encoder layer.
        Args:
            inputs (nn.Variable): An input variable of shape (B, T) containing indices
                of character embeddings.

        Returns:
            nn.Variable: Output variable of shape (T, B, C).
        """
        hp = self._hparams
        with nn.parameter_scope('embeddings'):
            val = np.sqrt(6.0 / (len(hp.vocab) + hp.symbols_embedding_dim))
            inputs = PF.embed(
                inputs,
                n_inputs=len(hp.vocab),
                n_features=hp.symbols_embedding_dim,
                initializer=UniformInitializer(lim=(-val,
                                                    val)))  # (B, T, C=512)

        with nn.parameter_scope('ngrams'):
            out = inputs
            for i in range(hp.encoder_n_convolutions):
                with nn.parameter_scope(f'filter_{i}'):
                    out = conv_norm(out,
                                    out_channels=hp.encoder_embedding_dim,
                                    kernel_size=hp.encoder_kernel_size,
                                    padding=(hp.encoder_kernel_size - 1) // 2,
                                    bias=False,
                                    stride=1,
                                    dilation=1,
                                    w_init_gain='relu',
                                    scope='conv_norm',
                                    channel_last=True)  # (B, C=512, T)
                    out = PF.batch_normalization(out,
                                                 batch_stat=self.training,
                                                 axes=[2])
                    out = F.relu(out)
                    if self.training:
                        # (B, C=512, T) --> (B, T, C=512)
                        out = F.dropout(out, 0.5)

        with nn.parameter_scope('lstm_encoder'):
            out = F.transpose(out, (1, 0, 2))  # (B, T, C) -> (T, B, C)
            h = F.constant(shape=(2, 2, hp.batch_size,
                                  hp.encoder_embedding_dim // 2))
            c = F.constant(shape=(2, 2, hp.batch_size,
                                  hp.encoder_embedding_dim // 2))
            out, _, _ = PF.lstm(out,
                                h,
                                c,
                                training=self.training,
                                bidirectional=True)

        return out  # (T, B, C=512)
Example #28
def positionwise_feed_forward(x,
                              train: bool = True,
                              dropout_ratio: float = 0.1):
    batch_size, length, dim = x.shape
    with nn.parameter_scope('pff'):
        with nn.parameter_scope('w1'):
            h = F.relu(time_distributed(PF.affine)(x, dim * 4))
        if train:
            h = F.dropout(h, p=dropout_ratio)
        with nn.parameter_scope('w2'):
            h = time_distributed(PF.affine)(h, dim)
    return h
Example #29
def clf_resnet50(layer, n_classes=1, train=True):
    """
    This function uses ResNet-50 pretrained on ImageNet as the base architecture
    and replaces the linear layer in ResNet with two linear layers with a
    hidden layer of size 2,048. Dropout and ReLU are applied
    between these layers.
    """
    layer_1 = F.relu(PF.affine(layer, 2048, name='classifier_1'))
    if train:
        layer_1 = F.dropout(layer_1, 0.5)
    out = PF.affine(layer_1, n_classes, name='classifier_2')
    return out
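A minimal wiring sketch for the classifier head above, assuming the pretrained-model call pattern from Example #6 (use_up_to='pool', training=...); the ResNet50 wrapper import and all sizes below are assumptions for illustration.

import nnabla as nn
from nnabla.models.imagenet import ResNet50  # assumed pretrained-model wrapper, as used in Example #6

x = nn.Variable((16, 3, 224, 224))
pooled = ResNet50()(x, use_up_to='pool', training=True)
y = clf_resnet50(pooled, n_classes=1, train=True)  # (16, 1)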
Example #30
def _scaled_dot_product_attention(q, k, v, attn_mask, dropout):
    B, Nt, E = q.shape
    q *= float(E)**-0.5
    # (B, Nt, E) x (B, E, Ns) -> (B, Nt, Ns)
    attn = F.batch_matmul(q, k, transpose_b=True)
    if attn_mask is not None:
        attn += attn_mask
    attn_output_weights = F.softmax(attn, axis=len(attn.shape) - 1)
    if dropout > 0.0:
        # Dropout is applied to the attention weights used to compute the output.
        attn_output_weights = F.dropout(attn_output_weights, p=dropout)
    # (B, Nt, Ns) x (B, Ns, E) -> (B, Nt, E)
    attn_output = F.batch_matmul(attn_output_weights, v)
    return attn_output, attn_output_weights
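A minimal shape check for the attention helper above, with hypothetical sizes B=2, Nt=5, Ns=7, E=64.

import nnabla as nn

q = nn.Variable((2, 5, 64))
k = nn.Variable((2, 7, 64))
v = nn.Variable((2, 7, 64))
out, w = _scaled_dot_product_attention(q, k, v, attn_mask=None, dropout=0.1)
print(out.shape)  # (2, 5, 64)
print(w.shape)    # (2, 5, 7)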
Example #31
    def out_layers(self, h, emb):
        if self.scale_shift_norm:
            scale, shift = chunk(emb, num_chunk=2, axis=1)
            h = normalize(h, name="norm_out") * (scale + 1) + shift
        else:
            h += emb
            h = normalize(h, name="norm_out")

        h = nonlinearity(h)

        if self.dropout > 0:
            h = F.dropout(h, p=self.dropout)

        h = conv(h, self.out_channels, name="conv_out", zeroing_w=True)

        return h
Example #32
    def __call__(self, x):
        depth_coef = self.net_setting["depth_coef"]
        width_coef = self.net_setting["width_coef"]
        resolution = self.net_setting["resolution"]
        p = self.net_setting["p"]
        assert get_spatial_shape(x.shape, self.channel_last) == [resolution, resolution], \
            "(x.shape = {}, resolution = {})".format(x.shape, resolution)

        # First conv
        maps = self.round_filters(32, width_coef)
        h = self.conv_bn(x, maps, stride=(2, 2), name="first-conv")

        # Inverted residual blocks
        for i, elm in enumerate(self.mbc_settings):
            t, c, k, n, s = elm
            c = self.round_filters(c, width_coef)
            n = int(np.ceil(n * depth_coef))
            mbconv_s = partial(self.inverted_residual,
                               maps=c,
                               kernel=(k, k),
                               stride=(s, s),
                               ef=t)
            mbconv_1 = partial(self.inverted_residual,
                               maps=c,
                               kernel=(k, k),
                               stride=(1, 1),
                               ef=t)
            for j in range(n):
                name = "mbconv-{:02d}-{:02d}".format(i, j)
                h = mbconv_s(h, name=name) if j == 0 else mbconv_1(h,
                                                                   name=name)
        # Last conv
        maps = self.round_filters(1280, width_coef)
        h = self.conv_bn_swish(h, maps, kernel=(1, 1), name="last-conv")

        # Classifier
        if not self.test:
            h = F.dropout(h, p)
        pool_shape = get_spatial_shape(x.shape, self.channel_last)
        h = F.average_pooling(h, pool_shape, channel_last=self.channel_last)
        h = PF.affine(h,
                      self.num_classes,
                      w_init=I.NormalInitializer(0.01),
                      name="linear")

        return h, {}
Example #33
def res_unit_default(x, scope, bn_idx, test):
    # BatchNorm is independent of parameter sharing
    C = x.shape[1]
    with nn.parameter_scope(scope):
        with nn.parameter_scope('conv1'):
            with nn.parameter_scope('bn_{}-a'.format(bn_idx)):
                h = PF.batch_normalization(x, batch_stat=not test)
                h = F.relu(h)
            h = PF.convolution(h, C, (3, 3), pad=(1, 1), with_bias=False)
            with nn.parameter_scope('bn_{}-b'.format(bn_idx)):
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            if not test:
                h = F.dropout(h, 0.25)
        with nn.parameter_scope('conv2'):
            h = PF.convolution(h, C, (3, 3), pad=(1, 1), with_bias=False)
    return x + h
Example #34
def res_unit(x, scope_name, act=F.relu, dn=False, test=False):
    C = x.shape[1]

    with nn.parameter_scope(scope_name):
        # Conv -> BN -> Relu
        with nn.parameter_scope("conv1"):
            h = PF.convolution(x,
                               C // 2,
                               kernel=(1, 1),
                               pad=(0, 0),
                               with_bias=False)
            h = PF.batch_normalization(h, decay_rate=0.9, batch_stat=not test)
            h = act(h)
        # Conv -> BN -> Relu
        with nn.parameter_scope("conv2"):
            h = PF.convolution(h,
                               C // 2,
                               kernel=(3, 3),
                               pad=(1, 1),
                               with_bias=False)
            h = PF.batch_normalization(h, decay_rate=0.9, batch_stat=not test)
            h = act(h)
        # Conv -> BN
        with nn.parameter_scope("conv3"):
            h = PF.convolution(h,
                               C,
                               kernel=(1, 1),
                               pad=(0, 0),
                               with_bias=False)
            h = PF.batch_normalization(h, decay_rate=0.9, batch_stat=not test)
    # Residual -> Relu
    if not test:
        h = F.dropout(h)
    with nn.parameter_scope(scope_name):
        h = PF.batch_normalization(h, decay_rate=0.9, batch_stat=not test)
    h = F.add2(h, x)
    h = act(h)

    # Maxpooling
    if dn:
        h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))

    return h
Example #35
def test_dropout_forward_backward(p, seed, ctx, func_name):
    from nbla_test_utils import cap_ignore_region, function_tester
    rng = np.random.RandomState(seed)
    inputs = [
        cap_ignore_region(
            rng.randn(2, 3, 4).astype(np.float32) * 2,
            (-1e-3, 1e-3))]  # Ensure there is no zero.
    i = nn.Variable(inputs[0].shape, need_grad=True)
    i.d = inputs[0]
    # NNabla forward
    with nn.context_scope(ctx), nn.auto_forward():
        o = F.dropout(i, p)
    scale = 1. / (1. - p)
    mask = o.d != 0
    assert np.allclose(o.d, i.d * mask * scale)
    assert o.parent.name == func_name

    # NNabla backward
    orig_grad = rng.randn(*i.shape).astype(i.data.dtype)
    i.g[...] = orig_grad
    o_grad = rng.randn(*i.shape).astype(i.data.dtype)
    o.backward(o_grad)
    ref_grad = o_grad * mask * scale

    # Verify
    assert np.allclose(i.g, orig_grad + ref_grad)

    # Check if accum option works
    i.g[...] = 1
    o.g = o_grad
    o.parent.backward([i], [o], [False])
    assert np.allclose(i.g, ref_grad)

    # Check accum=False with NaN gradient
    i.g = np.float32('nan')
    o.parent.backward([i], [o], [False])
    assert not np.any(np.isnan(i.g))

    # Check if need_grad works
    i.g[...] = 0
    i.need_grad = False
    o.backward(o_grad)
    assert np.all(i.g == 0)
Example #36
    def test_clearing_without_recompute_flag(self):
        x0 = nn.Variable((1, 128, 128), need_grad=True)
        x1 = F.sin(x0).apply(recompute=True)
        x2 = F.dropout(x1)
        x3 = F.sin(x2).apply(recompute=True)
        x4 = F.sin(x3).apply(recompute=True)
        y = F.identity(x4)

        # Skip this code temporarily since it cause
        # randomly crash when perform CI testing on windows 10 with nnabla-cuda-ext
        pytest.skip(
            'Skipped for randomly crash when perform CI testing on windows 10 with nnabla-cuda-ext')

        y.forward(clear_no_need_grad=True)
        x2.data.clear()
        with pytest.raises(RuntimeError, match="Failed `called_setup_recompute_`"):
            # x2.data cannot be recomputed correctly since `setup_recompute` is not called during forward propagation.
            # Backward should raise when some intermediate variables are cleared by user.
            y.backward()
Example #37
def test_dropout_forward_backward(p, seed, ctx, func_name):
    from nbla_test_utils import cap_ignore_region, function_tester
    rng = np.random.RandomState(seed)
    inputs = [
        cap_ignore_region(
            rng.randn(2, 3, 4).astype(np.float32) * 2, (-1e-3, 1e-3))
    ]  # Ensure there is no zero.
    i = nn.Variable(inputs[0].shape, need_grad=True)
    i.d = inputs[0]
    # NNabla forward
    with nn.context_scope(ctx), nn.auto_forward():
        o = F.dropout(i, p)
    scale = 1. / (1. - p)
    mask = o.d != 0
    assert_allclose(o.d, i.d * mask * scale)
    assert o.parent.name == func_name

    # NNabla backward
    orig_grad = rng.randn(*i.shape).astype(i.data.dtype)
    i.g[...] = orig_grad
    o_grad = rng.randn(*i.shape).astype(i.data.dtype)
    o.backward(o_grad)
    ref_grad = o_grad * mask * scale

    # Verify
    assert_allclose(i.g, orig_grad + ref_grad)

    # Check if accum option works
    i.g[...] = 1
    o.g = o_grad
    o.parent.backward([i], [o], [False])
    assert_allclose(i.g, ref_grad)

    # Check accum=False with NaN gradient
    i.g = np.float32('nan')
    o.parent.backward([i], [o], [False])
    assert not np.any(np.isnan(i.g))

    # Check if need_grad works
    i.g[...] = 0
    i.need_grad = False
    o.backward(o_grad)
    assert np.all(i.g == 0)
Example #38
def bn_dropout(h, scope_name, test=False):
    with nn.parameter_scope(scope_name):
        h = PF.batch_normalization(h, batch_stat=not test)
    if not test:
        h = F.dropout(h)
    return h
Example #39
def cnn_model_003_with_cross_attention(ctx, x_list, act=F.relu, test=False):
    """With attention before pooling
    """
    with nn.context_scope(ctx):
        # Convblock0
        h0_list = []
        for x in x_list:
            h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
            h0_list.append(h)

        # Cross attention
        ca0 = attention(h0_list[0], h0_list[1], h0_list[1], 
                        div_dim=True, softmax=True)
        ca1 = attention(h0_list[1], h0_list[0], h0_list[0], 
                        div_dim=True, softmax=True)

        # Max pooling, batch norm, dropout
        h0_list = []
        for h in [ca0, ca1]:
            h = F.max_pooling(h, (2, 2))  # 32 -> 16
            with nn.parameter_scope("bn0"):
                h = PF.batch_normalization(h, batch_stat=not test)
            if not test:
                h = F.dropout(h)
            h0_list.append(h)

        # Convblock 1
        h1_list = []
        for h in h0_list:
            h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
            h1_list.append(h)

        # Cross attention
        ca0 = attention(h1_list[0], h1_list[1], h1_list[1], 
                        div_dim=True, softmax=True)
        ca1 = attention(h1_list[1], h1_list[0], h1_list[0], 
                        div_dim=True, softmax=True)
            
        # Max pooling, batch norm, dropout
        h1_list = []
        for h in [ca0, ca1]:
            h = F.max_pooling(h, (2, 2))  # 16 -> 8
            with nn.parameter_scope("bn1"):
                h = PF.batch_normalization(h, batch_stat=not test)
            if not test:
                h = F.dropout(h)
            h1_list.append(h)

        # Convblock 2
        h2_list = []
        for h in h1_list:
            h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test)  # 8 -> 6
            h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
            h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
            h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test)
            h2_list.append(h)

        # Cross attention
        ca0 = attention(h2_list[0], h2_list[1], h2_list[1], 
                        div_dim=True, softmax=True)
        ca1 = attention(h2_list[1], h2_list[0], h2_list[0], 
                        div_dim=True, softmax=True)

        # Convblock 3
        h3_list = []
        for h in [ca0, ca1]:
            h = F.average_pooling(h, (6, 6))
            with nn.parameter_scope("bn2"):
                h = PF.batch_normalization(h, batch_stat=not test)
            h = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))
            h3_list.append(h)
        return h3_list