Example No. 1
    def __init__(self, planes, stride=1, downsample=None, is_training=True):
        # Bottleneck-style residual block: 1x1 reduce, middle conv (possibly strided), 1x1 expand without activation
        self.conv_block1 = ConvBlock(planes, 1, use_bn=True, use_act=True, is_training=is_training, name='conv1')
        self.conv_block2 = ConvBlock(planes, stride=stride, use_bn=True, use_act=True, is_training=is_training,
                                     name='conv2')
        self.conv_block3 = ConvBlock(planes * self.expansion, 1, use_bn=True, use_act=False, is_training=is_training,
                                     name='conv3')
        # Optional projection applied to the shortcut when shapes differ
        self.downsample = downsample
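
Only the constructor is shown above. A minimal sketch of the matching forward pass, assuming the same ConvBlock.forward interface and the TF 1.x ops used in Example No. 4 (the method below is illustrative, not taken from the repository):

    def forward(self, x):
        # Keep the input for the residual connection
        identity = x

        out = self.conv_block1.forward(x)    # 1x1 reduce
        out = self.conv_block2.forward(out)  # middle conv, possibly strided
        out = self.conv_block3.forward(out)  # 1x1 expand, no activation yet

        # Project the shortcut when the spatial size or channel count changes
        if self.downsample is not None:
            identity = self.downsample.forward(identity)

        # Residual add followed by the activation
        return tf.nn.relu(out + identity)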
Example No. 2
    def __init__(self, sess, config, is_training=True):
        # Backbone: a pretrained ResNet variant looked up by name in the resnet module
        self.resnet_model = resnet.__dict__.get(config.resnet_model)(
            sess, pretrained=True, is_training=is_training)
        # ASPP head followed by a 1x1 classifier producing config.num_classes channels
        self.aspp = ASPP(is_training=is_training)
        self.conv = ConvBlock(config.num_classes,
                              1,
                              1,
                              is_training=is_training,
                              name='conv1')
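
A hypothetical way to build this model; the class name, config fields, and values below are placeholders for illustration, not taken from the repository:

    import tensorflow as tf

    class Config(object):
        resnet_model = 'resnet101'  # assumed to be a constructor name exposed by the resnet module
        num_classes = 21            # e.g. PASCAL VOC

    with tf.Session() as sess:
        model = Model(sess, Config(), is_training=False)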
Example No. 3
    def __init__(self, block, layers, num_classes=1000, is_training=True, scope=None):
        self.scope = scope

        self.inplanes = 64
        # Stem: 7x7 conv with stride 2, then a 3x3 max pool with stride 2
        self.conv_block1 = ConvBlock(64, 7, 2, use_bn=True, use_act=True, is_training=is_training, name='conv1')
        self.maxpool = MaxPool2d(3, 2, "same")
        # Four residual stages; layers[i] gives the number of blocks per stage
        self.layer1 = self._make_layer(block, 64, layers[0], is_training=is_training, name='block1')
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, is_training=is_training, name='block2')
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, is_training=is_training, name='block3')
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, is_training=is_training, name='block4')
        # Global average pool and the final classifier
        self.avgpool = AvgPool2d(7, 7)
        self.fc = FullyConnected(num_classes)
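
The constructor above only wires the layers together. A hedged sketch of the corresponding forward pass, assuming each wrapper exposes a forward() method and that FullyConnected expects a flattened tensor:

    def forward(self, x):
        with tf.variable_scope(self.scope):
            out = self.conv_block1.forward(x)  # 7x7/2 stem
            out = self.maxpool.forward(out)    # 3x3/2 max pool
            out = self.layer1.forward(out)
            out = self.layer2.forward(out)
            out = self.layer3.forward(out)
            out = self.layer4.forward(out)
            out = self.avgpool.forward(out)    # 7x7 average pool
            out = tf.reshape(out, [tf.shape(out)[0], -1])  # flatten to [batch, features]
            return self.fc.forward(out)        # logits over num_classes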
Example No. 4
class ASPP(object):
    '''
    Atrous Spatial Pyramid Pooling module: (a) one 1x1 convolution and three 3x3 convolutions with
    rates = (6, 12, 18) when output stride = 16, plus (b) image-level features from global average pooling;
    all branches use 256 filters with batch normalization and are concatenated and fused by a final 1x1 convolution.
    '''
    def __init__(self, depth=256, is_training=True):
        self.avg_pool_conv = ConvBlock(depth,
                                       1,
                                       is_training=is_training,
                                       name='avg_pool')

        self.atrous_pool_block_1 = ConvBlock(depth,
                                             1,
                                             is_training=is_training,
                                             name='block1')
        self.atrous_pool_block_6 = ConvBlock(depth,
                                             3,
                                             dilation=6,
                                             is_training=is_training,
                                             name='block2')
        self.atrous_pool_block_12 = ConvBlock(depth,
                                              3,
                                              dilation=12,
                                              is_training=is_training,
                                              name='block3')
        self.atrous_pool_block_18 = ConvBlock(depth,
                                              3,
                                              dilation=18,
                                              is_training=is_training,
                                              name='block4')

        self.conv_out = ConvBlock(depth,
                                  1,
                                  is_training=is_training,
                                  name='conv1')

    def forward(self, inputs):
        with tf.variable_scope('aspp'):
            feature_map_size = tf.shape(inputs)

            # Global average pooling
            image_features = tf.reduce_mean(inputs, axis=[1, 2], keepdims=True)
            image_features = self.avg_pool_conv.forward(image_features)
            image_features = tf.image.resize_bilinear(
                image_features, (feature_map_size[1], feature_map_size[2]))

            out_1x1_1 = self.atrous_pool_block_1.forward(inputs)
            out_3x3_6 = self.atrous_pool_block_6.forward(inputs)
            out_3x3_12 = self.atrous_pool_block_12.forward(inputs)
            out_3x3_18 = self.atrous_pool_block_18.forward(inputs)

            out = tf.concat(
                [image_features, out_1x1_1, out_3x3_6, out_3x3_12, out_3x3_18],
                axis=3)
            out = self.conv_out.forward(out)

            return out

    def __call__(self, x):
        return self.forward(x)
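
A minimal usage sketch for this module in a TF 1.x graph; the feature-map shape below is illustrative only (any output-stride-16 backbone feature map would do):

    import tensorflow as tf

    features = tf.placeholder(tf.float32, [None, 32, 32, 2048], name='backbone_features')
    aspp = ASPP(depth=256, is_training=False)
    fused = aspp(features)  # -> [None, 32, 32, 256] after the final 1x1 projection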
Example No. 5
    def _make_layer(self, block, planes, blocks, stride=1, is_training=True, name=None):
        downsample = None
        # Project the shortcut when the first block changes resolution or channel count
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = ConvBlock(planes * block.expansion, 1, stride, use_bn=True, is_training=is_training,
                                   name='shortcut')

        layers = []
        layers.append(block(planes, stride, downsample, is_training))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            # Remaining blocks keep stride 1; forward is_training so they match the first block
            layers.append(block(planes, is_training=is_training))

        return Sequential(layers, name)
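
As a concrete illustration, the standard ResNet-50 configuration uses bottleneck stages of 3, 4, 6 and 3 blocks. Assuming the enclosing class is the one from Example No. 3 and is named ResNet, and the bottleneck block from Example No. 1 is named Bottleneck (both names are assumptions here):

    # ResNet-50: expansion = 4, stages of 3, 4, 6, 3 bottleneck blocks
    model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=1000, is_training=True, scope='resnet50')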
Example No. 6
    def __init__(self, planes, stride=1, downsample=None, is_training=True):
        # Basic residual block: two convolutions, the second without activation before the residual add
        self.conv_block1 = ConvBlock(planes, stride=stride, use_bn=True, use_act=True, is_training=is_training)
        self.conv_block2 = ConvBlock(planes, use_bn=True, use_act=False, is_training=is_training)
        self.downsample = downsample
Example No. 8
    def __init__(self, g_theta_layer, f_phi_layer, input_dim, embedding_dim,
                 rnn_dim, q_att_layer, answer_vocab_size, fixed_embed,
                 **kwargs):
        super(Model, self).__init__()

        kernel_size = 1
        stride = 1
        self.encoder_layer = list()
        pad = 0
        # 1x1 conv encoder: 1024 -> 512 -> 512 channels, each stage followed by BatchNorm and ReLU
        self.encoder_layer.append(
            nn.Conv2d(1024, 512, kernel_size, stride, pad, bias=False))
        self.encoder_layer.append(nn.BatchNorm2d(512))
        self.encoder_layer.append(nn.ReLU())
        self.encoder_layer.append(
            nn.Conv2d(512, 512, kernel_size, stride, pad, bias=False))
        self.encoder_layer.append(nn.BatchNorm2d(512))
        self.encoder_layer.append(nn.ReLU())

        self.encode = nn.Sequential(*self.encoder_layer)

        if input_dim == 128:
            self.reduced_dim = (14, 14)
        elif input_dim == 320:
            self.reduced_dim = (20, 30)
        prev_channel = 512

        self.grid_coord = Coordinate(self.reduced_dim)

        with open('glove_60b_{}.pkl'.format(embedding_dim), 'rb') as f:
            weight = pickle.load(f)
            self.embedding = nn.Embedding(*weight.shape)
            self.embedding.load_state_dict({'weight': torch.Tensor(weight)})

        if fixed_embed:
            self.embedding.weight.requires_grad = False

        self.gru = nn.GRU(input_size=embedding_dim,
                          hidden_size=rnn_dim,
                          bidirectional=True)

        self.g_theta_layer = list()

        # Per-location relational input: (512 visual + 2 coordinate) channels plus, most likely,
        # two attended question summaries (q_att, q_att_2), each rnn_dim * 2 wide from the bidirectional GRU
        prev_channel = (prev_channel + 2) + rnn_dim * 2 * 2

        self.q_att = Attention(rnn_dim * 2, q_att_layer)

        self.q_att_2 = Attention(rnn_dim * 2, q_att_layer)

        self.g_theta = ConvBlock(1, 1, 0, prev_channel, g_theta_layer)
        # self.g_theta = ConvBlock(1, 1, 0, prev_channel, g_theta_layer)

        self.f_phi_layer = list()

        prev_channel = g_theta_layer[-1]

        for layer_num, channel in enumerate(f_phi_layer):
            # self.f_phi_layer.append(nn.Linear(prev_channel, channel))
            self.f_phi_layer.append(
                nn.utils.weight_norm(nn.Linear(prev_channel, channel)))
            self.f_phi_layer.append(nn.ReLU())
            prev_channel = channel

        self.f_phi = nn.Sequential(*self.f_phi_layer)

        self.classifier = nn.Sequential(
            *[nn.Linear(prev_channel, answer_vocab_size, bias=False)])
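
A hypothetical instantiation of this model; every argument value below is a placeholder chosen for illustration rather than a value from the repository's configuration:

    model = Model(g_theta_layer=[512, 512],
                  f_phi_layer=[1024, 1024],
                  input_dim=128,          # selects the (14, 14) feature grid
                  embedding_dim=300,      # must match the pickled GloVe weight file
                  rnn_dim=256,
                  q_att_layer=[512],      # the expected shape of this argument is assumed
                  answer_vocab_size=3000,
                  fixed_embed=True)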
Example No. 9
    def __init__(self, g_theta_layer, f_phi_layer, input_dim,
                 answer_vocab_size, fixed_embed, rnn_dim, **kwargs):
        super(Model, self).__init__()

        kernel_size = 1
        stride = 1
        self.encoder_layer = list()
        pad = 0
        # 1x1 conv encoder: two stages that keep the 1024-channel width, each followed by BatchNorm and ReLU
        self.encoder_layer.append(
            nn.Conv2d(1024, 1024, kernel_size, stride, pad, bias=False))
        self.encoder_layer.append(nn.BatchNorm2d(1024))
        self.encoder_layer.append(nn.ReLU())
        self.encoder_layer.append(
            nn.Conv2d(1024, 1024, kernel_size, stride, pad, bias=False))
        self.encoder_layer.append(nn.BatchNorm2d(1024))
        self.encoder_layer.append(nn.ReLU())

        self.encode = nn.Sequential(*self.encoder_layer)

        if input_dim == 128:
            self.reduced_dim = (14, 14)
        elif input_dim == 320:
            self.reduced_dim = (20, 30)

        prev_channel = 1024

        self.grid_coord = Coordinate(self.reduced_dim)

        self.bert = BertModel.from_pretrained('bert-base-uncased')

        if fixed_embed:
            self.bert.eval()

        bert_channel = 768
        text_channel = rnn_dim
        # self.text_encoder =  FC_ReLU(bert_channel, text_channel)
        self.text_encoder = nn.Linear(bert_channel, text_channel)

        # self.gru = nn.GRU(input_size=768, hidden_size=text_channel // 2,
        #                   bidirectional=True)

        self.gru = nn.GRU(input_size=768,
                          hidden_size=text_channel,
                          bidirectional=True)
        self.g_theta_layer = list()

        # Per-location relational input: (1024 visual + 2 coordinate) channels plus a question
        # encoding of size text_channel * 2 from the bidirectional GRU
        prev_channel = (prev_channel + 2) + text_channel * 2

        # self.q_att = Attention(rnn_dim * 2, rnn_dim)

        # self.g_theta = ConvBlock(1, 1, 0, prev_channel, g_theta_layer).cuda()
        self.g_theta = ConvBlock(1, 1, 0, prev_channel, g_theta_layer)

        self.f_phi_layer = list()

        prev_channel = g_theta_layer[-1]

        for layer_num, channel in enumerate(f_phi_layer):
            # self.f_phi_layer.append(nn.Linear(prev_channel, channel))
            self.f_phi_layer.append(
                nn.utils.weight_norm(nn.Linear(prev_channel, channel)))
            self.f_phi_layer.append(nn.ReLU())
            prev_channel = channel

        self.f_phi = nn.Sequential(*self.f_phi_layer)

        self.classifier = nn.Sequential(
            *[nn.Linear(prev_channel, answer_vocab_size, bias=False)])
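
One detail worth flagging: in PyTorch, bert.eval() only switches off dropout and batch-norm updates; gradients can still flow into BERT. If the intent of fixed_embed is to freeze the BERT weights entirely, the constructor would also need something along these lines (a sketch, not part of the original code):

    if fixed_embed:
        self.bert.eval()
        # Stop gradient updates to the pretrained BERT parameters
        for param in self.bert.parameters():
            param.requires_grad = False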