def __init__(self, planes, stride=1, downsample=None, is_training=True):
    '''Create the three conv blocks of this residual unit.

    Args:
        planes: first argument of the first two ConvBlocks; the third one
            receives ``planes * self.expansion``.
        stride: forwarded to the second ConvBlock only.
        downsample: optional shortcut layer, stored unchanged.
        is_training: forwarded to every ConvBlock.
    '''
    # Options that are identical for all three ConvBlocks.
    shared = dict(use_bn=True, is_training=is_training)

    # The positional ``1`` is presumably the kernel size -- confirm against
    # the ConvBlock signature.
    self.conv_block1 = ConvBlock(planes, 1, use_act=True, name='conv1',
                                 **shared)
    self.conv_block2 = ConvBlock(planes, stride=stride, use_act=True,
                                 name='conv2', **shared)

    # The last block widens the output and applies no activation.
    expanded_planes = planes * self.expansion
    self.conv_block3 = ConvBlock(expanded_planes, 1, use_act=False,
                                 name='conv3', **shared)

    self.downsample = downsample
def __init__(self, sess, config, is_training=True):
    '''Assemble the backbone, the ASPP head and the final conv block.

    Args:
        sess: passed as the first argument to the backbone constructor.
        config: object providing ``resnet_model`` (name looked up in the
            ``resnet`` module namespace) and ``num_classes``.
        is_training: forwarded to every sub-component.
    '''
    # Look the backbone constructor up by name in the resnet module.
    backbone_factory = resnet.__dict__.get(config.resnet_model)
    self.resnet_model = backbone_factory(
        sess, pretrained=True, is_training=is_training)

    self.aspp = ASPP(is_training=is_training)

    # Output conv with one channel per class.
    self.conv = ConvBlock(
        config.num_classes, 1, 1, is_training=is_training, name='conv1')
def __init__(self, block, layers, num_classes=1000, is_training=True,
             scope=None):
    '''Build the stem, the four residual stages and the classifier head.

    Args:
        block: residual block class handed to ``_make_layer``.
        layers: indexable with four entries, the block count per stage.
        num_classes: argument of the final ``FullyConnected`` layer.
        is_training: forwarded to the stem conv block and to every stage.
        scope: stored on the instance as ``self.scope``.
    '''
    self.scope = scope
    # Must be set before the stages are built: _make_layer reads it.
    self.inplanes = 64

    def build_stage(planes, count, name, **extra):
        # All stages share the block type and the training flag.
        return self._make_layer(block, planes, count,
                                is_training=is_training, name=name, **extra)

    # Stem.
    self.conv_block1 = ConvBlock(64, 7, 2, use_bn=True, use_act=True,
                                 is_training=is_training, name='conv1')
    self.maxpool = MaxPool2d(3, 2, "same")

    # Residual stages, built in order; only stages 2-4 pass stride=2.
    self.layer1 = build_stage(64, layers[0], 'block1')
    self.layer2 = build_stage(128, layers[1], 'block2', stride=2)
    self.layer3 = build_stage(256, layers[2], 'block3', stride=2)
    self.layer4 = build_stage(512, layers[3], 'block4', stride=2)

    # Head.
    self.avgpool = AvgPool2d(7, 7)
    self.fc = FullyConnected(num_classes)
class ASPP(object):
    '''
    Atrous Spatial Pyramid Pooling module.

    Combines an image-level (globally averaged) branch, one branch built
    with ``ConvBlock(depth, 1)`` and three branches built with
    ``ConvBlock(depth, 3, dilation=r)`` for r in (6, 12, 18), then fuses the
    concatenated result with a final ``ConvBlock(depth, 1)``.
    '''

    def __init__(self, depth=256, is_training=True):
        '''Create the conv block of every branch and the fusion block.

        Args:
            depth: first argument of every ConvBlock (256 by default).
            is_training: forwarded to every ConvBlock.
        '''
        def make_conv(kernel, name, **extra):
            # Every branch shares depth and the training flag.
            return ConvBlock(depth, kernel, is_training=is_training,
                             name=name, **extra)

        self.avg_pool_conv = make_conv(1, 'avg_pool')
        self.atrous_pool_block_1 = make_conv(1, 'block1')
        self.atrous_pool_block_6 = make_conv(3, 'block2', dilation=6)
        self.atrous_pool_block_12 = make_conv(3, 'block3', dilation=12)
        self.atrous_pool_block_18 = make_conv(3, 'block4', dilation=18)
        self.conv_out = make_conv(1, 'conv1')

    def forward(self, inputs):
        '''Apply all branches to ``inputs`` and return the fused output.

        ``inputs`` is indexed so that axes 1 and 2 are averaged/resized and
        axis 3 is the concatenation axis.
        '''
        with tf.variable_scope('aspp'):
            input_shape = tf.shape(inputs)

            # Image-level branch: average over axes 1 and 2, convolve, then
            # resize back to the input's extent along those axes.
            pooled = tf.reduce_mean(inputs, axis=[1, 2], keepdims=True)
            pooled = self.avg_pool_conv.forward(pooled)
            pooled = tf.image.resize_bilinear(
                pooled, (input_shape[1], input_shape[2]))

            # Remaining branches all read the raw input, in a fixed order.
            branches = [pooled]
            for conv in (self.atrous_pool_block_1,
                         self.atrous_pool_block_6,
                         self.atrous_pool_block_12,
                         self.atrous_pool_block_18):
                branches.append(conv.forward(inputs))

            fused = tf.concat(branches, axis=3)
            return self.conv_out.forward(fused)

    def __call__(self, x):
        '''Make the module callable; equivalent to ``forward(x)``.'''
        return self.forward(x)
def _make_layer(self, block, planes, blocks, stride=1, is_training=True,
                name=None):
    '''Build one stage made of ``blocks`` residual units.

    Args:
        block: residual unit class; must expose ``expansion`` and accept
            ``(planes, stride, downsample, is_training)``.
        planes: passed to every unit; the stage output width is
            ``planes * block.expansion``.
        blocks: number of units in the stage.
        stride: applied by the first unit only.
        is_training: forwarded to the shortcut and to every unit.
        name: passed to ``Sequential`` together with the unit list.

    Returns:
        ``Sequential(layers, name)`` wrapping the created units.

    Side effects:
        Updates ``self.inplanes`` to the stage's output width.
    '''
    out_planes = planes * block.expansion

    # A shortcut layer is needed whenever the first unit changes the stride
    # or the channel count.
    downsample = None
    if stride != 1 or self.inplanes != out_planes:
        downsample = ConvBlock(out_planes, 1, stride, use_bn=True,
                               is_training=is_training, name='shortcut')

    layers = [block(planes, stride, downsample, is_training)]
    self.inplanes = out_planes

    # Fix: the remaining units previously dropped ``is_training`` and were
    # therefore always built in training mode. ``range`` replaces the
    # Python-2-only ``xrange`` and works on both interpreter lines.
    for _ in range(1, blocks):
        layers.append(block(planes, is_training=is_training))

    return Sequential(layers, name)
def __init__(self, planes, stride=1, downsample=None, is_training=True):
    '''Create the two conv blocks of this residual unit.

    Args:
        planes: first argument of both ConvBlocks.
        stride: forwarded to the first ConvBlock only.
        downsample: optional shortcut layer, stored unchanged.
        is_training: forwarded to both ConvBlocks.
    '''
    # Options common to both ConvBlocks.
    shared = {'use_bn': True, 'is_training': is_training}

    # Only the first block gets the stride and an activation.
    self.conv_block1 = ConvBlock(planes, stride=stride, use_act=True,
                                 **shared)
    self.conv_block2 = ConvBlock(planes, use_act=False, **shared)

    self.downsample = downsample
def __init__(self, depth=256, is_training=True):
    '''Create the conv block of every pooling branch and the output block.

    Args:
        depth: first argument of every ConvBlock (256 by default).
        is_training: forwarded to every ConvBlock.
    '''
    # Keyword arguments shared by all six ConvBlocks.
    common = {'is_training': is_training}

    # Image-level branch and the undilated branch.
    self.avg_pool_conv = ConvBlock(depth, 1, name='avg_pool', **common)
    self.atrous_pool_block_1 = ConvBlock(depth, 1, name='block1', **common)

    # Dilated branches with rates 6, 12 and 18.
    self.atrous_pool_block_6 = ConvBlock(
        depth, 3, dilation=6, name='block2', **common)
    self.atrous_pool_block_12 = ConvBlock(
        depth, 3, dilation=12, name='block3', **common)
    self.atrous_pool_block_18 = ConvBlock(
        depth, 3, dilation=18, name='block4', **common)

    # Block applied after the branches are combined.
    self.conv_out = ConvBlock(depth, 1, name='conv1', **common)
def __init__(self, g_theta_layer, f_phi_layer, input_dim, embedding_dim,
             rnn_dim, q_att_layer, answer_vocab_size, fixed_embed, **kwargs):
    """Build the image encoder, question encoder and answer classifier.

    Args:
        g_theta_layer: sequence of channel sizes handed to ``ConvBlock``;
            its last entry is the input width of the first f_phi layer.
        f_phi_layer: iterable of output widths for the f_phi linear layers.
        input_dim: 128 or 320; selects ``self.reduced_dim``.
        embedding_dim: GRU input size; also selects the embedding file
            ``glove_60b_<embedding_dim>.pkl`` in the working directory.
        rnn_dim: hidden size of the bidirectional GRU.
        q_att_layer: second argument of both ``Attention`` modules.
        answer_vocab_size: output width of the classifier.
        fixed_embed: if truthy, the embedding weight is excluded from
            gradient computation.
        **kwargs: accepted and ignored.

    Raises:
        ValueError: if ``input_dim`` is neither 128 nor 320.
    """
    super(Model, self).__init__()

    # Two 1x1 conv / batch-norm / ReLU stages: 1024 -> 512 -> 512 channels.
    kernel_size = 1
    stride = 1
    pad = 0
    self.encoder_layer = list()
    self.encoder_layer.append(
        nn.Conv2d(1024, 512, kernel_size, stride, pad, bias=False))
    self.encoder_layer.append(nn.BatchNorm2d(512))
    self.encoder_layer.append(nn.ReLU())
    self.encoder_layer.append(
        nn.Conv2d(512, 512, kernel_size, stride, pad, bias=False))
    self.encoder_layer.append(nn.BatchNorm2d(512))
    self.encoder_layer.append(nn.ReLU())
    self.encode = nn.Sequential(*self.encoder_layer)

    if input_dim == 128:
        self.reduced_dim = (14, 14)
    elif input_dim == 320:
        self.reduced_dim = (20, 30)
    else:
        # Previously this fell through and failed on the next attribute
        # access with an unrelated AttributeError.
        raise ValueError(
            'Unsupported input_dim: {!r} (expected 128 or 320)'.format(
                input_dim))
    prev_channel = 512
    self.grid_coord = Coordinate(self.reduced_dim)

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # load embedding files from a trusted source.
    with open('glove_60b_{}.pkl'.format(embedding_dim), 'rb') as f:
        weight = pickle.load(f)
    self.embedding = nn.Embedding(*weight.shape)
    self.embedding.load_state_dict({'weight': torch.Tensor(weight)})
    if fixed_embed:
        self.embedding.weight.requires_grad = False

    self.gru = nn.GRU(input_size=embedding_dim, hidden_size=rnn_dim,
                      bidirectional=True)

    self.g_theta_layer = list()
    # Presumably: +2 for the coordinate channels, and rnn_dim * 2 (both GRU
    # directions) for each of the two attention outputs -- confirm against
    # forward().
    prev_channel = (prev_channel + 2) + rnn_dim * 2 * 2
    self.q_att = Attention(rnn_dim * 2, q_att_layer)
    self.q_att_2 = Attention(rnn_dim * 2, q_att_layer)
    self.g_theta = ConvBlock(1, 1, 0, prev_channel, g_theta_layer)

    # f_phi: weight-normalised linear layers, each followed by a ReLU.
    self.f_phi_layer = list()
    prev_channel = g_theta_layer[-1]
    for channel in f_phi_layer:
        self.f_phi_layer.append(
            nn.utils.weight_norm(nn.Linear(prev_channel, channel)))
        self.f_phi_layer.append(nn.ReLU())
        prev_channel = channel
    self.f_phi = nn.Sequential(*self.f_phi_layer)

    self.classifier = nn.Sequential(
        *[nn.Linear(prev_channel, answer_vocab_size, bias=False)])
def __init__(self, g_theta_layer, f_phi_layer, input_dim, answer_vocab_size,
             fixed_embed, rnn_dim, **kwargs):
    """Build the image encoder, BERT-based text encoder and classifier.

    Args:
        g_theta_layer: sequence of channel sizes handed to ``ConvBlock``;
            its last entry is the input width of the first f_phi layer.
        f_phi_layer: iterable of output widths for the f_phi linear layers.
        input_dim: 128 or 320; selects ``self.reduced_dim``.
        answer_vocab_size: output width of the classifier.
        fixed_embed: if truthy, BERT is put in eval mode and its parameters
            are excluded from gradient computation.
        rnn_dim: output width of ``text_encoder`` and hidden size of the
            bidirectional GRU.
        **kwargs: accepted and ignored.

    Raises:
        ValueError: if ``input_dim`` is neither 128 nor 320.
    """
    super(Model, self).__init__()

    # Two 1x1 conv / batch-norm / ReLU stages, 1024 channels throughout.
    kernel_size = 1
    stride = 1
    pad = 0
    self.encoder_layer = list()
    self.encoder_layer.append(
        nn.Conv2d(1024, 1024, kernel_size, stride, pad, bias=False))
    self.encoder_layer.append(nn.BatchNorm2d(1024))
    self.encoder_layer.append(nn.ReLU())
    self.encoder_layer.append(
        nn.Conv2d(1024, 1024, kernel_size, stride, pad, bias=False))
    self.encoder_layer.append(nn.BatchNorm2d(1024))
    self.encoder_layer.append(nn.ReLU())
    self.encode = nn.Sequential(*self.encoder_layer)

    if input_dim == 128:
        self.reduced_dim = (14, 14)
    elif input_dim == 320:
        self.reduced_dim = (20, 30)
    else:
        # Previously this fell through and failed on the next attribute
        # access with an unrelated AttributeError.
        raise ValueError(
            'Unsupported input_dim: {!r} (expected 128 or 320)'.format(
                input_dim))
    prev_channel = 1024
    self.grid_coord = Coordinate(self.reduced_dim)

    self.bert = BertModel.from_pretrained('bert-base-uncased')
    if fixed_embed:
        self.bert.eval()
        # eval() alone neither stops gradient updates nor survives a later
        # .train() call on the parent module; disabling requires_grad is
        # what actually keeps the BERT weights fixed.
        for param in self.bert.parameters():
            param.requires_grad = False

    bert_channel = 768
    text_channel = rnn_dim
    self.text_encoder = nn.Linear(bert_channel, text_channel)
    self.gru = nn.GRU(input_size=768, hidden_size=text_channel,
                      bidirectional=True)

    self.g_theta_layer = list()
    # Presumably: +2 for the coordinate channels and text_channel * 2 for
    # the text features -- confirm against forward().
    prev_channel = (prev_channel + 2) + text_channel * 2
    self.g_theta = ConvBlock(1, 1, 0, prev_channel, g_theta_layer)

    # f_phi: weight-normalised linear layers, each followed by a ReLU.
    self.f_phi_layer = list()
    prev_channel = g_theta_layer[-1]
    for channel in f_phi_layer:
        self.f_phi_layer.append(
            nn.utils.weight_norm(nn.Linear(prev_channel, channel)))
        self.f_phi_layer.append(nn.ReLU())
        prev_channel = channel
    self.f_phi = nn.Sequential(*self.f_phi_layer)

    self.classifier = nn.Sequential(
        *[nn.Linear(prev_channel, answer_vocab_size, bias=False)])