def _block_up(self, down_inputs, up_inputs, filters, padding, scope, upsampling=True):
    with tf.variable_scope(scope):
        # upsample first, then merge with the skip connection
        if upsampling:
            up_inputs = layers.upsampling_2d(inputs=up_inputs, factors=[2, 2])
        out = layers.crop_to_fit(down_inputs=down_inputs, up_inputs=up_inputs)
        out = layers.conv2d(inputs=out, filters=filters, kernel_size=3,
                            stride_size=1, padding=padding,
                            normalization='instance_normalization',
                            activation=tf.nn.relu, mode=self.mode, name='conv1')
        out = layers.conv2d(inputs=out, filters=filters, kernel_size=3,
                            stride_size=1, padding=padding,
                            normalization='instance_normalization',
                            activation=tf.nn.relu, mode=self.mode, name='conv2')
        return out
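
# Sketch of what a crop-and-concatenate helper like layers.crop_to_fit is
# assumed to do in _block_up above: center-crop the skip features to the
# upsampled map's spatial size, then concatenate along channels. This is an
# assumption about the custom layers module, not its actual implementation.
def crop_and_concat(down_inputs, up_inputs):
    down_shape = tf.shape(down_inputs)
    up_shape = tf.shape(up_inputs)
    offsets = [0,
               (down_shape[1] - up_shape[1]) // 2,
               (down_shape[2] - up_shape[2]) // 2,
               0]
    size = [-1, up_shape[1], up_shape[2], -1]
    cropped = tf.slice(down_inputs, offsets, size)
    return tf.concat([cropped, up_inputs], axis=-1)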
def _block_down(self, inputs, filters, padding, scope):
    with tf.variable_scope(scope):
        # downsampling path: two convolutions, then max pooling
        out = layers.conv2d(inputs=inputs, filters=filters, kernel_size=3,
                            stride_size=1, padding=padding,
                            normalization='instance_normalization',
                            activation=tf.nn.relu, mode=self.mode, name='conv1')
        out = layers.conv2d(inputs=out, filters=filters, kernel_size=3,
                            stride_size=1, padding=padding,
                            normalization='instance_normalization',
                            activation=tf.nn.relu, mode=self.mode, name='conv2')
        maxp = layers.max_pooling2d(inputs=out, pool_size=2, strides=2, padding='VALID')
        return out, maxp
def encoder(x, latent_dim, name='encoder'):
    with tf.variable_scope(name):
        out = layers.conv2d(input_=x, filters=32, kernel_size=3, stride_size=2,
                            padding='SAME', activation=tf.nn.relu,
                            kernel_initializer=initializer, name='conv2d_1')
        out = layers.conv2d(input_=out, filters=64, kernel_size=3, stride_size=2,
                            padding='SAME', activation=tf.nn.relu,
                            kernel_initializer=initializer, name='conv2d_2')
        out = layers.flatten(out)
        out = layers.fc(input_=out, units=16, activation=tf.nn.relu,
                        kernel_initializer=initializer, name='fc_1')
        mu = layers.fc(input_=out, units=latent_dim, activation=None,
                       kernel_initializer=initializer, name='mu')
        logvar = layers.fc(input_=out, units=latent_dim, activation=None,
                           kernel_initializer=initializer, name='logvar')
        return mu, logvar
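
# Hypothetical usage sketch (not part of this repo): an encoder returning
# (mu, logvar) is typically followed by the VAE reparameterization step
# z = mu + sigma * eps with sigma = exp(0.5 * logvar).
def sample_latent(mu, logvar):
    eps = tf.random_normal(shape=tf.shape(mu))
    return mu + tf.exp(0.5 * logvar) * eps

# mu, logvar = encoder(x, latent_dim=8)
# z = sample_latent(mu, logvar)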
def _build_graph(self):
    # downsampling path
    init_filter = 16
    self.next_images = self.datapipe.next_images
    down_out1, maxp1 = self._block_down(inputs=self.next_images, filters=init_filter,
                                        padding='SAME', scope='down_block1')
    down_out2, maxp2 = self._block_down(inputs=maxp1, filters=init_filter * 2,
                                        padding='SAME', scope='down_block2')
    down_out3, maxp3 = self._block_down(inputs=maxp2, filters=init_filter * 4,
                                        padding='SAME', scope='down_block3')
    down_out4, maxp4 = self._block_down(inputs=maxp3, filters=init_filter * 8,
                                        padding='SAME', scope='down_block4')
    down_out5, maxp5 = self._block_down(inputs=maxp4, filters=init_filter * 16,
                                        padding='SAME', scope='down_block5')

    # upsampling path
    up_out4 = self._block_up(down_inputs=down_out4, up_inputs=down_out5,
                             filters=init_filter * 8, padding='SAME', scope='up_block4')
    up_out3 = self._block_up(down_inputs=down_out3, up_inputs=up_out4,
                             filters=init_filter * 4, padding='SAME', scope='up_block3')
    up_out2 = self._block_up(down_inputs=down_out2, up_inputs=up_out3,
                             filters=init_filter * 2, padding='SAME', scope='up_block2')
    up_out1 = self._block_up(down_inputs=down_out1, up_inputs=up_out2,
                             filters=init_filter, padding='SAME', scope='up_block1')

    # final layers: 1x1 convolution producing per-class logits
    ## TODO
    self.logits = layers.conv2d(inputs=up_out1, filters=self.num_classes,
                                kernel_size=1, stride_size=1, padding='SAME',
                                normalization=None, activation=None,
                                mode=self.mode, name='final_layer')
    self.preds = tf.nn.sigmoid(self.logits)
    self.next_labels = self.datapipe.next_labels
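
# Hypothetical sketch (an assumption, not this repo's loss): the logits and
# labels set up in _build_graph above would typically feed a per-pixel
# sigmoid cross-entropy loss.
def segmentation_loss(logits, labels):
    per_pixel = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
    return tf.reduce_mean(per_pixel)

# usage inside the model class (attribute names follow _build_graph above):
# self.loss = segmentation_loss(self.logits, self.next_labels)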
def _build_conv2d(inputs, spec, name):
    opts = _get_options(spec)
    inputs = layers.conv2d(inputs=inputs, filters=spec['filters'],
                           kernel_size=spec['kernel_size'],
                           stride_size=spec['stride_size'],
                           padding=opts['padding'],
                           kernel_initializer=opts['kernel_initializer'],
                           bias_initializer=opts['bias_initializer'],
                           activation=opts['activation'], name=name)
    return inputs
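
# Hypothetical usage sketch for _build_conv2d: the exact keys that
# _get_options resolves are an assumption based on the fields read above.
example_spec = {
    'filters': 64,
    'kernel_size': 3,
    'stride_size': 1,
    # keys presumably filled in with defaults by _get_options:
    # 'padding', 'kernel_initializer', 'bias_initializer', 'activation'
}
# out = _build_conv2d(inputs=some_tensor, spec=example_spec, name='conv_from_spec')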
def _build_graph(self, images, log_scope):
    '''
    # initial log scope
    if self.log_scope is None:
        shape = self.images.get_shape().as_list()
        B, H, W, C = 16, shape[1], shape[2], 1
        # np.ones([self.datapipe.batch_size, H, W, C]).astype(np.float32)
        self.log_scope = tf.get_variable('log_scope0', shape=[B, H, W, C],
                                         initializer=tf.zeros_initializer(),
                                         dtype=tf.float32, trainable=False)
    '''
    # concatenate the image with the current log scope along the channel axis
    inputs = tf.concat([images, log_scope], axis=-1)

    # downsampling path
    init_filter = 16
    down_out1, maxp1 = self._block_down(inputs=inputs, filters=init_filter,
                                        padding='SAME', scope='down_block1')
    down_out2, maxp2 = self._block_down(inputs=maxp1, filters=init_filter * 2,
                                        padding='SAME', scope='down_block2')
    down_out3, maxp3 = self._block_down(inputs=maxp2, filters=init_filter * 4,
                                        padding='SAME', scope='down_block3')
    down_out4, maxp4 = self._block_down(inputs=maxp3, filters=init_filter * 8,
                                        padding='SAME', scope='down_block4')
    down_out5, maxp5 = self._block_down(inputs=maxp4, filters=init_filter * 16,
                                        padding='SAME', scope='down_block5')

    # they put a 3-layer MLP here, at the bottleneck
    shape = down_out4.get_shape().as_list()
    H, W, C = shape[1], shape[2], shape[3]
    print('down_out4: ', shape)
    print('down_out5: ', down_out5.shape)
    out = layers.flatten(down_out5)
    print('flatten shape: ', out.shape)
    out = layers.dense(inputs=out, units=128, activation=tf.nn.relu, name='layer1')
    out = layers.dense(inputs=out, units=128, activation=tf.nn.relu, name='layer2')
    out = layers.dense(inputs=out, units=H * W * C, activation=tf.nn.relu, name='layer3')
    out = tf.reshape(out, shape=[-1, H, W, C])
    print('upsampling input shape: ', out.shape)

    # upsampling path
    up_out4 = self._block_up(down_inputs=down_out4, up_inputs=out,
                             filters=init_filter * 8, padding='SAME',
                             scope='up_block4', upsampling=False)
    print('built up_out4...')
    up_out3 = self._block_up(down_inputs=down_out3, up_inputs=up_out4,
                             filters=init_filter * 4, padding='SAME', scope='up_block3')
    print('built up_out3...')
    up_out2 = self._block_up(down_inputs=down_out2, up_inputs=up_out3,
                             filters=init_filter * 2, padding='SAME', scope='up_block2')
    print('built up_out2...')
    up_out1 = self._block_up(down_inputs=down_out1, up_inputs=up_out2,
                             filters=init_filter, padding='SAME', scope='up_block1')
    print('built up_out1...')

    # final layers
    ## TODO
    logits = layers.conv2d(inputs=up_out1, filters=1, kernel_size=1, stride_size=1,
                           padding='SAME', normalization=None, activation=None,
                           mode=self.mode, name='final_layer')
    '''
    # log softmax DOES NOT WORK PROPERLY
    # compute log_softmax for the current attention
    shape = tf.shape(logits)
    N, H, W, C = shape[0], shape[1], shape[2], shape[3]
    print('logits shape before: ', logits.shape)
    logits = layers.flatten(logits)
    print('logits shape: ', logits.shape)
    log_softmax = tf.nn.log_softmax(logits=logits, axis=-1)
    log_a_k = tf.reshape(log_softmax, [N, H, W, C])
    print('log_softmax shape: ', log_a_k.shape)
    '''
    log_a_k = tf.log_sigmoid(logits)
    return log_a_k
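
# Hypothetical sketch of how an attention network returning log a_k, such as
# _build_graph above, can be unrolled over the scope (MONet-style):
# m_k = s_k * a_k and s_{k+1} = s_k * (1 - a_k), kept in log space for
# numerical stability. attention_net and num_slots are assumptions, not this
# repo's API; variable sharing across steps (e.g. reuse=tf.AUTO_REUSE) is
# omitted for brevity.
def unroll_attention(images, attention_net, num_slots):
    log_scope = tf.zeros_like(images[..., :1])           # log s_0 = 0, i.e. scope = 1
    log_masks = []
    for _ in range(num_slots - 1):
        log_a_k = attention_net(images, log_scope)       # log a_k
        log_masks.append(log_scope + log_a_k)            # log m_k = log s_k + log a_k
        log_scope = log_scope + tf.log1p(-tf.exp(log_a_k))  # log s_{k+1}
    log_masks.append(log_scope)                          # last slot takes the remaining scope
    return log_masks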