def feed_forward(self, x: tf.Tensor, is_training: bool = True,
                 nodes_to_return: RequiredNodes = None) -> NetworkOutput:
    """Run the three-resolution forward pass and return upsampled logits.

    The input is mean-centred (when a mean image is configured), scaled to
    [0, 1], pushed through the big / medium / small resolution branches,
    the branch outputs are fused pairwise, and the class logits are
    bilinearly upsampled x4 back towards input resolution.

    :param x: input image batch tensor
    :param is_training: toggles batch-norm training behaviour in the
        branches and fusion blocks
    :param nodes_to_return: optional intermediate nodes to expose in the
        constructed output
    :return: network output built by ``self._construct_output``
    """
    if self._image_mean is not None:
        x -= self._image_mean
    x = tf.math.divide(x, 255.0)
    input_size = x.shape[1], x.shape[2]
    conv3_sub1_proj = self.__big_images_branch(x=x, is_training=is_training)
    conv3_1 = self.__medium_images_branch(x)
    conv3_1_sub2_proj = bottleneck_conv2d(x=conv3_1,
                                          num_filters=128,
                                          activation=None,
                                          name='conv3_1_sub2_proj')
    # BUG FIX: is_training was previously not forwarded here, so the
    # batch-norm layers inside the small branch always ran with
    # training=True (their default), even during inference.
    conv_sub4 = self.__small_images_branch(conv3_1=conv3_1,
                                           input_size=input_size,
                                           is_training=is_training)
    conv_sub2 = self.__medium_small_branch_fusion(
        conv_sub4=conv_sub4,
        conv3_1_sub2_proj=conv3_1_sub2_proj,
        is_training=is_training)
    conv6_cls = self.__big_medium_branch_fusion(
        conv_sub2=conv_sub2,
        conv3_sub1_proj=conv3_sub1_proj)
    conv6_interp = upsample_bilinear(x=conv6_cls, zoom_factor=4)
    return self._construct_output(feedforward_output=conv6_interp,
                                  nodes_to_return=nodes_to_return)
def __small_images_branch(self, conv3_1: tf.Tensor, input_size: Size,
                          is_training: bool = True) -> tf.Tensor:
    """Lowest-resolution branch: deep encoders plus pyramid pooling.

    Downscales the shared medium-branch features once more, runs the deep
    residual-encoder chain, applies pyramid pooling, projects the result
    and brings it back up x2 through a dilated convolution.

    :param conv3_1: shared feature map produced by the medium branch
    :param input_size: spatial (height, width) of the network input
    :param is_training: toggles batch-norm training behaviour
    :return: batch-normalized quarter-branch output tensor
    """
    # Shrink the shared features a further x2 before the deep encoders.
    shrunk = downsample_bilinear(x=conv3_1, shrink_factor=2)
    encoded = self.__residual_encoder_chain(
        x=shrunk,
        encoders_configs=ICNetBackbone.__SMALL_BRANCH_ENCODERS_CONFIGS,
        is_training=is_training)
    # (pool window, stride) pairs for each pyramid level.
    pyramid_levels = [
        ((32, 64), (32, 64)),
        ((16, 32), (16, 32)),
        ((13, 25), (10, 20)),
        ((8, 16), (5, 10)),
    ]
    # Feature map at this depth is 1/32 of the input resolution.
    pooled_size = input_size[0] // 32, input_size[1] // 32
    pooled = pyramid_pooling(x=encoded,
                             pooling_config=pyramid_levels,
                             output_size=pooled_size)
    projected = bottleneck_conv2d(x=pooled, num_filters=256,
                                  name='conv5_4_k1')
    projected = tf.layers.batch_normalization(inputs=projected,
                                              training=is_training,
                                              name='conv5_4_k1_bn')
    enlarged = upsample_bilinear(x=projected, zoom_factor=2)
    dilated = atrous_conv2d(x=enlarged, num_filters=128,
                            kernel_size=(3, 3), name='conv_sub4')
    return tf.layers.batch_normalization(inputs=dilated,
                                         training=is_training,
                                         name='conv_sub4_bn')
def feed_forward(self, x: tf.Tensor, is_training: bool = True,
                 nodes_to_return: RequiredNodes = None) -> NetworkOutput:
    """Full forward pass producing per-pixel class predictions.

    Normalizes the input, evaluates the big / medium / small branches,
    fuses them pairwise, upsamples the class logits x4 and takes the
    argmax over the channel axis to obtain integer class labels.

    :param x: input image batch tensor
    :param is_training: toggles batch-norm training behaviour
    :param nodes_to_return: optional intermediate nodes to expose
    :return: network output built by ``self._construct_output``
    """
    if self._image_mean is not None:
        x -= self._image_mean
    x = tf.math.divide(x, 255.0)
    big_out = self.__big_images_branch(x=x, is_training=is_training)
    half_res_input = downsample_bilinear(x=x, shrink_factor=2)
    medium_common = self.__medium_branch_head(x=half_res_input,
                                              is_training=is_training)
    medium_tail = self.__medium_branch_tail(x=medium_common,
                                            is_training=is_training)
    small_out = self.__small_branch(x=medium_common,
                                    is_training=is_training)
    medium_small_fused = self.__medium_small_branch_fusion(
        small_branch_output=small_out,
        medium_branch_output=medium_tail,
        is_training=is_training)
    # NOTE(review): 'big_branch_outtput' mirrors the callee's (apparently
    # misspelled) parameter name — keep in sync if that signature changes.
    big_medium_fused = self.__big_medium_branch_fusion(
        fused_medium_branch=medium_small_fused,
        big_branch_outtput=big_out,
        is_training=is_training)
    logits = self.__prediction_branch(big_medium_fusion=big_medium_fused)
    logits_full = upsample_bilinear(x=logits, zoom_factor=4)
    labels = tf.math.argmax(logits_full, axis=3,
                            output_type=tf.dtypes.int32)
    return self._construct_output(feedforward_output=labels,
                                  nodes_to_return=nodes_to_return)
def __prediction_branch(self, big_medium_fusion: tf.Tensor) -> tf.Tensor:
    """Upsample the fused features x2 and project them to class logits.

    :param big_medium_fusion: output of the big/medium branch fusion
    :return: un-activated per-class logits tensor (node name 'cls')
    """
    quarter_res = upsample_bilinear(x=big_medium_fusion, zoom_factor=2)
    return dim_hold_conv2d(x=quarter_res,
                           num_filters=self._output_classes,
                           kernel_size=(3, 3),
                           activation=None,
                           name='cls')
def __branch_fusion(
        self,
        first_branch: tf.Tensor,
        second_branch: tf.Tensor,
        output_filters: int,
        fusion_name: str,
        output_name: str,
        output_activation: Optional[str] = 'relu',
        dilation_rate: Tuple[int, int] = (2, 2)
) -> tf.Tensor:
    """Fuse two branch outputs: add -> ReLU -> x2 upsample -> atrous conv.

    :param first_branch: first branch tensor (must be shape-compatible
        with the second for element-wise addition)
    :param second_branch: second branch tensor
    :param output_filters: number of filters in the output convolution
    :param fusion_name: graph name for the addition node (its ReLU is
        named '<fusion_name>/relu')
    :param output_name: graph name for the output atrous convolution
    :param output_activation: activation of the output convolution
    :param dilation_rate: dilation of the output convolution
    :return: fused, upsampled and convolved tensor
    """
    # Renamed from 'sum' — the original local shadowed the builtin.
    fused = tf.add(first_branch, second_branch, name=fusion_name)
    fused_relu = tf.nn.relu(fused, name=f'{fusion_name}/relu')
    fused_interp = upsample_bilinear(x=fused_relu, zoom_factor=2)
    return atrous_conv2d(x=fused_interp,
                         num_filters=output_filters,
                         kernel_size=(3, 3),
                         dilation_rate=dilation_rate,
                         activation=output_activation,
                         name=output_name)
def __cascade_fusion_block(
        self,
        smaller_input: tf.Tensor,
        bigger_input: tf.Tensor,
        is_training: bool,
        output_filters: int,
        base_name: str,
) -> tf.Tensor:
    """CFF-style fusion of a low-resolution and a high-resolution tensor.

    The smaller tensor is upsampled x2, convolved and batch-normalized;
    the bigger tensor is projected to the same filter count; the two are
    added and passed through ReLU.

    :param smaller_input: lower-resolution feature map
    :param bigger_input: higher-resolution feature map
    :param is_training: toggles batch-norm training behaviour
    :param output_filters: filter count both inputs are mapped to
    :param base_name: prefix for all graph node names in this block
    :return: activated fusion result (node '<base_name>/relu')
    """
    enlarged = upsample_bilinear(x=smaller_input, zoom_factor=2)
    enlarged = dim_hold_conv2d(x=enlarged,
                               num_filters=output_filters,
                               kernel_size=(3, 3),
                               name=f'{base_name}/fusion_conv')
    enlarged_bn = tf.layers.batch_normalization(
        inputs=enlarged,
        training=is_training,
        name=f'{base_name}/fusion_conv_bn')
    # Match the bigger input's channel count via a 1x1 projection.
    projected = bottleneck_conv2d(x=bigger_input,
                                  num_filters=output_filters,
                                  name=f'{base_name}/bigger_input_fs')
    merged = tf.math.add(enlarged_bn, projected, name=f'{base_name}/add')
    return tf.nn.relu(merged, name=f'{base_name}/relu')