def neuronet_3d(inputs,
                num_classes,
                protocols,
                num_res_units=2,
                filters=(16, 32, 64, 128),
                strides=((1, 1, 1), (2, 2, 2), (2, 2, 2), (2, 2, 2)),
                mode=tf.estimator.ModeKeys.EVAL,
                use_bias=False,
                activation=tf.nn.relu6,
                kernel_initializer=tf.initializers.variance_scaling(
                    distribution='uniform'),
                bias_initializer=tf.zeros_initializer(),
                kernel_regularizer=None,
                bias_regularizer=None):
    """NeuroNet [1]: multi-task FCN segmentation with a shared encoder.

    NeuroNet is a multi-task image segmentation network based on an FCN
    architecture [2] using residual units [3] as feature extractors.
    Downsampling and upsampling of features is done via strided
    convolutions and transpose convolutions, respectively. On each
    resolution scale s are num_res_units with filter size = filters[s].
    strides[s] determine the downsampling factor at each resolution scale.
    One decoder "tail" is built per labeling protocol, all sharing the
    same encoder.

    [1] M. Rajchl et al. NeuroNet: Fast and Robust Reproduction of
        Multiple Brain Image Segmentation Pipelines. MIDL 2018.
    [2] J. Long et al. Fully convolutional networks for semantic
        segmentation. CVPR 2015.
    [3] K. He et al. Identity Mappings in Deep Residual Networks.
        ECCV 2016.

    Args:
        inputs (tf.Tensor): Input feature tensor to the network (rank 5
            required).
        num_classes (list): Number of output classes per protocol; must
            have the same length as `protocols`.
        protocols (list): Identifiers of the labeling protocols; used to
            key the per-tail entries of the returned dict.
        num_res_units (int, optional): Number of residual units at each
            resolution scale.
        filters (tuple, optional): Number of filters for all residual
            units at each resolution scale.
        strides (tuple, optional): Stride of the first unit on a
            resolution scale.
        mode (TYPE, optional): One of the tf.estimator.ModeKeys strings:
            TRAIN, EVAL or PREDICT.
        use_bias (bool, optional): Boolean, whether the layer uses a bias.
        activation (optional): A function to use as activation function
            inside the residual units.
        kernel_initializer (TYPE, optional): An initializer for the
            convolution kernel.
        bias_initializer (TYPE, optional): An initializer for the bias
            vector. If None, no bias will be applied.
        kernel_regularizer (None, optional): Optional regularizer for the
            convolution kernel.
        bias_regularizer (None, optional): Optional regularizer for the
            bias vector.

    Returns:
        dict: dictionary of output tensors ('encoder_out' plus
            'logits_<p>', 'y_prob_<p>', 'y_<p>' for each protocol p).
    """
    outputs = {}
    assert len(strides) == len(filters)
    assert len(inputs.get_shape().as_list()) == 5, \
        'inputs are required to have a rank of 5.'
    assert len(protocols) == len(num_classes)

    conv_params = {'use_bias': use_bias,
                   'kernel_initializer': kernel_initializer,
                   'bias_initializer': bias_initializer,
                   'kernel_regularizer': kernel_regularizer,
                   'bias_regularizer': bias_regularizer}

    x = inputs

    # Initial convolution with filters[0]
    x = tf.layers.conv3d(inputs=x,
                         filters=filters[0],
                         kernel_size=(3, 3, 3),
                         strides=strides[0],
                         padding='same',
                         **conv_params)
    tf.logging.info('Init conv tensor shape {}'.format(x.get_shape()))

    # Residual feature encoding blocks with num_res_units at different
    # resolution scales res_scales
    res_scales = [x]
    saved_strides = []
    with tf.variable_scope('encoder'):
        for res_scale in range(1, len(filters)):
            # Features are downsampled via strided convolutions. These are
            # defined in `strides` and subsequently saved.
            with tf.variable_scope('unit_{}_0'.format(res_scale)):
                # BUGFIX: `activation` was accepted but never forwarded, so
                # the argument was silently ignored. The residual unit's
                # default is assumed to match tf.nn.relu6 (as the sibling
                # functions in this file suggest) — confirm against
                # vanilla_residual_unit_3d's signature.
                x = vanilla_residual_unit_3d(
                    inputs=x,
                    out_filters=filters[res_scale],
                    strides=strides[res_scale],
                    activation=activation,
                    mode=mode)
            saved_strides.append(strides[res_scale])

            for i in range(1, num_res_units):
                with tf.variable_scope('unit_{}_{}'.format(res_scale, i)):
                    x = vanilla_residual_unit_3d(
                        inputs=x,
                        out_filters=filters[res_scale],
                        strides=(1, 1, 1),
                        activation=activation,
                        mode=mode)
            res_scales.append(x)
            tf.logging.info('Encoder at res_scale {} tensor shape: {}'.format(
                res_scale, x.get_shape()))

    outputs['encoder_out'] = x

    tails = []
    for tail in range(len(num_classes)):
        # Create a separate prediction tail for each labeling protocol to
        # learn; all tails read from the shared encoder output.
        with tf.variable_scope('tail_{}'.format(tail)):
            x = outputs['encoder_out']
            for res_scale in range(len(filters) - 2, -1, -1):
                # Upscore layers [2] reconstruct the predictions to
                # higher resolution scales
                with tf.variable_scope('upscore_{}'.format(res_scale)):
                    x = upscore_layer_3d(
                        inputs=x,
                        inputs2=res_scales[res_scale],
                        out_filters=num_classes[tail],
                        strides=saved_strides[res_scale],
                        mode=mode,
                        **conv_params)
                tf.logging.info(
                    'Decoder at res_scale {} tensor shape: {}'.format(
                        res_scale, x.get_shape()))

            # Last convolution: 1x1x1 projection to this tail's classes.
            with tf.variable_scope('last'):
                tails.append(tf.layers.conv3d(inputs=x,
                                              filters=num_classes[tail],
                                              kernel_size=(1, 1, 1),
                                              strides=(1, 1, 1),
                                              padding='same',
                                              **conv_params))
    tf.logging.info('Output tensor shape {}'.format(x.get_shape()))

    # Define the outputs: per-protocol logits, softmax probabilities and
    # argmax predictions.
    for i in range(len(tails)):
        outputs['logits_{}'.format(protocols[i])] = tails[i]
        with tf.variable_scope('pred'):
            outputs['y_prob_{}'.format(protocols[i])] = tf.nn.softmax(tails[i])
            outputs['y_{}'.format(protocols[i])] = tf.argmax(tails[i], axis=-1)

    return outputs
def residual_unet_3d(inputs,
                     num_classes,
                     num_res_units=1,
                     filters=(16, 32, 64, 128),
                     strides=((1, 1, 1), (2, 2, 2), (2, 2, 2), (2, 2, 2)),
                     mode=tf.estimator.ModeKeys.EVAL,
                     use_bias=False,
                     kernel_initializer=tf.initializers.variance_scaling(
                         distribution='uniform'),
                     bias_initializer=tf.zeros_initializer(),
                     kernel_regularizer=None,
                     bias_regularizer=None):
    """Residual 3D U-Net for image segmentation.

    Flexible UNET architecture [1] with residual units [2] as feature
    extractors. Features are downsampled with strided convolutions and
    upsampled with transpose convolutions. Resolution scale s holds
    num_res_units units with filters[s] filters; strides[s] sets the
    downsampling factor at that scale.

    [1] O. Ronneberger et al. U-Net: Convolutional Networks for Biomedical
        Image Segmentation. MICCAI 2015.
    [2] K. He et al. Identity Mappings in Deep Residual Networks. ECCV 2016.

    Args:
        inputs (tf.Tensor): Input feature tensor to the network (rank 5
            required).
        num_classes (int): Number of output classes.
        num_res_units (int, optional): Number of residual units at each
            resolution scale.
        filters (tuple, optional): Number of filters for all residual
            units at each resolution scale.
        strides (tuple, optional): Stride of the first unit on a
            resolution scale.
        mode (TYPE, optional): One of the tf.estimator.ModeKeys strings:
            TRAIN, EVAL or PREDICT.
        use_bias (bool, optional): Boolean, whether the layer uses a bias.
        kernel_initializer (TYPE, optional): An initializer for the
            convolution kernel.
        bias_initializer (TYPE, optional): An initializer for the bias
            vector. If None, no bias will be applied.
        kernel_regularizer (None, optional): Optional regularizer for the
            convolution kernel.
        bias_regularizer (None, optional): Optional regularizer for the
            bias vector.

    Returns:
        dict: dictionary of output tensors ('logits', 'y_prob', 'y_').
    """
    assert len(strides) == len(filters)
    assert len(inputs.get_shape().as_list()) == 5, \
        'inputs are required to have a rank of 5.'

    outputs = {}
    conv_params = dict(padding='same',
                       use_bias=use_bias,
                       kernel_initializer=kernel_initializer,
                       bias_initializer=bias_initializer,
                       kernel_regularizer=kernel_regularizer,
                       bias_regularizer=bias_regularizer)

    # Stem: plain 3x3x3 convolution to filters[0] channels.
    net = tf.layers.conv3d(inputs=inputs,
                           filters=filters[0],
                           kernel_size=(3, 3, 3),
                           strides=strides[0],
                           **conv_params)
    tf.logging.info('Init conv tensor shape {}'.format(net.get_shape()))

    # Encoder: remember each scale's output for the skip connections and
    # the stride used to reach it (needed again by the decoder).
    skips = [net]
    down_strides = []
    for scale in range(1, len(filters)):
        with tf.variable_scope('enc_unit_{}_0'.format(scale)):
            net = vanilla_residual_unit_3d(inputs=net,
                                           out_filters=filters[scale],
                                           strides=strides[scale],
                                           mode=mode)
        down_strides.append(strides[scale])

        for unit in range(1, num_res_units):
            with tf.variable_scope('enc_unit_{}_{}'.format(scale, unit)):
                net = vanilla_residual_unit_3d(inputs=net,
                                               out_filters=filters[scale],
                                               strides=(1, 1, 1),
                                               mode=mode)
        skips.append(net)
        tf.logging.info('Encoder at res_scale {} tensor shape: {}'.format(
            scale, net.get_shape()))

    # Decoder: upsample, concatenate the matching skip, then refine with
    # residual units at each scale [1].
    for scale in range(len(filters) - 2, -1, -1):
        with tf.variable_scope('up_concat_{}'.format(scale)):
            net = upsample_and_concat(inputs=net,
                                      inputs2=skips[scale],
                                      strides=down_strides[scale])

        for unit in range(0, num_res_units):
            with tf.variable_scope('dec_unit_{}_{}'.format(scale, unit)):
                net = vanilla_residual_unit_3d(inputs=net,
                                               out_filters=filters[scale],
                                               strides=(1, 1, 1),
                                               mode=mode)
        tf.logging.info('Decoder at res_scale {} tensor shape: {}'.format(
            scale, net.get_shape()))

    # Head: 1x1x1 projection to per-class logits.
    with tf.variable_scope('last'):
        net = tf.layers.conv3d(inputs=net,
                               filters=num_classes,
                               kernel_size=(1, 1, 1),
                               strides=(1, 1, 1),
                               **conv_params)
    tf.logging.info('Output tensor shape {}'.format(net.get_shape()))

    outputs['logits'] = net
    with tf.variable_scope('pred'):
        outputs['y_prob'] = tf.nn.softmax(net)
        if num_classes > 1:
            outputs['y_'] = tf.argmax(net, axis=-1)
        else:
            # Single-channel case: threshold the raw channel value at 0.5.
            outputs['y_'] = tf.cast(tf.greater_equal(net[..., 0], 0.5),
                                    tf.int32)

    return outputs
def resnet_3d(inputs,
              num_classes,
              num_res_units=1,
              filters=(16, 32, 64, 128),
              strides=((1, 1, 1), (2, 2, 2), (2, 2, 2), (2, 2, 2)),
              mode=tf.estimator.ModeKeys.EVAL,
              use_bias=False,
              activation=tf.nn.relu6,
              kernel_initializer=tf.initializers.variance_scaling(
                  distribution='uniform'),
              bias_initializer=tf.zeros_initializer(),
              kernel_regularizer=None,
              bias_regularizer=None):
    """Regression/classification 3D ResNet with segmentation feature input.

    Flexible resnet architecture [1] using residual units proposed in [2].
    Downsampling is done via strided convolutions. On each resolution
    scale s are num_res_units with filter size = filters[s]; strides[s]
    determine the downsampling factor at each resolution scale. Before the
    stem convolution, channels 1: of the input are aggregated into a
    single extra channel that is concatenated back onto the input.

    NOTE(review): a second function named `resnet_3d` appears later in
    this module; at import time that later definition shadows this one.

    [1] K. He et al. Deep residual learning for image recognition.
        CVPR 2016.
    [2] K. He et al. Identity Mappings in Deep Residual Networks.
        ECCV 2016.

    Args:
        inputs (tf.Tensor): Input feature tensor to the network (rank 5
            required). Assumed layout is (batch, z, y, x, channels) with
            at least 2 channels, where channels 1: carry segmentation
            feature maps — TODO confirm against the caller.
        num_classes (int): Number of output channels or classes.
        num_res_units (int, optional): Number of residual units per
            resolution scale.
        filters (tuple, optional): Number of filters for all residual
            units at each resolution scale.
        strides (tuple, optional): Stride of the first unit on a
            resolution scale.
        mode (TYPE, optional): One of the tf.estimator.ModeKeys strings:
            TRAIN, EVAL or PREDICT.
        use_bias (bool, optional): Boolean, whether the layer uses a bias.
        activation (optional): A function to use as activation function
            in the residual units.
        kernel_initializer (TYPE, optional): An initializer for the
            convolution kernel.
        bias_initializer (TYPE, optional): An initializer for the bias
            vector. If None, no bias will be applied.
        kernel_regularizer (None, optional): Optional regularizer for the
            convolution kernel.
        bias_regularizer (None, optional): Optional regularizer for the
            bias vector.

    Returns:
        dict: dictionary of output tensors ('logits', 'y_prob', 'y_').
    """
    outputs = {}
    assert len(strides) == len(filters)
    assert len(inputs.get_shape().as_list()) == 5, \
        'inputs are required to have a rank of 5.'

    relu_op = tf.nn.relu6
    conv_params = {
        'padding': 'same',
        'use_bias': use_bias,
        'kernel_initializer': kernel_initializer,
        'bias_initializer': bias_initializer,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer
    }

    # Segmentation Feature Aggregation: collapse channels 1: into one map
    # via a 1x1x1 convolution and append it as an extra input channel.
    x = inputs
    F = tf.layers.conv3d(inputs=x[:, :, :, :, 1:],
                         filters=1,
                         kernel_size=1,
                         strides=(1, 1, 1),
                         **conv_params)
    x = tf.concat(values=[x, F], axis=4)

    # VISUALIZE SEGMENTATION FEATURE MAPS
    # Take batch item 0, depth slice 0 of F (a single-channel map, since
    # the conv above uses filters=1), pad by one pixel per side and emit
    # it as a 2D image summary. Requires statically known spatial dims.
    shape = F.get_shape().as_list()
    ydim = shape[2]
    xdim = shape[3]
    featuremaps = shape[4]
    F = tf.slice(F, (0, 0, 0, 0, 0), (1, 1, -1, -1, -1))
    F = tf.reshape(F, (ydim, xdim, featuremaps))
    ydim += 2
    xdim += 2
    F = tf.image.resize_image_with_crop_or_pad(F, ydim, xdim)
    F = tf.reshape(F, (ydim, xdim, 1, 1))
    F = tf.transpose(F, (2, 0, 3, 1))
    F = tf.reshape(F, (1, 1 * ydim, 1 * xdim, 1))
    tf.summary.image('Segmentation Feature Maps', F, 50)

    # Initial convolution with filters[0]; kernel size is twice the
    # stride on downsampling axes, otherwise 3.
    k = [s * 2 if s > 1 else 3 for s in strides[0]]
    x = tf.layers.conv3d(x, filters[0], k, strides[0], **conv_params)
    tf.logging.info('Init conv tensor shape {}'.format(x.get_shape()))

    # Residual feature encoding blocks with num_res_units at different
    # resolution scales res_scales
    res_scales = [x]
    saved_strides = []
    for res_scale in range(1, len(filters)):
        # Features are downsampled via strided convolutions. These are
        # defined in `strides` and subsequently saved.
        with tf.variable_scope('unit_{}_0'.format(res_scale)):
            x = vanilla_residual_unit_3d(inputs=x,
                                         out_filters=filters[res_scale],
                                         strides=strides[res_scale],
                                         activation=activation,
                                         mode=mode)
        saved_strides.append(strides[res_scale])
        for i in range(1, num_res_units):
            with tf.variable_scope('unit_{}_{}'.format(res_scale, i)):
                x = vanilla_residual_unit_3d(inputs=x,
                                             out_filters=filters[res_scale],
                                             strides=(1, 1, 1),
                                             activation=activation,
                                             mode=mode)
        res_scales.append(x)
        tf.logging.info('Encoder at res_scale {} tensor shape: {}'.format(
            res_scale, x.get_shape()))

    # Global pool and last unit: BN + relu6, then average over all
    # spatial axes (everything but batch and channels).
    with tf.variable_scope('pool'):
        x = tf.layers.batch_normalization(
            x, training=mode == tf.estimator.ModeKeys.TRAIN)
        x = relu_op(x)
        axis = tuple(range(len(x.get_shape().as_list())))[1:-1]
        x = tf.reduce_mean(x, axis=axis, name='global_avg_pool')
        tf.logging.info('Global pool shape {}'.format(x.get_shape()))

    # Dense projection to num_classes logits (no activation).
    with tf.variable_scope('last'):
        x = tf.layers.dense(
            inputs=x,
            units=num_classes,
            activation=None,
            use_bias=conv_params['use_bias'],
            kernel_initializer=conv_params['kernel_initializer'],
            bias_initializer=conv_params['bias_initializer'],
            kernel_regularizer=conv_params['kernel_regularizer'],
            bias_regularizer=conv_params['bias_regularizer'],
            name='hidden_units')
    tf.logging.info('Output tensor shape {}'.format(x.get_shape()))

    # Define the outputs
    outputs['logits'] = x
    with tf.variable_scope('pred'):
        y_prob = tf.nn.softmax(x)
        outputs['y_prob'] = y_prob
        # Multi-class: argmax over logits. Single-channel: threshold the
        # raw logit at 0.5.
        y_ = tf.argmax(x, axis=-1) \
            if num_classes > 1 \
            else tf.cast(tf.greater_equal(x[..., 0], 0.5), tf.int32)
        outputs['y_'] = y_

    return outputs
def residual_fcn_3d(inputs,
                    num_classes,
                    num_res_units=1,
                    filters=(16, 32, 64, 128),
                    strides=((1, 1, 1), (2, 2, 2), (2, 2, 2), (2, 2, 2)),
                    mode=tf.estimator.ModeKeys.EVAL,
                    use_bias=False,
                    activation=tf.nn.relu6,
                    kernel_initializer=tf.initializers.variance_scaling(
                        distribution='uniform'),
                    bias_initializer=tf.zeros_initializer(),
                    kernel_regularizer=None,
                    bias_regularizer=None):
    """Residual FCN for 3D image segmentation.

    FCN architecture [1] with residual units [2] as feature extractors.
    Features are downsampled with strided convolutions; predictions are
    brought back to full resolution with upscore (transpose-convolution)
    layers. Resolution scale s holds num_res_units units with filters[s]
    filters; strides[s] sets the downsampling factor at that scale.

    [1] J. Long et al. Fully convolutional networks for semantic
        segmentation. CVPR 2015.
    [2] K. He et al. Identity Mappings in Deep Residual Networks.
        ECCV 2016.

    Args:
        inputs (tf.Tensor): Input feature tensor to the network (rank 5
            required).
        num_classes (int): Number of output classes.
        num_res_units (int, optional): Number of residual units at each
            resolution scale.
        filters (tuple, optional): Number of filters for all residual
            units at each resolution scale.
        strides (tuple, optional): Stride of the first unit on a
            resolution scale.
        mode (TYPE, optional): One of the tf.estimator.ModeKeys strings:
            TRAIN, EVAL or PREDICT.
        use_bias (bool, optional): Boolean, whether the layer uses a bias.
        activation (optional): A function to use as activation function
            in the residual units.
        kernel_initializer (TYPE, optional): An initializer for the
            convolution kernel.
        bias_initializer (TYPE, optional): An initializer for the bias
            vector. If None, no bias will be applied.
        kernel_regularizer (None, optional): Optional regularizer for the
            convolution kernel.
        bias_regularizer (None, optional): Optional regularizer for the
            bias vector.

    Returns:
        dict: dictionary of output tensors ('logits', 'y_prob', 'y_').
    """
    assert len(strides) == len(filters)
    assert len(inputs.get_shape().as_list()) == 5, \
        'inputs are required to have a rank of 5.'

    outputs = {}
    # 'padding' is deliberately not part of conv_params: the dict is also
    # forwarded to upscore_layer_3d, which does not take that argument.
    conv_params = dict(use_bias=use_bias,
                       kernel_initializer=kernel_initializer,
                       bias_initializer=bias_initializer,
                       kernel_regularizer=kernel_regularizer,
                       bias_regularizer=bias_regularizer)

    # Stem: plain 3x3x3 convolution to filters[0] channels.
    net = tf.layers.conv3d(inputs=inputs,
                           filters=filters[0],
                           kernel_size=(3, 3, 3),
                           strides=strides[0],
                           padding='same',
                           **conv_params)
    tf.logging.info('Init conv tensor shape {}'.format(net.get_shape()))

    # Encoder: remember each scale's output and the stride used to reach
    # it, both needed again by the upscore layers.
    skips = [net]
    down_strides = []
    for scale in range(1, len(filters)):
        with tf.variable_scope('unit_{}_0'.format(scale)):
            net = vanilla_residual_unit_3d(inputs=net,
                                           out_filters=filters[scale],
                                           strides=strides[scale],
                                           activation=activation,
                                           mode=mode)
        down_strides.append(strides[scale])

        for unit in range(1, num_res_units):
            with tf.variable_scope('unit_{}_{}'.format(scale, unit)):
                net = vanilla_residual_unit_3d(inputs=net,
                                               out_filters=filters[scale],
                                               strides=(1, 1, 1),
                                               activation=activation,
                                               mode=mode)
        skips.append(net)
        tf.logging.info('Encoder at res_scale {} tensor shape: {}'.format(
            scale, net.get_shape()))

    # Decoder: upscore layers [1] reconstruct the predictions to higher
    # resolution scales, fusing in the matching encoder output.
    for scale in range(len(filters) - 2, -1, -1):
        with tf.variable_scope('upscore_{}'.format(scale)):
            net = upscore_layer_3d(inputs=net,
                                   inputs2=skips[scale],
                                   out_filters=num_classes,
                                   strides=down_strides[scale],
                                   mode=mode,
                                   **conv_params)
        tf.logging.info('Decoder at res_scale {} tensor shape: {}'.format(
            scale, net.get_shape()))

    # Head: 1x1x1 projection to per-class logits.
    with tf.variable_scope('last'):
        net = tf.layers.conv3d(inputs=net,
                               filters=num_classes,
                               kernel_size=(1, 1, 1),
                               strides=(1, 1, 1),
                               padding='same',
                               **conv_params)
    tf.logging.info('Output tensor shape {}'.format(net.get_shape()))

    outputs['logits'] = net
    with tf.variable_scope('pred'):
        outputs['y_prob'] = tf.nn.softmax(net)
        if num_classes > 1:
            outputs['y_'] = tf.argmax(net, axis=-1)
        else:
            # Single-channel case: threshold the raw channel value at 0.5.
            outputs['y_'] = tf.cast(tf.greater_equal(net[..., 0], 0.5),
                                    tf.int32)

    return outputs
def _tile_feature_map_summary(features, name, max_outputs):
    """Emit a tf.summary.image tiling the channel maps of one z-slice.

    Takes batch item 0 and depth slice 0 of a rank-5 tensor (assumed
    layout (batch, z, y, x, channels) — consistent with the axis=4
    channel handling in this module), pads every (y, x) map by one pixel
    per side and arranges the channel maps in a 2 x (channels // 2) grid.

    Args:
        features (tf.Tensor): Rank-5 feature tensor; spatial and channel
            dims must be statically known and the channel count even.
        name (str): Name of the image summary.
        max_outputs (int): Max number of images passed to the summary op.
    """
    shape = features.get_shape().as_list()
    ydim, xdim, n_maps = shape[2], shape[3], shape[4]
    # Derive the tiling grid from the actual channel count. The previous
    # hard-coded 2 x 4 grid only worked for exactly 8 channels and failed
    # at graph construction for any other width.
    rows = 2
    cols = n_maps // rows
    f = tf.slice(features, (0, 0, 0, 0, 0), (1, 1, -1, -1, -1))
    f = tf.reshape(f, (ydim, xdim, n_maps))
    # One-pixel border so the tiles are visually separated.
    ydim += 2
    xdim += 2
    f = tf.image.resize_image_with_crop_or_pad(f, ydim, xdim)
    f = tf.reshape(f, (ydim, xdim, rows, cols))
    f = tf.transpose(f, (2, 0, 3, 1))
    f = tf.reshape(f, (1, rows * ydim, cols * xdim, 1))
    tf.summary.image(name, f, max_outputs)


def residual_3DPNet(inputs,
                    num_classes,
                    mode=tf.estimator.ModeKeys.EVAL,
                    seg__num_res_units=1,
                    seg__filters=(16, 32, 64, 128),
                    seg__strides=((1, 1, 1), (2, 2, 2), (2, 2, 2),
                                  (2, 2, 2)),
                    seg__use_bias=False,
                    seg__activation=leaky_relu,
                    seg__kernel_initializer=tf.initializers.variance_scaling(
                        distribution='uniform'),
                    seg__bias_initializer=tf.zeros_initializer(),
                    seg__kernel_regularizer=None,
                    seg__bias_regularizer=None,
                    seg__bottleneck=False,
                    clf__num_res_units=1,
                    clf__filters=(16, 32, 64, 128),
                    clf__strides=((1, 1, 1), (2, 2, 2), (2, 2, 2),
                                  (2, 2, 2)),
                    clf__use_bias=False,
                    clf__activation=tf.nn.relu6,
                    clf__kernel_initializer=tf.initializers.variance_scaling(
                        distribution='uniform'),
                    clf__bias_initializer=tf.zeros_initializer(),
                    clf__kernel_regularizer=None,
                    clf__bias_regularizer=None):
    """Segmentation-guided 3D classification network ("3DPNet").

    Two stages built in one graph:

    * Stage 1 (``seg__*`` parameters): a residual 3D U-Net [1][2] that
      turns the input CT patch into segmentation feature maps.
    * Stage 2 (``clf__*`` parameters): a 3D ResNet [3][2] that classifies
      the CT patch concatenated with the (batch-normalized) segmentation
      features from stage 1.

    [1] O. Ronneberger et al. U-Net: Convolutional Networks for
        Biomedical Image Segmentation. MICCAI 2015.
    [2] K. He et al. Identity Mappings in Deep Residual Networks.
        ECCV 2016.
    [3] K. He et al. Deep residual learning for image recognition.
        CVPR 2016.

    Args:
        inputs (tf.Tensor): Input feature tensor to the network (rank 5
            required).
        num_classes (int): Number of classifier output classes; also the
            channel count of the optional segmentation bottleneck.
        mode (TYPE, optional): One of the tf.estimator.ModeKeys strings:
            TRAIN, EVAL or PREDICT.
        seg__* / clf__*: per-stage counterparts of the num_res_units,
            filters, strides, use_bias, activation, initializer and
            regularizer arguments of residual_unet_3d / resnet_3d.
        seg__bottleneck (bool, optional): If True, project the
            segmentation features to num_classes channels with a 1x1x1
            convolution before handing them to the classifier.

    Returns:
        dict: dictionary of output tensors ('logits', 'y_prob', 'y_').
    """
    # ------------------------------------------------------------------
    # STAGE 1 -- SEGMENTATION (residual 3D U-Net)
    # Input: CT patch features. Output: segmentation feature maps.
    # ------------------------------------------------------------------
    assert len(seg__strides) == len(seg__filters)
    assert len(inputs.get_shape().as_list()) == 5, \
        'Inputs are required to have a rank of 5.'

    seg__conv_params = {
        'padding': 'same',
        'use_bias': seg__use_bias,
        'kernel_initializer': seg__kernel_initializer,
        'bias_initializer': seg__bias_initializer,
        'kernel_regularizer': seg__kernel_regularizer,
        'bias_regularizer': seg__bias_regularizer
    }

    x = inputs

    # Initial convolution with seg__filters[0]
    x = tf.layers.conv3d(inputs=x,
                         filters=seg__filters[0],
                         kernel_size=(3, 3, 3),
                         strides=seg__strides[0],
                         **seg__conv_params)
    tf.logging.info(
        'Segmentation Network: Initial Conv3D Tensor Shape: {}'.format(
            x.get_shape()))

    # Encoder: residual blocks at each resolution scale; downsampling via
    # strided convolutions (seg__strides), saved for the decoder.
    res_scales = [x]
    saved_strides = []
    for res_scale in range(1, len(seg__filters)):
        with tf.variable_scope('enc_unit_{}_0'.format(res_scale)):
            x = vanilla_residual_unit_3d(inputs=x,
                                         out_filters=seg__filters[res_scale],
                                         strides=seg__strides[res_scale],
                                         activation=seg__activation,
                                         mode=mode)
        saved_strides.append(seg__strides[res_scale])
        for i in range(1, seg__num_res_units):
            with tf.variable_scope('enc_unit_{}_{}'.format(res_scale, i)):
                x = vanilla_residual_unit_3d(
                    inputs=x,
                    out_filters=seg__filters[res_scale],
                    strides=(1, 1, 1),
                    activation=seg__activation,
                    mode=mode)
        res_scales.append(x)
        tf.logging.info(
            'Segmentation Network: Encoder at "res_scale" {} Tensor Shape: {}'.
            format(res_scale, x.get_shape()))

    # Decoder: upsample+concat, then residual refinement at each scale.
    for res_scale in range(len(seg__filters) - 2, -1, -1):
        with tf.variable_scope('up_concat_{}'.format(res_scale)):
            x = upsample_and_concat(inputs=x,
                                    inputs2=res_scales[res_scale],
                                    strides=saved_strides[res_scale])
        for i in range(0, seg__num_res_units):
            with tf.variable_scope('dec_unit_{}_{}'.format(res_scale, i)):
                # BUGFIX: the decoder units previously dropped
                # `seg__activation` and silently fell back to the residual
                # unit's default, unlike the encoder units above.
                x = vanilla_residual_unit_3d(
                    inputs=x,
                    out_filters=seg__filters[res_scale],
                    strides=(1, 1, 1),
                    activation=seg__activation,
                    mode=mode)
        tf.logging.info(
            'Segmentation Network: Decoder at "res_scale" {} Tensor Shape: {}'.
            format(res_scale, x.get_shape()))

    # Optional 1x1x1 bottleneck down to num_classes channels.
    if seg__bottleneck:
        with tf.variable_scope('last'):
            x = tf.layers.conv3d(inputs=x,
                                 filters=num_classes,
                                 kernel_size=(1, 1, 1),
                                 strides=(1, 1, 1),
                                 **seg__conv_params)
        tf.logging.info(
            'Segmentation Network: Bottleneck Output Tensor Shape: {}'.format(
                x.get_shape()))

    residual3Dunet_output = x

    # Visualize the segmentation feature maps. The tile grid is derived
    # from the actual channel count (previously hard-coded to 2 x 4,
    # which only worked for exactly 8 feature maps).
    _tile_feature_map_summary(residual3Dunet_output,
                              'Segmentation Feature Maps', 20)

    # ------------------------------------------------------------------
    # STAGE 2 -- CLASSIFICATION (3D ResNet)
    # Input: CT patch features (concat) segmentation feature maps.
    # Output: classification scores (logits, y_prob, y_).
    # ------------------------------------------------------------------
    resnet3D_output = {}
    assert len(clf__strides) == len(clf__filters)
    assert len(inputs.get_shape().as_list()) == 5, \
        'Inputs are required to have a rank of 5.'

    clf__conv_params = {
        'padding': 'same',
        'use_bias': clf__use_bias,
        'kernel_initializer': clf__kernel_initializer,
        'bias_initializer': clf__bias_initializer,
        'kernel_regularizer': clf__kernel_regularizer,
        'bias_regularizer': clf__bias_regularizer
    }

    # BatchNorm the segmentation features before concatenation.
    # BUGFIX: forward `training` so the moving statistics are updated in
    # TRAIN mode (the layer previously always ran in inference mode,
    # unlike every other batch_normalization call in this module).
    residual3Dunet_output = tf.layers.batch_normalization(
        inputs=residual3Dunet_output,
        axis=4,
        training=mode == tf.estimator.ModeKeys.TRAIN)
    x = tf.concat(values=[inputs, residual3Dunet_output], axis=4)

    # Initial convolution with clf__filters[0]; kernel size is twice the
    # stride on downsampling axes, otherwise 3.
    k = [s * 2 if s > 1 else 3 for s in clf__strides[0]]
    x = tf.layers.conv3d(x, clf__filters[0], k, clf__strides[0],
                         **clf__conv_params)
    tf.logging.info(
        'Classification Network: Initial Conv3D Tensor Shape: {}'.format(
            x.get_shape()))

    # Visualize first-layer classification feature maps.
    # BUGFIX: the previous hard-coded 2 x 4 reshape required exactly 8
    # channels and failed for the default clf__filters[0] == 16.
    _tile_feature_map_summary(x, 'Primary Classification Feature Maps', 20)

    # Encoder: residual blocks at each resolution scale (clf__strides).
    res_scales = [x]
    saved_strides = []
    for res_scale in range(1, len(clf__filters)):
        with tf.variable_scope('unit_{}_0'.format(res_scale)):
            x = vanilla_residual_unit_3d(inputs=x,
                                         out_filters=clf__filters[res_scale],
                                         strides=clf__strides[res_scale],
                                         activation=clf__activation,
                                         mode=mode)
        saved_strides.append(clf__strides[res_scale])
        for i in range(1, clf__num_res_units):
            with tf.variable_scope('unit_{}_{}'.format(res_scale, i)):
                x = vanilla_residual_unit_3d(
                    inputs=x,
                    out_filters=clf__filters[res_scale],
                    strides=(1, 1, 1),
                    activation=clf__activation,
                    mode=mode)
        res_scales.append(x)
        tf.logging.info(
            'Classification Network: Encoder at "res_scale" {} Tensor Shape: {}'
            .format(res_scale, x.get_shape()))

    # Global average pooling over all spatial axes + dense output layer.
    with tf.variable_scope('pool'):
        x = tf.layers.batch_normalization(
            x, training=mode == tf.estimator.ModeKeys.TRAIN)
        # BUGFIX: honor `clf__activation` (was hard-coded to tf.nn.relu6,
        # silently ignoring the parameter; the default is unchanged).
        x = clf__activation(x)
        axis = tuple(range(len(x.get_shape().as_list())))[1:-1]
        x = tf.reduce_mean(x, axis=axis, name='global_avg_pool')
        tf.logging.info(
            'Classification Network: Global Pooling Tensor Shape: {}'.format(
                x.get_shape()))

    with tf.variable_scope('last'):
        # NOTE: scope name 'last' is shared with the optional segmentation
        # bottleneck above; the variable names created inside differ
        # (conv3d vs. dense 'hidden_units'), so there is no collision.
        x = tf.layers.dense(
            inputs=x,
            units=num_classes,
            activation=None,
            use_bias=clf__conv_params['use_bias'],
            kernel_initializer=clf__conv_params['kernel_initializer'],
            bias_initializer=clf__conv_params['bias_initializer'],
            kernel_regularizer=clf__conv_params['kernel_regularizer'],
            bias_regularizer=clf__conv_params['bias_regularizer'],
            name='hidden_units')
    tf.logging.info(
        'Classification Network: Output Tensor Shape: {}'.format(
            x.get_shape()))

    # Outputs: raw logits, softmax probabilities and hard predictions.
    resnet3D_output['logits'] = x
    with tf.variable_scope('pred'):
        y_prob = tf.nn.softmax(x)
        resnet3D_output['y_prob'] = y_prob
        # Multi-class: argmax. Single-channel: threshold logit at 0.5.
        y_ = tf.argmax(x, axis=-1) \
            if num_classes > 1 \
            else tf.cast(tf.greater_equal(x[..., 0], 0.5), tf.int32)
        resnet3D_output['y_'] = y_

    return resnet3D_output
def resnet_3d(inputs,
              num_classes,
              num_res_units=1,
              filters=(16, 32, 64, 128),
              strides=((1, 1, 1), (2, 2, 2), (2, 2, 2), (2, 2, 2)),
              mode=tf.estimator.ModeKeys.EVAL,
              use_bias=False,
              activation=tf.nn.relu6,
              kernel_initializer=tf.initializers.variance_scaling(
                  distribution='uniform'),
              bias_initializer=tf.zeros_initializer(),
              kernel_regularizer=None,
              bias_regularizer=None):
    """Regression/classification network on a flexible 3D ResNet.

    Architecture from [1] with the pre-activation residual units of [2].
    Downsampling is done via strided convolutions; resolution scale s
    holds num_res_units units with filters[s] filters, and strides[s]
    sets the downsampling factor at that scale. A global average pool and
    a dense layer produce the logits.

    NOTE: this module defines `resnet_3d` twice; this later definition is
    the one visible after import.

    [1] K. He et al. Deep residual learning for image recognition.
        CVPR 2016.
    [2] K. He et al. Identity Mappings in Deep Residual Networks.
        ECCV 2016.

    Args:
        inputs (tf.Tensor): Input feature tensor to the network (rank 5
            required).
        num_classes (int): Number of output channels or classes.
        num_res_units (int, optional): Number of residual units per
            resolution scale.
        filters (tuple, optional): Number of filters for all residual
            units at each resolution scale.
        strides (tuple, optional): Stride of the first unit on a
            resolution scale.
        mode (TYPE, optional): One of the tf.estimator.ModeKeys strings:
            TRAIN, EVAL or PREDICT.
        use_bias (bool, optional): Boolean, whether the layer uses a bias.
        activation (optional): A function to use as activation function
            in the residual units.
        kernel_initializer (TYPE, optional): An initializer for the
            convolution kernel.
        bias_initializer (TYPE, optional): An initializer for the bias
            vector. If None, no bias will be applied.
        kernel_regularizer (None, optional): Optional regularizer for the
            convolution kernel.
        bias_regularizer (None, optional): Optional regularizer for the
            bias vector.

    Returns:
        dict: dictionary of output tensors ('logits', 'y_prob', 'y_').
    """
    assert len(strides) == len(filters)
    assert len(inputs.get_shape().as_list()) == 5, \
        'inputs are required to have a rank of 5.'

    outputs = {}
    conv_params = dict(padding='same',
                       use_bias=use_bias,
                       kernel_initializer=kernel_initializer,
                       bias_initializer=bias_initializer,
                       kernel_regularizer=kernel_regularizer,
                       bias_regularizer=bias_regularizer)

    # Stem: kernel size is twice the stride on downsampling axes,
    # otherwise 3.
    stem_kernel = [s * 2 if s > 1 else 3 for s in strides[0]]
    net = tf.layers.conv3d(inputs, filters[0], stem_kernel, strides[0],
                           **conv_params)
    tf.logging.info('Init conv tensor shape {}'.format(net.get_shape()))

    # Encoder: stack residual units, recording each scale's output and
    # the stride used to reach it.
    scale_outputs = [net]
    applied_strides = []
    for scale in range(1, len(filters)):
        with tf.variable_scope('unit_{}_0'.format(scale)):
            net = vanilla_residual_unit_3d(inputs=net,
                                           out_filters=filters[scale],
                                           strides=strides[scale],
                                           activation=activation,
                                           mode=mode)
        applied_strides.append(strides[scale])
        for unit in range(1, num_res_units):
            with tf.variable_scope('unit_{}_{}'.format(scale, unit)):
                net = vanilla_residual_unit_3d(inputs=net,
                                               out_filters=filters[scale],
                                               strides=(1, 1, 1),
                                               activation=activation,
                                               mode=mode)
        scale_outputs.append(net)
        tf.logging.info('Encoder at res_scale {} tensor shape: {}'.format(
            scale, net.get_shape()))

    # BN + relu6, then average over every axis except batch and channels.
    with tf.variable_scope('pool'):
        net = tf.layers.batch_normalization(
            net, training=mode == tf.estimator.ModeKeys.TRAIN)
        net = tf.nn.relu6(net)
        spatial_axes = tuple(range(len(net.get_shape().as_list())))[1:-1]
        net = tf.reduce_mean(net, axis=spatial_axes, name='global_avg_pool')
        tf.logging.info('Global pool shape {}'.format(net.get_shape()))

    # Dense projection to num_classes logits (no activation).
    with tf.variable_scope('last'):
        net = tf.layers.dense(
            inputs=net,
            units=num_classes,
            activation=None,
            use_bias=conv_params['use_bias'],
            kernel_initializer=conv_params['kernel_initializer'],
            bias_initializer=conv_params['bias_initializer'],
            kernel_regularizer=conv_params['kernel_regularizer'],
            bias_regularizer=conv_params['bias_regularizer'],
            name='hidden_units')
    tf.logging.info('Output tensor shape {}'.format(net.get_shape()))

    outputs['logits'] = net
    with tf.variable_scope('pred'):
        outputs['y_prob'] = tf.nn.softmax(net)
        if num_classes > 1:
            outputs['y_'] = tf.argmax(net, axis=-1)
        else:
            # Single-channel case: threshold the raw logit at 0.5.
            outputs['y_'] = tf.cast(tf.greater_equal(net[..., 0], 0.5),
                                    tf.int32)

    return outputs
def residual_encoder(inputs,
                     num_classes,
                     num_res_units=1,
                     filters=(16, 32, 64, 128),
                     strides=((1, 1, 1), (2, 2, 2), (2, 2, 2), (2, 2, 2)),
                     use_bias=False,
                     activation=leaky_relu,
                     mode=tf.estimator.ModeKeys.EVAL,
                     kernel_initializer=tf.initializers.variance_scaling(
                         distribution='uniform'),
                     bias_initializer=tf.zeros_initializer(),
                     kernel_regularizer=None,
                     bias_regularizer=None):
    """Residual feature encoder for rank-5 (3D image) inputs.

    Applies an initial 3x3x3 convolution, then stacks `num_res_units`
    residual units per resolution scale; the first unit of each scale
    downsamples via the strided convolution given in `strides`. A final
    1x1x1 convolution followed by a global average over the spatial axes
    produces a pooled logits tensor.

    Args:
        inputs (tf.Tensor): Input feature tensor (rank 5 required).
        num_classes (int): Number of channels of the final 1x1x1 conv.
        num_res_units (int, optional): Residual units per resolution scale.
        filters (tuple, optional): Filters for the units at each scale.
        strides (tuple, optional): Stride of the first unit on each scale.
        use_bias (bool, optional): Whether the conv layers use a bias.
        activation (optional): Activation op passed to the residual units.
        mode (optional): One of the tf.estimator.ModeKeys strings.
        kernel_initializer (optional): Initializer for conv kernels.
        bias_initializer (optional): Initializer for bias vectors.
        kernel_regularizer (None, optional): Regularizer for conv kernels.
        bias_regularizer (None, optional): Regularizer for bias vectors.

    Returns:
        tuple: (encoder output tensor, list of per-scale feature tensors,
            list of saved downsampling strides, `filters`, globally
            averaged output of the last 1x1x1 convolution).
    """
    assert len(strides) == len(filters)
    assert len(inputs.get_shape().as_list()) == 5, \
        'inputs are required to have a rank of 5.'

    conv_params = dict(padding='same',
                       use_bias=use_bias,
                       kernel_initializer=kernel_initializer,
                       bias_initializer=bias_initializer,
                       kernel_regularizer=kernel_regularizer,
                       bias_regularizer=bias_regularizer)

    # Initial convolution with filters[0]
    net = tf.layers.conv3d(inputs=inputs,
                           filters=filters[0],
                           kernel_size=(3, 3, 3),
                           strides=strides[0],
                           **conv_params)
    tf.logging.info('Init conv tensor shape {}'.format(net.get_shape()))

    # Collect the feature map of every resolution scale (for a decoder)
    # and the strides used to downsample between scales.
    res_scales = [net]
    saved_strides = []

    for scale in range(1, len(filters)):
        # The first unit on each scale downsamples with the configured
        # stride; the remaining units keep the resolution fixed.
        with tf.variable_scope('enc_unit_{}_0'.format(scale)):
            net = vanilla_residual_unit_3d(inputs=net,
                                           out_filters=filters[scale],
                                           strides=strides[scale],
                                           activation=activation,
                                           mode=mode)
        saved_strides.append(strides[scale])

        for unit in range(1, num_res_units):
            with tf.variable_scope('enc_unit_{}_{}'.format(scale, unit)):
                net = vanilla_residual_unit_3d(inputs=net,
                                               out_filters=filters[scale],
                                               strides=(1, 1, 1),
                                               activation=activation,
                                               mode=mode)
        res_scales.append(net)
        tf.logging.info('Encoder at res_scale {} tensor shape: {}'.format(
            scale, net.get_shape()))

    # Head: 1x1x1 convolution to num_classes channels, then a global
    # average over the spatial axes (1, 2, 3).
    with tf.variable_scope('last'):
        last_conv = tf.layers.conv3d(inputs=net,
                                     filters=num_classes,
                                     kernel_size=(1, 1, 1),
                                     strides=(1, 1, 1),
                                     **conv_params)
        deconv_output = tf.reduce_mean(last_conv,
                                       axis=(1, 2, 3),
                                       name='globalaver')

    return net, res_scales, saved_strides, filters, deconv_output
def residual_decoder(inputs,
                     num_classes,
                     num_res_units=1,
                     filters=(16, 32, 64, 128),
                     res_scales=0,
                     saved_strides=((1, 1, 1), (2, 2, 2), (2, 2, 2), (2, 2, 2)),
                     use_bias=False,
                     activation=leaky_relu,
                     mode=tf.estimator.ModeKeys.EVAL,
                     kernel_initializer=tf.initializers.variance_scaling(
                         distribution='uniform'),
                     bias_initializer=tf.zeros_initializer(),
                     kernel_regularizer=None,
                     bias_regularizer=None):
    """Residual decoder reconstructing predictions from encoder features.

    Starting from the coarsest encoder output, repeatedly upsamples and
    concatenates with the matching encoder feature map (`res_scales`),
    runs `num_res_units` residual units per resolution scale, and emits
    per-voxel logits via a final 1x1x1 convolution.

    Args:
        inputs (tf.Tensor): Encoder output tensor to decode.
        num_classes (int): Number of output classes.
        num_res_units (int, optional): Residual units per resolution scale.
        filters (tuple, optional): Filters for the units at each scale.
        res_scales (list, optional): Per-scale encoder feature tensors as
            returned by `residual_encoder`. NOTE(review): the default of 0
            is not a valid value and will fail on indexing — callers must
            always pass the encoder's `res_scales`.
        saved_strides (tuple, optional): Strides used by the encoder for
            downsampling; reused here for upsampling.
        use_bias (bool, optional): Whether the conv layers use a bias.
        activation (optional): Activation op passed to the residual units.
        mode (optional): One of the tf.estimator.ModeKeys strings.
        kernel_initializer (optional): Initializer for conv kernels.
        bias_initializer (optional): Initializer for bias vectors.
        kernel_regularizer (None, optional): Regularizer for conv kernels.
        bias_regularizer (None, optional): Regularizer for bias vectors.

    Returns:
        dict: Output tensors 'logits', 'y_prob' and 'y_'.
    """
    outputs = {}

    conv_params = {
        'padding': 'same',
        'use_bias': use_bias,
        'kernel_initializer': kernel_initializer,
        'bias_initializer': bias_initializer,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer
    }

    x = inputs

    # Upsample and concat layers [1] reconstruct the predictions to higher
    # resolution scales
    for res_scale in range(len(filters) - 2, -1, -1):
        with tf.variable_scope('up_concat_{}'.format(res_scale)):
            x = upsample_and_concat(inputs=x,
                                    inputs2=res_scales[res_scale],
                                    strides=saved_strides[res_scale])

        for i in range(0, num_res_units):
            with tf.variable_scope('dec_unit_{}_{}'.format(res_scale, i)):
                # Fix: forward `activation` to the residual units. It was
                # previously accepted but silently ignored, inconsistent
                # with residual_encoder which passes it through.
                x = vanilla_residual_unit_3d(inputs=x,
                                             out_filters=filters[res_scale],
                                             strides=(1, 1, 1),
                                             activation=activation,
                                             mode=mode)
        tf.logging.info('Decoder at res_scale {} tensor shape: {}'.format(
            res_scale, x.get_shape()))

    # Last convolution: 1x1x1 projection to num_classes logits channels
    with tf.variable_scope('last'):
        x = tf.layers.conv3d(inputs=x,
                             filters=num_classes,
                             kernel_size=(1, 1, 1),
                             strides=(1, 1, 1),
                             **conv_params)
    tf.logging.info('Output tensor shape {}'.format(x.get_shape()))

    # Define the outputs
    outputs['logits'] = x

    with tf.variable_scope('pred'):
        y_prob = tf.nn.softmax(x)
        outputs['y_prob'] = y_prob

        # NOTE(review): the binary branch thresholds the raw logits at 0.5
        # rather than a sigmoid probability — kept as-is since callers may
        # depend on it; confirm against training code.
        y_ = tf.argmax(x, axis=-1) \
            if num_classes > 1 \
            else tf.cast(tf.greater_equal(x[..., 0], 0.5), tf.int32)
        outputs['y_'] = y_

    return outputs