def depthmotion_block_demon_original(image_pair,
                                     image2_2,
                                     prev_flow2,
                                     prev_flowconf2,
                                     prev_rotation=None,
                                     prev_translation=None,
                                     intrinsics=None,
                                     data_format='channels_first'):
    """Builds the depth and motion network using the caffe-padding layers.

    image_pair: Tensor
        Image pair concatenated along the channel axis.
        Documented upstream as NCHW with C == 6; with
        data_format != 'channels_first' the inputs are treated as NHWC.

    image2_2: Tensor
        Second image at resolution level 2.

    prev_flow2: Tensor
        The output of the flow network. Contains only the flow (2 channels).

    prev_flowconf2: Tensor
        The output of the flow network.
        Contains flow and flow confidence (4 channels).

    prev_rotation: Tensor
        The previously predicted rotation.

    prev_translation: Tensor
        The previously predicted translation.

    intrinsics: Tensor
        Tensor with the intrinsic camera parameters.
        The depth-from-flow input is only created when prev_rotation,
        prev_translation and intrinsics are all given.

    data_format: str
        'channels_first' or 'channels_last'.

    Returns a dictionary with the keys 'predict_depth2', 'predict_normal2',
    'predict_rotation', 'predict_translation' and 'predict_scale'.
    Several intermediate feature maps are added to the 'endpoints'
    collection as a side effect.
    """
    is_nchw = data_format == 'channels_first'
    channel_axis = 1 if is_nchw else 3

    def conv_block(name, inputs, num_outputs, kernel_size, stride):
        # thin wrapper that pins the data format for all encoder layers
        return convrelu2_caffe_padding(name=name,
                                       inputs=inputs,
                                       num_outputs=num_outputs,
                                       kernel_size=kernel_size,
                                       stride=stride,
                                       data_format=data_format)

    def remember(tensor):
        tf.add_to_collection('endpoints', tensor)

    # ---- encoder: first two levels on the raw image pair ----
    feat1 = conv_block('conv1', image_pair, 32, 9, 2)
    feat2 = conv_block('conv2', feat1, 32, 7, 2)

    # ---- additional inputs derived from the previous flow prediction ----
    # warp2d is always called with NCHW tensors; convert around it if needed
    if is_nchw:
        flow_nchw = prev_flow2
        warped_image = sops.warp2d(image2_2,
                                   flow_nchw,
                                   normalized=True,
                                   border_mode='value')
    else:
        flow_nchw = convert_NHWC_to_NCHW(prev_flow2)
        image_nchw = convert_NHWC_to_NCHW(image2_2)
        warped_image = convert_NCHW_to_NHWC(
            sops.warp2d(image_nchw,
                        flow_nchw,
                        normalized=True,
                        border_mode='value'))

    side_inputs = [warped_image, prev_flowconf2]

    have_motion = (prev_rotation is not None
                   and prev_translation is not None
                   and intrinsics is not None)
    if have_motion:
        flow_depth = sops.flow_to_depth(
            flow=flow_nchw,
            intrinsics=intrinsics,
            rotation=prev_rotation,
            translation=prev_translation,
            normalized_flow=True,
            inverse_depth=True,
        )
        if not is_nchw:
            flow_depth = convert_NCHW_to_NHWC(flow_depth)
        side_inputs.append(flow_depth)

    # no gradients flow back into the previous predictions
    side_stack = tf.stop_gradient(tf.concat(side_inputs, axis=channel_axis))
    side_feat = conv_block('conv2_extra_inputs', side_stack, 32, 3, 1)
    merged2 = tf.concat((feat2, side_feat), axis=channel_axis)

    # ---- encoder: remaining levels ----
    conv2_1 = conv_block('conv2_1', merged2, 64, 3, 1)
    remember(conv2_1)

    feat3 = conv_block('conv3', conv2_1, 128, 5, 2)
    conv3_1 = conv_block('conv3_1', feat3, 128, 3, 1)
    remember(conv3_1)

    feat4 = conv_block('conv4', conv3_1, 256, 5, 2)
    conv4_1 = conv_block('conv4_1', feat4, 256, 3, 1)
    remember(conv4_1)

    feat5 = conv_block('conv5', conv4_1, 512, 3, 2)
    conv5_1 = conv_block('conv5_1', feat5, 512, 3, 1)
    remember(conv5_1)

    # ---- motion head ----
    motion_feat = convrelu_caffe_padding(name='motion_conv1',
                                         inputs=conv5_1,
                                         num_outputs=128,
                                         kernel_size=3,
                                         strides=1,
                                         data_format=data_format)
    remember(motion_feat)

    # the fully connected layers always see the features in NCHW order
    if data_format == 'channels_last':
        motion_feat = convert_NHWC_to_NCHW(motion_feat)

    hidden1 = tf.layers.dense(name='motion_fc1',
                              inputs=tf.contrib.layers.flatten(motion_feat),
                              units=1024,
                              activation=myLeakyRelu)
    hidden2 = tf.layers.dense(name='motion_fc2',
                              inputs=hidden1,
                              units=128,
                              activation=myLeakyRelu)
    motion_scale = tf.layers.dense(name='motion_fc3',
                                   inputs=hidden2,
                                   units=7,
                                   activation=None)

    # 7 outputs: 3 rotation, 3 translation, 1 scale
    rotation, translation, scale = tf.split(value=motion_scale,
                                            num_or_size_splits=[3, 3, 1],
                                            axis=1)

    # ---- decoder ----
    with tf.variable_scope('refine4'):
        up4 = _refine_caffe_padding(inp=conv5_1,
                                    num_outputs=256,
                                    features_direct=conv4_1,
                                    data_format=data_format)

    with tf.variable_scope('refine3'):
        up3 = _refine_caffe_padding(inp=up4,
                                    num_outputs=128,
                                    features_direct=conv3_1,
                                    data_format=data_format)

    with tf.variable_scope('refine2'):
        up2 = _refine_caffe_padding(inp=up3,
                                    num_outputs=64,
                                    features_direct=conv2_1,
                                    data_format=data_format)

    with tf.variable_scope('predict_depthnormal2'):
        depth2, normal2 = _predict_depthnormal_caffe_padding(
            up2, predicted_scale=scale, data_format=data_format)

    predictions = {}
    predictions['predict_depth2'] = depth2
    predictions['predict_normal2'] = normal2
    predictions['predict_rotation'] = rotation
    predictions['predict_translation'] = translation
    predictions['predict_scale'] = scale
    return predictions
def depthmotion_block(image_pair,
                      image2_2,
                      prev_flow2,
                      prev_flowconf2,
                      prev_rotation=None,
                      prev_translation=None,
                      intrinsics=None,
                      data_format='channels_first',
                      kernel_regularizer=None):
    """Creates a depth and motion network

    image_pair: Tensor
        Image pair concatenated along the channel axis.
        Documented as NCHW with C == 6; with
        data_format != 'channels_first' the inputs are treated as NHWC.

    image2_2: Tensor
        Second image at resolution level 2

    prev_flow2: Tensor
        The output of the flow network. Contains only the flow (2 channels)

    prev_flowconf2: Tensor
        The output of the flow network.
        Contains flow and flow confidence (4 channels)

    prev_rotation: Tensor
        The previously predicted rotation.

    prev_translation: Tensor
        The previously predicted translation.

    intrinsics: Tensor
        Tensor with the intrinsic camera parameters.
        The depth-from-flow input is only created when prev_rotation,
        prev_translation and intrinsics are all not None.

    data_format: str
        'channels_first' or 'channels_last'.

    kernel_regularizer:
        Regularizer passed on to all conv and dense layers of this block.

    Returns a dictionary with the predictions for depth, normals and motion
    (keys 'predict_depth2', 'predict_normal2', 'predict_rotation',
    'predict_translation', 'predict_scale').
    """
    conv_params = {
        'data_format': data_format,
        'kernel_regularizer': kernel_regularizer,
    }
    channels_first = data_format == 'channels_first'
    channel_axis = 1 if channels_first else 3

    # contracting part
    conv1 = convrelu2(name='conv1',
                      inputs=image_pair,
                      num_outputs=(24, 32),
                      kernel_size=9,
                      stride=2,
                      **conv_params)
    conv2 = convrelu2(name='conv2',
                      inputs=conv1,
                      num_outputs=32,
                      kernel_size=7,
                      stride=2,
                      **conv_params)

    # create extra inputs
    # the sops ops are always called with NCHW tensors
    if channels_first:
        prev_flow2_nchw = prev_flow2
        image2_2_warped = sops.warp2d(image2_2,
                                      prev_flow2_nchw,
                                      normalized=True,
                                      border_mode='value')
    else:
        prev_flow2_nchw = convert_NHWC_to_NCHW(prev_flow2)
        image2_2_warped = convert_NCHW_to_NHWC(
            sops.warp2d(convert_NHWC_to_NCHW(image2_2),
                        prev_flow2_nchw,
                        normalized=True,
                        border_mode='value'))

    extra_inputs = [image2_2_warped, prev_flowconf2]
    if (prev_rotation is not None and prev_translation is not None
            and intrinsics is not None):
        depth_from_flow = sops.flow_to_depth2(
            flow=prev_flow2_nchw,
            intrinsics=intrinsics,
            rotation=prev_rotation,
            translation=prev_translation,
            normalized_flow=True,
            inverse_depth=True,
        )
        if not channels_first:
            depth_from_flow = convert_NCHW_to_NHWC(depth_from_flow)
        # keep the triangulated inverse depth in a bounded range
        depth_from_flow = tf.clip_by_value(depth_from_flow, 0.0, 50.0)
        extra_inputs.append(depth_from_flow)

    # the previous predictions are treated as constants (no gradient)
    concat_extra_inputs = tf.stop_gradient(
        tf.concat(extra_inputs, axis=channel_axis))
    conv_extra_inputs = convrelu2(name='conv2_extra_inputs',
                                  inputs=concat_extra_inputs,
                                  num_outputs=32,
                                  kernel_size=3,
                                  stride=1,
                                  **conv_params)
    conv2_concat = tf.concat((conv2, conv_extra_inputs), axis=channel_axis)
    conv2_1 = convrelu2(name='conv2_1',
                        inputs=conv2_concat,
                        num_outputs=64,
                        kernel_size=3,
                        stride=1,
                        **conv_params)

    conv3 = convrelu2(name='conv3',
                      inputs=conv2_1,
                      num_outputs=(96, 128),
                      kernel_size=5,
                      stride=2,
                      **conv_params)
    conv3_1 = convrelu2(name='conv3_1',
                        inputs=conv3,
                        num_outputs=128,
                        kernel_size=3,
                        stride=1,
                        **conv_params)

    conv4 = convrelu2(name='conv4',
                      inputs=conv3_1,
                      num_outputs=(192, 256),
                      kernel_size=5,
                      stride=2,
                      **conv_params)
    conv4_1 = convrelu2(name='conv4_1',
                        inputs=conv4,
                        num_outputs=256,
                        kernel_size=3,
                        stride=1,
                        **conv_params)

    conv5 = convrelu2(name='conv5',
                      inputs=conv4_1,
                      num_outputs=384,
                      kernel_size=3,
                      stride=2,
                      **conv_params)
    conv5_1 = convrelu2(name='conv5_1',
                        inputs=conv5,
                        num_outputs=384,
                        kernel_size=3,
                        stride=1,
                        **conv_params)

    # dense layer on the first 96 channels of conv5_1
    dense_slice_shape = conv5_1.get_shape().as_list()
    if dense_slice_shape[0] is None:
        # unknown batch size: -1 lets tf.slice take the whole batch and
        # tf.reshape infer it, instead of failing on a None entry
        dense_slice_shape[0] = -1
    dense_slice_shape[1 if channels_first else -1] = 96

    # number of features per sample in the slice (batch dim excluded)
    units = 1
    for dim in dense_slice_shape[1:]:
        units *= dim

    dense5 = tf.layers.dense(
        tf.contrib.layers.flatten(
            tf.slice(conv5_1, [0, 0, 0, 0], dense_slice_shape)),
        units=units,
        activation=myLeakyRelu,
        kernel_initializer=default_weights_initializer(),
        kernel_regularizer=kernel_regularizer,
        name='dense5')
    conv5_1_dense5 = tf.concat(
        (conv5_1, tf.reshape(dense5, dense_slice_shape)), axis=channel_axis)

    # motion prediction part
    motion_conv3 = convrelu2(name='motion_conv3',
                             inputs=conv2_1,
                             num_outputs=64,
                             kernel_size=5,
                             stride=2,
                             **conv_params)
    motion_conv4 = convrelu2(name='motion_conv4',
                             inputs=motion_conv3,
                             num_outputs=64,
                             kernel_size=5,
                             stride=2,
                             **conv_params)
    motion_conv5a = convrelu2(name='motion_conv5a',
                              inputs=motion_conv4,
                              num_outputs=64,
                              kernel_size=3,
                              stride=2,
                              **conv_params)
    motion_conv5b = convrelu(name='motion_conv5b',
                             inputs=conv5_1_dense5,
                             num_outputs=64,
                             kernel_size=3,
                             strides=1,
                             **conv_params)
    motion_conv5_1 = tf.concat((motion_conv5a, motion_conv5b),
                               axis=channel_axis)

    # the fully connected layers always see the features in NCHW order
    if data_format == 'channels_last':
        motion_conv5_1 = convert_NHWC_to_NCHW(motion_conv5_1)
    motion_fc1 = tf.layers.dense(
        name='motion_fc1',
        inputs=tf.contrib.layers.flatten(motion_conv5_1),
        units=1024,
        activation=myLeakyRelu,
        kernel_regularizer=kernel_regularizer)
    motion_fc2 = tf.layers.dense(name='motion_fc2',
                                 inputs=motion_fc1,
                                 units=128,
                                 activation=myLeakyRelu,
                                 kernel_regularizer=kernel_regularizer)
    predict_motion_scale = tf.layers.dense(
        name='motion_fc3',
        inputs=motion_fc2,
        units=7,
        activation=None,
        kernel_regularizer=kernel_regularizer)

    # 7 outputs: 3 rotation, 3 translation, 1 scale
    predict_rotation, predict_translation, predict_scale = tf.split(
        value=predict_motion_scale, num_or_size_splits=[3, 3, 1], axis=1)

    # expanding part
    with tf.variable_scope('refine4'):
        concat4 = _refine(inp=conv5_1,
                          num_outputs=256,
                          features_direct=conv4_1,
                          **conv_params)

    with tf.variable_scope('refine3'):
        concat3 = _refine(inp=concat4,
                          num_outputs=128,
                          features_direct=conv3_1,
                          **conv_params)

    with tf.variable_scope('refine2'):
        concat2 = _refine(inp=concat3,
                          num_outputs=64,
                          features_direct=conv2_1,
                          **conv_params)

    with tf.variable_scope('predict_depthnormal2'):
        predict_depth2, predict_normal2 = _predict_depthnormal(
            concat2, predicted_scale=predict_scale, **conv_params)

    return {
        'predict_depth2': predict_depth2,
        'predict_normal2': predict_normal2,
        'predict_rotation': predict_rotation,
        'predict_translation': predict_translation,
        'predict_scale': predict_scale,
    }
def flow_block_demon_original(image_pair,
                              image2_2=None,
                              intrinsics=None,
                              prev_predictions=None,
                              data_format='channels_first'):
    """Builds the flow network using the caffe-padding layers.

    image_pair: Tensor
        Image pair concatenated along the channel axis.

    image2_2: Tensor
        Second image at resolution level 2 (downsampled two times).
        Only used when prev_predictions is given.

    intrinsics: Tensor
        The normalized intrinsic parameters.
        Only used when prev_predictions is given.

    prev_predictions: dict of Tensor
        Predictions from the previous depth block. The keys read here are
        'predict_depth2', 'predict_normal2', 'predict_rotation' and
        'predict_translation'.

    data_format: str
        'channels_first' or 'channels_last'.

    Returns a dict with the keys 'predict_flowconf5' and 'predict_flowconf2'.
    Several intermediate tensors are added to the 'endpoints' collection
    as a side effect.
    """
    is_nchw = data_format == 'channels_first'
    channel_axis = 1 if is_nchw else 3

    def conv_block(name, inputs, num_outputs, kernel_size, stride):
        # thin wrapper that pins the data format for all encoder layers
        return convrelu2_caffe_padding(name=name,
                                       inputs=inputs,
                                       num_outputs=num_outputs,
                                       kernel_size=kernel_size,
                                       stride=stride,
                                       data_format=data_format)

    def remember(tensor):
        tf.add_to_collection('endpoints', tensor)

    # ---- encoder ----
    feat1 = conv_block('conv1', image_pair, 32, 9, 2)

    if prev_predictions is None:
        # first iteration: plain image encoder
        feat2 = conv_block('conv2', feat1, 64, 7, 2)
        conv2_1 = conv_block('conv2_1', feat2, 64, 3, 1)
        remember(conv2_1)
    else:
        feat2 = conv_block('conv2', feat1, 32, 7, 2)

        # flow implied by the previous depth and motion (computed in NCHW)
        prev_depth = prev_predictions['predict_depth2']
        depth_nchw = prev_depth if is_nchw else convert_NHWC_to_NCHW(
            prev_depth)

        raw_flow = sops.depth_to_flow(
            intrinsics=intrinsics,
            depth=depth_nchw,
            rotation=prev_predictions['predict_rotation'],
            translation=prev_predictions['predict_translation'],
            inverse_depth=True,
            normalize_flow=True,
        )

        # Zero out flow vectors whose magnitude is too large. NaN values,
        # which very bad camera parameters can produce, fail the
        # comparison and are replaced by zeros as well.
        flow_norm = tf.norm(raw_flow, axis=1, keep_dims=True)
        flow_norm = tf.concat((flow_norm, flow_norm), axis=1)
        zero_flow = tf.zeros_like(raw_flow, dtype=tf.float32)
        keep_mask = flow_norm < 1.0
        depth_motion_flow = tf.where(keep_mask, raw_flow, zero_flow)

        if is_nchw:
            warp_source = image2_2
        else:
            warp_source = convert_NHWC_to_NCHW(image2_2)
        warped_image = sops.warp2d(
            input=warp_source,
            displacements=depth_motion_flow,
            normalized=True,
            border_mode='value',
        )

        if data_format == 'channels_last':
            depth_motion_flow = convert_NCHW_to_NHWC(depth_motion_flow)
            warped_image = convert_NCHW_to_NHWC(warped_image)

        side_inputs = (
            warped_image,
            depth_motion_flow,
            prev_predictions['predict_depth2'],
            prev_predictions['predict_normal2'],
        )

        # no gradients flow back into the previous predictions
        side_stack = tf.stop_gradient(
            tf.concat(side_inputs, axis=channel_axis))

        side_feat = conv_block('conv2_extra_inputs', side_stack, 32, 3, 1)
        merged2 = tf.concat((feat2, side_feat), axis=channel_axis)
        conv2_1 = conv_block('conv2_1', merged2, 64, 3, 1)

    feat3 = conv_block('conv3', conv2_1, 128, 5, 2)
    conv3_1 = conv_block('conv3_1', feat3, 128, 3, 1)
    remember(conv3_1)

    feat4 = conv_block('conv4', conv3_1, 256, 5, 2)
    conv4_1 = conv_block('conv4_1', feat4, 256, 3, 1)
    remember(conv4_1)

    feat5 = conv_block('conv5', conv4_1, 512, 5, 2)
    conv5_1 = conv_block('conv5_1', feat5, 512, 3, 1)
    remember(conv5_1)

    # ---- decoder ----
    with tf.variable_scope('predict_flow5'):
        flowconf5 = _predict_flow_caffe_padding(conv5_1,
                                                predict_confidence=True,
                                                data_format=data_format)
    remember(flowconf5)

    with tf.variable_scope('upsample_flow5to4'):
        flowconf5_up = _upsample_prediction(flowconf5,
                                            2,
                                            data_format=data_format)

    with tf.variable_scope('refine4'):
        up4 = _refine_caffe_padding(inp=conv5_1,
                                    num_outputs=256,
                                    upsampled_prediction=flowconf5_up,
                                    features_direct=conv4_1,
                                    data_format=data_format)

    with tf.variable_scope('refine3'):
        up3 = _refine_caffe_padding(inp=up4,
                                    num_outputs=128,
                                    features_direct=conv3_1,
                                    data_format=data_format)

    with tf.variable_scope('refine2'):
        up2 = _refine_caffe_padding(inp=up3,
                                    num_outputs=64,
                                    features_direct=conv2_1,
                                    data_format=data_format)

    with tf.variable_scope('predict_flow2'):
        flowconf2 = _predict_flow_caffe_padding(up2,
                                                predict_confidence=True,
                                                data_format=data_format)
    remember(flowconf2)

    predictions = {}
    predictions['predict_flowconf5'] = flowconf5
    predictions['predict_flowconf2'] = flowconf2
    return predictions
def flow_block(image_pair,
               image2_2=None,
               intrinsics=None,
               prev_predictions=None,
               data_format='channels_first',
               kernel_regularizer=None):
    """Creates a flow network

    image_pair: Tensor
        Image pair concatenated along the channel axis.

    image2_2: Tensor
        Second image at resolution level 2 (downsampled two times).
        Required when prev_predictions is given.

    intrinsics: Tensor
        The normalized intrinsic parameters.
        Required when prev_predictions is given.

    prev_predictions: dict of Tensor
        Predictions from the previous depth block. The keys read here are
        'predict_depth2', 'predict_normal2', 'predict_rotation' and
        'predict_translation'.

    data_format: str
        'channels_first' or 'channels_last'.

    kernel_regularizer:
        Regularizer passed on to all conv and dense layers of this block.

    Returns a dict with the predictions
    (keys 'predict_flowconf5' and 'predict_flowconf2').

    Raises ValueError if prev_predictions is given but image2_2 or
    intrinsics is None.
    """
    # fail early with a clear message instead of deep inside the custom ops
    if prev_predictions is not None and (image2_2 is None
                                         or intrinsics is None):
        raise ValueError(
            'image2_2 and intrinsics are required when prev_predictions '
            'is given')

    conv_params = {
        'data_format': data_format,
        'kernel_regularizer': kernel_regularizer,
    }
    channels_first = data_format == 'channels_first'
    channel_axis = 1 if channels_first else 3

    # contracting part
    conv1 = convrelu2(name='conv1',
                      inputs=image_pair,
                      num_outputs=(24, 32),
                      kernel_size=9,
                      stride=2,
                      **conv_params)

    if prev_predictions is None:
        conv2 = convrelu2(name='conv2',
                          inputs=conv1,
                          num_outputs=(48, 64),
                          kernel_size=7,
                          stride=2,
                          **conv_params)
        conv2_1 = convrelu2(name='conv2_1',
                            inputs=conv2,
                            num_outputs=64,
                            kernel_size=3,
                            stride=1,
                            **conv_params)
    else:
        conv2 = convrelu2(name='conv2',
                          inputs=conv1,
                          num_outputs=32,
                          kernel_size=7,
                          stride=2,
                          **conv_params)

        # create warped input
        # the sops ops are always called with NCHW tensors
        if channels_first:
            prev_depth_nchw = prev_predictions['predict_depth2']
        else:
            prev_depth_nchw = convert_NHWC_to_NCHW(
                prev_predictions['predict_depth2'])

        _flow_from_depth_motion = sops.depth_to_flow(
            intrinsics=intrinsics,
            depth=prev_depth_nchw,
            rotation=prev_predictions['predict_rotation'],
            translation=prev_predictions['predict_translation'],
            inverse_depth=True,
            normalize_flow=True,
        )
        # set flow vectors to zero if the motion is too large.
        # this also eliminates nan values which can be produced by very bad
        # camera parameters (nan fails the comparison below)
        flow_from_depth_motion_norm = tf.norm(_flow_from_depth_motion,
                                              axis=1,
                                              keep_dims=True)
        # duplicate the norm so the mask covers both flow channels
        flow_from_depth_motion_norm = tf.concat(
            (flow_from_depth_motion_norm, flow_from_depth_motion_norm),
            axis=1)
        tmp_zeros = tf.zeros_like(_flow_from_depth_motion, dtype=tf.float32)
        flow_from_depth_motion = tf.where(flow_from_depth_motion_norm < 1.0,
                                          _flow_from_depth_motion, tmp_zeros)

        image2_2_warped = sops.warp2d(
            input=image2_2
            if channels_first else convert_NHWC_to_NCHW(image2_2),
            displacements=flow_from_depth_motion,
            normalized=True,
            border_mode='value',
        )
        if data_format == 'channels_last':
            flow_from_depth_motion = convert_NCHW_to_NHWC(
                flow_from_depth_motion)
            image2_2_warped = convert_NCHW_to_NHWC(image2_2_warped)
        extra_inputs = (image2_2_warped, flow_from_depth_motion,
                        prev_predictions['predict_depth2'],
                        prev_predictions['predict_normal2'])

        # stop gradient here
        extra_inputs_concat = tf.stop_gradient(
            tf.concat(extra_inputs, axis=channel_axis))

        conv_extra_inputs = convrelu2(name='conv2_extra_inputs',
                                      inputs=extra_inputs_concat,
                                      num_outputs=32,
                                      kernel_size=3,
                                      stride=1,
                                      **conv_params)
        conv2_concat = tf.concat((conv2, conv_extra_inputs),
                                 axis=channel_axis)
        conv2_1 = convrelu2(name='conv2_1',
                            inputs=conv2_concat,
                            num_outputs=64,
                            kernel_size=3,
                            stride=1,
                            **conv_params)

    conv3 = convrelu2(name='conv3',
                      inputs=conv2_1,
                      num_outputs=(96, 128),
                      kernel_size=5,
                      stride=2,
                      **conv_params)
    conv3_1 = convrelu2(name='conv3_1',
                        inputs=conv3,
                        num_outputs=128,
                        kernel_size=3,
                        stride=1,
                        **conv_params)

    conv4 = convrelu2(name='conv4',
                      inputs=conv3_1,
                      num_outputs=(192, 256),
                      kernel_size=5,
                      stride=2,
                      **conv_params)
    conv4_1 = convrelu2(name='conv4_1',
                        inputs=conv4,
                        num_outputs=256,
                        kernel_size=3,
                        stride=1,
                        **conv_params)

    conv5 = convrelu2(name='conv5',
                      inputs=conv4_1,
                      num_outputs=384,
                      kernel_size=5,
                      stride=2,
                      **conv_params)
    conv5_1 = convrelu2(name='conv5_1',
                        inputs=conv5,
                        num_outputs=384,
                        kernel_size=3,
                        stride=1,
                        **conv_params)

    # dense layer on the first 96 channels of conv5_1
    dense_slice_shape = conv5_1.get_shape().as_list()
    if dense_slice_shape[0] is None:
        # unknown batch size: -1 lets tf.slice take the whole batch and
        # tf.reshape infer it, instead of failing on a None entry
        dense_slice_shape[0] = -1
    dense_slice_shape[1 if channels_first else -1] = 96

    # number of features per sample in the slice (batch dim excluded)
    units = 1
    for dim in dense_slice_shape[1:]:
        units *= dim

    dense5 = tf.layers.dense(
        tf.contrib.layers.flatten(
            tf.slice(conv5_1, [0, 0, 0, 0], dense_slice_shape)),
        units=units,
        activation=myLeakyRelu,
        kernel_initializer=default_weights_initializer(),
        kernel_regularizer=kernel_regularizer,
        name='dense5')
    conv5_1_dense5 = tf.concat(
        (conv5_1, tf.reshape(dense5, dense_slice_shape)), axis=channel_axis)

    # expanding part
    with tf.variable_scope('predict_flow5'):
        predict_flowconf5 = _predict_flow(conv5_1_dense5,
                                          predict_confidence=True,
                                          **conv_params)

    with tf.variable_scope('upsample_flow5to4'):
        predict_flowconf5to4 = _upsample_prediction(predict_flowconf5, 2,
                                                    **conv_params)

    with tf.variable_scope('refine4'):
        concat4 = _refine(inp=conv5_1_dense5,
                          num_outputs=256,
                          upsampled_prediction=predict_flowconf5to4,
                          features_direct=conv4_1,
                          **conv_params)

    with tf.variable_scope('refine3'):
        concat3 = _refine(inp=concat4,
                          num_outputs=128,
                          features_direct=conv3_1,
                          **conv_params)

    with tf.variable_scope('refine2'):
        concat2 = _refine(inp=concat3,
                          num_outputs=64,
                          features_direct=conv2_1,
                          **conv_params)

    with tf.variable_scope('predict_flow2'):
        predict_flowconf2 = _predict_flow(concat2,
                                          predict_confidence=True,
                                          **conv_params)

    return {
        'predict_flowconf5': predict_flowconf5,
        'predict_flowconf2': predict_flowconf2,
    }