def _load_sample(self, files):
    left_file_name = files[0]
    right_file_name = files[1]
    gt_file_name = files[2]

    # read rgb images
    left_image = read_image_from_disc(left_file_name)
    right_image = read_image_from_disc(right_file_name)

    # read gt
    if self._usePfm:
        gt_image = tf.py_func(lambda x: readPFM(x)[0], [gt_file_name], tf.float32)
        gt_image.set_shape([None, None, 1])
    else:
        read_type = tf.uint16 if self._double_prec_gt else tf.uint8
        gt_image = read_image_from_disc(gt_file_name,
                                        shape=[None, None, 1],
                                        dtype=read_type)
        gt_image = tf.cast(gt_image, tf.float32)
        if self._double_prec_gt:
            gt_image = gt_image / 256.0

    # crop gt to fit with image (SGM adds some padding, who knows why...)
    gt_image = gt_image[:, :tf.shape(left_image)[1], :]

    if self._resize_shape[0] is not None:
        # disparities must be rescaled by the same horizontal factor as the images
        scale_factor = tf.cast(tf.shape(gt_image)[1],
                               tf.float32) / float(self._resize_shape[1])
        left_image = preprocessing.rescale_image(left_image, self._resize_shape)
        right_image = preprocessing.rescale_image(right_image, self._resize_shape)
        gt_image = tf.image.resize_nearest_neighbor(
            tf.expand_dims(gt_image, axis=0), self._resize_shape)[0] / scale_factor

    if self._crop_shape[0] is not None:
        if self._is_training:
            left_image, right_image, gt_image = preprocessing.random_crop(
                self._crop_shape, [left_image, right_image, gt_image])
        else:
            (left_image, right_image, gt_image) = [
                tf.image.resize_image_with_crop_or_pad(
                    x, self._crop_shape[0], self._crop_shape[1])
                for x in [left_image, right_image, gt_image]
            ]

    if self._augment:
        left_image, right_image = preprocessing.augment(left_image, right_image)

    return [left_image, right_image, gt_image]
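# --- Usage sketch (illustrative, not part of the original code): wiring
# _load_sample into a tf.data input pipeline. `loader` and the three path
# lists are hypothetical names; batching assumes _crop_shape is set so the
# mapped tensors have static spatial shapes.
def _example_sample_pipeline(loader, left_paths, right_paths, gt_paths,
                             batch_size=4):
    dataset = tf.data.Dataset.from_tensor_slices(
        (left_paths, right_paths, gt_paths))
    dataset = dataset.map(lambda l, r, g: loader._load_sample([l, r, g]),
                          num_parallel_calls=4)
    return dataset.batch(batch_size).prefetch(1)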
def _load_task(self, files):
    """
    Load all the images belonging to one task and return them as three
    batched tensors: left samples, right samples and gt samples.
    """
    # files is indexable as [left_files, right_files, gt_files],
    # each row holding self._sequence_length file names
    left_files = files[0]
    right_files = files[1]
    gt_files = files[2]

    # read images
    left_task_samples = tf.map_fn(
        read_image_from_disc,
        left_files,
        dtype=tf.float32,
        parallel_iterations=self._sequence_length)
    left_task_samples.set_shape([self._sequence_length, None, None, 3])
    right_task_samples = tf.map_fn(
        read_image_from_disc,
        right_files,
        dtype=tf.float32,
        parallel_iterations=self._sequence_length)
    right_task_samples.set_shape([self._sequence_length, None, None, 3])
    gt_task_samples = tf.map_fn(
        self._decode_gt,
        gt_files,
        dtype=tf.float32,
        parallel_iterations=self._sequence_length)
    gt_task_samples.set_shape([self._sequence_length, None, None, 1])

    # aligned image resize
    if self._resize_shape[0] is not None:
        # samples are [sequence_length, H, W, C], so width sits on axis 2;
        # use float division so the disparity rescaling factor is exact,
        # as in _load_sample
        scale_factor = tf.cast(tf.shape(left_task_samples)[2],
                               tf.float32) / float(self._resize_shape[1])
        left_task_samples = preprocessing.rescale_image(
            left_task_samples, self._resize_shape)
        right_task_samples = preprocessing.rescale_image(
            right_task_samples, self._resize_shape)
        gt_task_samples = tf.image.resize_nearest_neighbor(
            gt_task_samples, self._resize_shape) / scale_factor

    # aligned random crop
    if self._crop_shape[0] is not None:
        left_task_samples, right_task_samples, gt_task_samples = preprocessing.random_crop(
            self._crop_shape,
            [left_task_samples, right_task_samples, gt_task_samples])

    # augmentation
    if self._augment:
        left_task_samples, right_task_samples = preprocessing.augment(
            left_task_samples, right_task_samples)

    return [left_task_samples, right_task_samples, gt_task_samples]
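# --- Shape sketch (illustrative assumption): one task is a [3, sequence_length]
# bundle of path strings, i.e. files[0]/[1]/[2] hold the left, right and gt
# file names of a whole adaptation sequence. All paths below are hypothetical.
def _example_task_files(sequence_length):
    lefts = ["left/%06d.png" % i for i in range(sequence_length)]
    rights = ["right/%06d.png" % i for i in range(sequence_length)]
    gts = ["gt/%06d.png" % i for i in range(sequence_length)]
    return tf.constant([lefts, rights, gts])  # shape [3, sequence_length]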
def weighting_network(input_data,
                      reuse=False,
                      kernel_size=3,
                      kernel_channels=64,
                      training=False,
                      activation=lambda x: tf.nn.sigmoid(x),
                      scale_factor=4):
    num_channel = input_data.shape[-1].value
    full_res_shape = input_data.get_shape().as_list()
    # the weighting branch never backpropagates into its input
    input_data = tf.stop_gradient(input_data)
    with tf.variable_scope('feed_forward', reuse=reuse):
        input_data = preprocessing.rescale_image(
            input_data, [x // scale_factor for x in full_res_shape[1:3]])
        x = conv2d(
            input_data,
            [kernel_size, kernel_size, num_channel, kernel_channels // 2],
            training=training,
            padding="SAME",
            name="conv1",
            bn=True)
        x = conv2d(
            x,
            [kernel_size, kernel_size, kernel_channels // 2, kernel_channels],
            training=training,
            padding="SAME",
            name="conv2",
            bn=True)
        weight = conv2d(
            x,
            [kernel_size, kernel_size, kernel_channels, 1],
            activation=activation,
            training=training,
            padding="SAME",
            name="conv3")
        weight_scaled = preprocessing.rescale_image(weight, full_res_shape[1:3])

        # normalize weights
        weight_scaled = normalize(weight_scaled, blind=True)

    return weight_scaled, tf.get_collection(
        tf.GraphKeys.GLOBAL_VARIABLES,
        scope=tf.get_variable_scope().name + '/feed_forward')
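# --- Usage sketch (assumption, not from the original code): reweighting a
# per-pixel proxy error map. `proxy_error` is a hypothetical [batch, H, W, 1]
# tensor with a static spatial shape (weighting_network reads it via
# get_shape()).
def _example_weighted_loss(proxy_error):
    weights, weight_vars = weighting_network(proxy_error, training=True)
    # the input is detached inside weighting_network, so the confidence
    # branch is trained only through `weights`; the direct `proxy_error`
    # term still carries gradients to whatever produced it
    loss = tf.reduce_mean(weights * proxy_error)
    return loss, weight_vars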
def _build_input_ops(self):
    # input placeholder ops
    self._left_placeholder = tf.placeholder(tf.float32,
                                            shape=[1, None, None, 3],
                                            name='left_input')
    self._right_placeholder = tf.placeholder(tf.float32,
                                             shape=[1, None, None, 3],
                                             name='right_input')

    self._left_input = self._left_placeholder
    self._right_input = self._right_placeholder

    if self._image_shape[0] is not None:
        self._left_input = preprocessing.rescale_image(
            self._left_input, self._image_shape)
        self._right_input = preprocessing.rescale_image(
            self._right_input, self._image_shape)

    if self._crop_shape[0] is not None:
        self._left_input = tf.image.resize_image_with_crop_or_pad(
            self._left_input, self._crop_shape[0], self._crop_shape[1])
        self._right_input = tf.image.resize_image_with_crop_or_pad(
            self._right_input, self._crop_shape[0], self._crop_shape[1])
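# --- Inference sketch (illustrative; `model` and `sess` are assumed to be a
# built instance of the enclosing class and a tf.Session):
def _example_run(model, sess, left_np, right_np):
    # left_np / right_np: numpy arrays with shape [1, H, W, 3]
    return sess.run(model._disparities[-1],
                    feed_dict={model._left_placeholder: left_np,
                               model._right_placeholder: right_np})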
def scale_tensor(tensor, scale):
    return preprocessing.rescale_image(
        tensor, [tf.shape(tensor)[1] // scale, tf.shape(tensor)[2] // scale])
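# Example (sketch): build a coarse-to-fine pyramid from an input tensor;
# `image` is a hypothetical [batch, H, W, C] tensor.
def _example_pyramid(image, num_levels=3):
    return [scale_tensor(image, 2 ** level) for level in range(num_levels)]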
def _build_network(self, args):
    if args['correlation']:
        self._add_to_layers(
            'conv1a',
            sharedLayers.conv2d(self._left_input_batch, [7, 7, 3, 64],
                                strides=2,
                                name='conv1',
                                variable_collection=self._variable_collection))
        self._add_to_layers(
            'conv1b',
            sharedLayers.conv2d(self._right_input_batch, [7, 7, 3, 64],
                                strides=2,
                                name='conv1',
                                reuse=True,
                                variable_collection=self._variable_collection))
        self._add_to_layers(
            'conv2a',
            sharedLayers.conv2d(self._get_layer_as_input('conv1a'),
                                [5, 5, 64, 128],
                                strides=2,
                                name='conv2',
                                variable_collection=self._variable_collection))
        self._add_to_layers(
            'conv2b',
            sharedLayers.conv2d(self._get_layer_as_input('conv1b'),
                                [5, 5, 64, 128],
                                strides=2,
                                name='conv2',
                                reuse=True,
                                variable_collection=self._variable_collection))
        self._add_to_layers(
            'conv_redir',
            sharedLayers.conv2d(self._get_layer_as_input('conv2a'),
                                [1, 1, 128, 64],
                                strides=1,
                                name='conv_redir',
                                variable_collection=self._variable_collection))
        self._add_to_layers(
            'corr',
            sharedLayers.correlation(self._get_layer_as_input('conv2a'),
                                     self._get_layer_as_input('conv2b'),
                                     max_disp=MAX_DISP))
        self._add_to_layers(
            'conv3',
            sharedLayers.conv2d(tf.concat([
                self._get_layer_as_input('corr'),
                self._get_layer_as_input('conv_redir')
            ], axis=3), [5, 5, MAX_DISP * 2 + 1 + 64, 256],
                                strides=2,
                                name='conv3',
                                variable_collection=self._variable_collection))
    else:
        concat_inputs = tf.concat(
            [self._left_input_batch, self._right_input_batch], axis=-1)
        self._add_to_layers(
            'conv1',
            sharedLayers.conv2d(concat_inputs, [7, 7, 6, 64],
                                strides=2,
                                name='conv1'))
        self._add_to_layers(
            'conv2',
            sharedLayers.conv2d(self._get_layer_as_input('conv1'),
                                [5, 5, 64, 128],
                                strides=2,
                                name='conv2',
                                variable_collection=self._variable_collection))
        self._add_to_layers(
            'conv3',
            sharedLayers.conv2d(self._get_layer_as_input('conv2'),
                                [5, 5, 128, 256],
                                strides=2,
                                name='conv3',
                                variable_collection=self._variable_collection))

    self._add_to_layers(
        'conv3/1',
        sharedLayers.conv2d(self._get_layer_as_input('conv3'),
                            [3, 3, 256, 256],
                            strides=1,
                            name='conv3/1',
                            variable_collection=self._variable_collection))
    self._add_to_layers(
        'conv4',
        sharedLayers.conv2d(self._get_layer_as_input('conv3/1'),
                            [3, 3, 256, 512],
                            strides=2,
                            name='conv4',
                            variable_collection=self._variable_collection))
    self._add_to_layers(
        'conv4/1',
        sharedLayers.conv2d(self._get_layer_as_input('conv4'),
                            [3, 3, 512, 512],
                            strides=1,
                            name='conv4/1',
                            variable_collection=self._variable_collection))
    self._add_to_layers(
        'conv5',
        sharedLayers.conv2d(self._get_layer_as_input('conv4/1'),
                            [3, 3, 512, 512],
                            strides=2,
                            name='conv5',
                            variable_collection=self._variable_collection))
    self._add_to_layers(
        'conv5/1',
        sharedLayers.conv2d(self._get_layer_as_input('conv5'),
                            [3, 3, 512, 512],
                            strides=1,
                            name='conv5/1',
                            variable_collection=self._variable_collection))
    self._add_to_layers(
        'conv6',
        sharedLayers.conv2d(self._get_layer_as_input('conv5/1'),
                            [3, 3, 512, 1024],
                            strides=2,
                            name='conv6',
                            variable_collection=self._variable_collection))
    self._add_to_layers(
        'conv6/1',
        sharedLayers.conv2d(self._get_layer_as_input('conv6'),
                            [3, 3, 1024, 1024],
                            strides=1,
                            name='conv6/1',
                            variable_collection=self._variable_collection))

    # decoder with skip connections from the encoder
    self._upsampling_block(self._get_layer_as_input('conv6/1'),
                           self._get_layer_as_input('conv5/1'),
                           1024, 512, 512, name='up5')
    self._upsampling_block(self._get_layer_as_input('up5/concat'),
                           self._get_layer_as_input('conv4/1'),
                           512, 256, 512, name='up4')
    self._upsampling_block(self._get_layer_as_input('up4/concat'),
                           self._get_layer_as_input('conv3/1'),
                           256, 128, 256, name='up3')

    if args['correlation']:
        self._upsampling_block(self._get_layer_as_input('up3/concat'),
                               self._get_layer_as_input('conv2a'),
                               128, 64, 128, name='up2')
    else:
        self._upsampling_block(self._get_layer_as_input('up3/concat'),
                               self._get_layer_as_input('conv2'),
                               128, 64, 128, name='up2')

    if args['correlation']:
        self._upsampling_block(self._get_layer_as_input('up2/concat'),
                               self._get_layer_as_input('conv1a'),
                               64, 32, 64, name='up1')
    else:
        self._upsampling_block(self._get_layer_as_input('up2/concat'),
                               self._get_layer_as_input('conv1'),
                               64, 32, 64, name='up1')

    self._add_to_layers(
        'prediction',
        sharedLayers.conv2d(self._get_layer_as_input('up1/concat'),
                            [3, 3, 32, 1],
                            strides=1,
                            activation=lambda x: x,
                            name='prediction',
                            variable_collection=self._variable_collection))
    self._disparities.append(self._layers['prediction'])

    # the prediction is at half resolution: upsample it to the input size
    # and double the disparity values accordingly
    rescaled_prediction = preprocessing.rescale_image(
        self._layers['prediction'],
        tf.shape(self._left_input_batch)[1:3]) * 2
    self._layers['rescaled_prediction'] = tf.image.resize_image_with_crop_or_pad(
        rescaled_prediction, self._restore_shape[0], self._restore_shape[1])
    self._disparities.append(self._layers['rescaled_prediction'])
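# --- Construction sketch (illustrative; the enclosing DispNet-style class
# and its initialisation are assumptions based on this file):
def _example_build(net, correlation=True):
    net._build_network({'correlation': correlation})
    half_res = net._layers['prediction']           # half-resolution disparity
    full_res = net._layers['rescaled_prediction']  # upsampled, values doubled
    return half_res, full_res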