def __init__(
    self,
    kernel_size: int,
    nplants: int,
    nrows: int,
    ncols: int,
    reset_gate: bool = True,
    update_gate: bool = True,
    renormalize_inputs: bool = True,
    min_update: float = 0.0,
    min_reset: float = 0.0,
    rnn_act: Optional[str] = None,
    additive: bool = False,
) -> None:
    """Build a single-channel ConvGRU cell over a (nplants, nrows, ncols) grid.

    Args:
        kernel_size: Spatial kernel size of the ConvGRU convolution.
        nplants: Number of plants (channels of the input grid).
        nrows: Grid height.
        ncols: Grid width.
        reset_gate: Whether the GRU uses a reset gate.
        update_gate: Whether the GRU uses an update gate.
        renormalize_inputs: If True, learn a per-plant scale ``gam`` and
            disable the GRU output bias (``out_bias=not renormalize_inputs``).
        min_update: Lower bound for the update gate, forwarded to the cell.
        min_reset: Lower bound for the reset gate, forwarded to the cell.
        rnn_act: Optional name of the GRU output activation.
        additive: If True, skip the final per-cell weighting and use a single
            scalar bias instead of learned per-cell log-weights.
    """
    super().__init__()
    # One shared single-channel cell: every plant/cell is processed with the
    # same GRU parameters (input_size=hidden_size=groups=1).
    self.gru = ConvGRUCell(
        input_size=1,
        hidden_size=1,
        kernel_size=kernel_size,
        groups=1,
        reset_gate=reset_gate,
        update_gate=update_gate,
        out_bias=(not renormalize_inputs),
        out_act=rnn_act,
        min_update=min_update,
        min_reset=min_reset,
    )
    if not additive:
        # NOTE(review): a boolean stands in for the removed Conv2d readout;
        # downstream code presumably only checks truthiness of conv_final —
        # confirm against the forward pass.
        self.conv_final = True
        # Learned per-(plant, row, col) log-weights, small random init.
        self.log_add_weights = nn.Parameter(
            0.02 * torch.randn(nplants, nrows, ncols)
        )
    else:
        self.conv_final = None
        self.bias = nn.Parameter(torch.tensor(1.0))
    self.nplants = nplants
    self.nrows = nrows
    self.ncols = ncols
    self.rnn_act = rnn_act
    if renormalize_inputs:
        self.renormalize_inputs = True
        # Per-plant input scale (gamma); no beta/offset is learned.
        self.gam = nn.Parameter(torch.ones(nplants))
    else:
        self.renormalize_inputs = None
def __init__(self, hidden_size, input_size, kernel_size):
    """Build a three-stage ConvGRU decoder with upsampling between stages.

    Args:
        hidden_size: Sequence of three channel counts, one per ConvGRU stage.
        input_size: Channel count of the encoder features fed to stage 0.
        kernel_size: Sequence of three kernel sizes, one per ConvGRU stage.
    """
    super(DecoderRNN, self).__init__()
    self.hidden_size = hidden_size
    self.kernel_size = kernel_size
    self.input_size = input_size
    self.bn1 = nn.BatchNorm2d(8)
    self.print_log = False
    # Stage 0: ConvGRU followed by a 2x transposed-conv upsample.
    self.gruc_0 = ConvGRUCell(input_size, hidden_size[0], kernel_size[0])
    self.conv_pre_0 = deconv2_act(hidden_size[0], out_channels=hidden_size[0],
                                  kernel_size=4, stride=2, padding=1)
    # Stage 1: ConvGRU followed by a 2x transposed-conv upsample.
    self.gruc_1 = ConvGRUCell(hidden_size[0], hidden_size[1], kernel_size[1])
    self.conv_pre_1 = deconv2_act(hidden_size[1], out_channels=hidden_size[1],
                                  kernel_size=4, stride=2, padding=1)
    # Stage 2: ConvGRU followed by two plain conv layers that reduce the
    # features to a single output channel.
    self.gruc_2 = ConvGRUCell(hidden_size[1], hidden_size[2], kernel_size[2])
    self.conv_pre_2_0 = conv2_act(hidden_size[2], out_channels=16,
                                  kernel_size=3, stride=1, padding=1)
    self.conv_pre_2_1 = conv2_act(16, out_channels=1, kernel_size=3, stride=1)
def inference_winner_take_all(images, cams, depth_num, depth_start, depth_end,
                              is_master_gpu=True, reg_type='GRU', inverse_depth=False):
    """Infer a depth map from multi-view images by winner-take-all regression.

    Sweeps ``depth_num`` fronto-parallel planes, regularizes the per-plane
    matching cost with a stack of three ConvGRU cells, and keeps, per pixel,
    the depth of the plane with the highest probability.

    Args:
        images: [batch, view_num, H, W, 3] input views; view 0 is the reference.
        cams: [batch, view_num, 2, 4, 4] camera parameters per view.
        depth_num: Number of depth planes to sweep.
        depth_start: First plane depth.
        depth_end: Last plane depth.
        is_master_gpu: If True, create feature-extractor variables; otherwise
            reuse existing ones.
        reg_type: Unused here (kept for interface compatibility).
        inverse_depth: If True, sample planes uniformly in inverse depth.

    Returns:
        Tuple of (depth map, per-pixel max probability / (sum of exp-probs)).
    """
    if not inverse_depth:
        depth_interval = (depth_end - depth_start) / (tf.cast(depth_num, tf.float32) - 1)

    # reference image
    ref_image = tf.squeeze(tf.slice(images, [0, 0, 0, 0, 0], [-1, 1, -1, -1, 3]), axis=1)
    ref_cam = tf.squeeze(tf.slice(cams, [0, 0, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1)

    # image feature extraction (variables created once, reused for other views)
    if is_master_gpu:
        ref_tower = UNetDS2GN({'data': ref_image}, is_training=True, reuse=False)
    else:
        ref_tower = UNetDS2GN({'data': ref_image}, is_training=True, reuse=True)
    view_towers = []
    for view in range(1, FLAGS.view_num):
        view_image = tf.squeeze(
            tf.slice(images, [0, view, 0, 0, 0], [-1, 1, -1, -1, -1]), axis=1)
        view_tower = UNetDS2GN({'data': view_image}, is_training=True, reuse=True)
        view_towers.append(view_tower)

    # get all homographies (one stack of depth_num homographies per view)
    view_homographies = []
    for view in range(1, FLAGS.view_num):
        view_cam = tf.squeeze(tf.slice(cams, [0, view, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1)
        if inverse_depth:
            homographies = get_homographies_inv_depth(
                ref_cam, view_cam, depth_num=depth_num,
                depth_start=depth_start, depth_end=depth_end)
        else:
            homographies = get_homographies(
                ref_cam, view_cam, depth_num=depth_num,
                depth_start=depth_start, depth_interval=depth_interval)
        view_homographies.append(homographies)

    # gru unit
    gru1_filters = 16
    gru2_filters = 4
    gru3_filters = 2
    # Integer division: Python 3 '/' would yield floats, which are invalid
    # tensor dimensions for tf.zeros / set_shape below.
    feature_shape = [FLAGS.batch_size, FLAGS.max_h // 4, FLAGS.max_w // 4, 32]
    gru_input_shape = [feature_shape[1], feature_shape[2]]
    state1 = tf.zeros([FLAGS.batch_size, feature_shape[1], feature_shape[2], gru1_filters])
    state2 = tf.zeros([FLAGS.batch_size, feature_shape[1], feature_shape[2], gru2_filters])
    state3 = tf.zeros([FLAGS.batch_size, feature_shape[1], feature_shape[2], gru3_filters])
    conv_gru1 = ConvGRUCell(shape=gru_input_shape, kernel=[3, 3], filters=gru1_filters)
    conv_gru2 = ConvGRUCell(shape=gru_input_shape, kernel=[3, 3], filters=gru2_filters)
    conv_gru3 = ConvGRUCell(shape=gru_input_shape, kernel=[3, 3], filters=gru3_filters)

    # initialize accumulator variables (local, not trained, not saved)
    exp_sum = tf.Variable(
        tf.zeros([FLAGS.batch_size, feature_shape[1], feature_shape[2], 1]),
        name='exp_sum', trainable=False,
        collections=[tf.GraphKeys.LOCAL_VARIABLES])
    depth_image = tf.Variable(
        tf.zeros([FLAGS.batch_size, feature_shape[1], feature_shape[2], 1]),
        name='depth_image', trainable=False,
        collections=[tf.GraphKeys.LOCAL_VARIABLES])
    max_prob_image = tf.Variable(
        tf.zeros([FLAGS.batch_size, feature_shape[1], feature_shape[2], 1]),
        name='max_prob_image', trainable=False,
        collections=[tf.GraphKeys.LOCAL_VARIABLES])
    init_map = tf.zeros([FLAGS.batch_size, feature_shape[1], feature_shape[2], 1])

    # define winner take all loop
    def body(depth_index, state1, state2, state3, depth_image, max_prob_image,
             exp_sum, incre):
        """Loop body: cost at one depth plane, GRU regularization, WTA update."""
        # variance-based cost across the reference and warped view features
        ave_feature = ref_tower.get_output()
        ave_feature2 = tf.square(ref_tower.get_output())
        for view in range(0, FLAGS.view_num - 1):
            homographies = view_homographies[view]
            homographies = tf.transpose(homographies, perm=[1, 0, 2, 3])
            homography = homographies[depth_index]
            warped_view_feature = tf_transform_homography(
                view_towers[view].get_output(), homography)
            ave_feature = ave_feature + warped_view_feature
            ave_feature2 = ave_feature2 + tf.square(warped_view_feature)
        ave_feature = ave_feature / FLAGS.view_num
        ave_feature2 = ave_feature2 / FLAGS.view_num
        cost = ave_feature2 - tf.square(ave_feature)
        cost.set_shape([FLAGS.batch_size, feature_shape[1], feature_shape[2], 32])

        # gru regularization; negated cost so higher is better
        reg_cost1, state1 = conv_gru1(-cost, state1, scope='conv_gru1')
        reg_cost2, state2 = conv_gru2(reg_cost1, state2, scope='conv_gru2')
        reg_cost3, state3 = conv_gru3(reg_cost2, state3, scope='conv_gru3')
        reg_cost = tf.layers.conv2d(
            reg_cost3, 1, 3, padding='same', reuse=tf.AUTO_REUSE,
            name='prob_conv')
        prob = tf.exp(reg_cost)

        # depth of the current plane
        d_idx = tf.cast(depth_index, tf.float32)
        if inverse_depth:
            inv_depth_start = tf.div(1.0, depth_start)
            inv_depth_end = tf.div(1.0, depth_end)
            inv_interval = (inv_depth_start - inv_depth_end) / (
                tf.cast(depth_num, 'float32') - 1)
            inv_depth = inv_depth_start - d_idx * inv_interval
            depth = tf.div(1.0, inv_depth)
        else:
            depth = depth_start + d_idx * depth_interval
        temp_depth_image = tf.reshape(depth, [FLAGS.batch_size, 1, 1, 1])
        temp_depth_image = tf.tile(
            temp_depth_image, [1, feature_shape[1], feature_shape[2], 1])

        # winner-take-all: keep the depth where probability is maximal so far
        update_flag_image = tf.cast(tf.less(max_prob_image, prob), dtype='float32')
        new_max_prob_image = update_flag_image * prob + \
            (1 - update_flag_image) * max_prob_image
        new_depth_image = update_flag_image * temp_depth_image + \
            (1 - update_flag_image) * depth_image
        max_prob_image = tf.assign(max_prob_image, new_max_prob_image)
        depth_image = tf.assign(depth_image, new_depth_image)

        # update counter
        exp_sum = tf.assign_add(exp_sum, prob)
        depth_index = tf.add(depth_index, incre)

        return depth_index, state1, state2, state3, depth_image, \
            max_prob_image, exp_sum, incre

    # run forward loop over all depth planes (sequential, no gradients)
    exp_sum = tf.assign(exp_sum, init_map)
    depth_image = tf.assign(depth_image, init_map)
    max_prob_image = tf.assign(max_prob_image, init_map)
    depth_index = tf.constant(0)
    incre = tf.constant(1)
    cond = lambda depth_index, *_: tf.less(depth_index, depth_num)
    _, state1, state2, state3, depth_image, max_prob_image, exp_sum, incre = tf.while_loop(
        cond, body,
        [depth_index, state1, state2, state3, depth_image, max_prob_image, exp_sum, incre],
        back_prop=False, parallel_iterations=1)

    # get output; epsilon avoids division by zero in the confidence map
    forward_exp_sum = exp_sum + 1e-7
    forward_depth_map = depth_image
    return forward_depth_map, max_prob_image / forward_exp_sum
def inference_prob_recurrent(images, cams, depth_num, depth_start, depth_interval,
                             is_master_gpu=True):
    """Infer a per-plane probability volume from multi-view images.

    Builds the variance cost for each of ``depth_num`` planes, regularizes it
    sequentially with three ConvGRU cells, and returns a softmax-normalized
    probability volume over the depth dimension.

    Args:
        images: [batch, view_num, H, W, 3] input views; view 0 is the reference.
        cams: [batch, view_num, 2, 4, 4] camera parameters per view.
        depth_num: Number of depth planes.
        depth_start: First plane depth.
        depth_interval: Spacing between consecutive planes.
        is_master_gpu: If True, create feature-extractor variables; otherwise
            reuse existing ones.

    Returns:
        Probability volume of shape [batch, depth_num, H/4, W/4, 1].
    """
    # dynamic gpu params
    depth_end = depth_start + (tf.cast(depth_num, tf.float32) - 1) * depth_interval

    # reference image
    ref_image = tf.squeeze(tf.slice(images, [0, 0, 0, 0, 0], [-1, 1, -1, -1, 3]), axis=1)
    ref_cam = tf.squeeze(tf.slice(cams, [0, 0, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1)

    # image feature extraction (variables created once, reused for other views)
    if is_master_gpu:
        ref_tower = UNetDS2GN({'data': ref_image}, is_training=True, reuse=False)
    else:
        ref_tower = UNetDS2GN({'data': ref_image}, is_training=True, reuse=True)
    view_towers = []
    for view in range(1, FLAGS.view_num):
        view_image = tf.squeeze(
            tf.slice(images, [0, view, 0, 0, 0], [-1, 1, -1, -1, -1]), axis=1)
        view_tower = UNetDS2GN({'data': view_image}, is_training=True, reuse=True)
        view_towers.append(view_tower)

    # get all homographies (one stack of depth_num homographies per view)
    view_homographies = []
    for view in range(1, FLAGS.view_num):
        view_cam = tf.squeeze(tf.slice(cams, [0, view, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1)
        homographies = get_homographies(
            ref_cam, view_cam, depth_num=depth_num,
            depth_start=depth_start, depth_interval=depth_interval)
        view_homographies.append(homographies)

    gru1_filters = 16
    gru2_filters = 4
    gru3_filters = 2
    # Integer division: Python 3 '/' would yield floats, which are invalid
    # tensor dimensions for the tf.zeros calls below.
    feature_shape = [FLAGS.batch_size, FLAGS.max_h // 4, FLAGS.max_w // 4, 32]
    gru_input_shape = [feature_shape[1], feature_shape[2]]
    state1 = tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], gru1_filters])
    state2 = tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], gru2_filters])
    state3 = tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], gru3_filters])
    conv_gru1 = ConvGRUCell(shape=gru_input_shape, kernel=[3, 3], filters=gru1_filters)
    conv_gru2 = ConvGRUCell(shape=gru_input_shape, kernel=[3, 3], filters=gru2_filters)
    conv_gru3 = ConvGRUCell(shape=gru_input_shape, kernel=[3, 3], filters=gru3_filters)
    exp_div = tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], 1])
    soft_depth_map = tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], 1])

    with tf.name_scope('cost_volume_homography'):
        # forward cost volume, one regularized cost slice per depth plane
        depth_costs = []
        for d in range(depth_num):
            # compute cost (variation metric) across reference + warped views
            ave_feature = ref_tower.get_output()
            ave_feature2 = tf.square(ref_tower.get_output())
            for view in range(0, FLAGS.view_num - 1):
                homography = tf.slice(
                    view_homographies[view], begin=[0, d, 0, 0], size=[-1, 1, 3, 3])
                homography = tf.squeeze(homography, axis=1)
                warped_view_feature = tf_transform_homography(
                    view_towers[view].get_output(), homography)
                ave_feature = ave_feature + warped_view_feature
                ave_feature2 = ave_feature2 + tf.square(warped_view_feature)
            ave_feature = ave_feature / FLAGS.view_num
            ave_feature2 = ave_feature2 / FLAGS.view_num
            cost = ave_feature2 - tf.square(ave_feature)

            # gru regularization; negated cost so higher is better
            reg_cost1, state1 = conv_gru1(-cost, state1, scope='conv_gru1')
            reg_cost2, state2 = conv_gru2(reg_cost1, state2, scope='conv_gru2')
            reg_cost3, state3 = conv_gru3(reg_cost2, state3, scope='conv_gru3')
            reg_cost = tf.layers.conv2d(
                reg_cost3, 1, 3, padding='same', reuse=tf.AUTO_REUSE,
                name='prob_conv')
            depth_costs.append(reg_cost)

        prob_volume = tf.stack(depth_costs, axis=1)
        prob_volume = tf.nn.softmax(prob_volume, axis=1, name='prob_volume')

    return prob_volume