def seg_stack(net, mdl_im_feats, roi_im_feats, scope='ImSeg_Net'):
    ''' Build the segmentation stack on top of model-view and ROI features.

    The per-view model features are laid side by side along the last axis,
    joined with the ROI features, and pushed through four conv layers, one
    deconv layer and a final 1-channel conv. Every intermediate tensor is
    recorded in net.seg_net under the key scope + '_<layer name>'.

    Args:
        net: network container. Reads mdl_im_tensor_shape, batch_size, norm,
            mode and keep_prob; writes into the net.seg_net mapping.
        mdl_im_feats: model image features; uncollapsed here with
            (net.batch_size, mdl_im_bs) and then indexed as a rank-5 tensor
            whose axis 1 is the view index.
        roi_im_feats: features concatenated after the view features on the
            last axis (must be concat-compatible with one view slice).
        scope: variable scope name, also used as the net.seg_net key prefix.

    Returns:
        The tensor produced by the final 'out' layer.
    '''
    mdl_im_bs = net.mdl_im_tensor_shape[1]
    with tf.variable_scope(scope):
        mdl_im_feats = uncollapse_dims(mdl_im_feats, net.batch_size,
                                       mdl_im_bs)

        # One concat over all view slices (plus the ROI features) instead of
        # growing the tensor pairwise inside a loop: same values, a single
        # op, and no repeated copying of the partial result.
        view_feats = [mdl_im_feats[:, v, :, :, :] for v in range(mdl_im_bs)]
        stack = tf.concat(view_feats + [roi_im_feats], -1)
        net.seg_net[scope + '_stack'] = stack

        conv1 = conv2d('conv1', stack, 2, 1024, stride=1, norm=net.norm,
                       mode=net.mode)
        net.seg_net[scope + '_conv1'] = conv1

        conv2 = conv2d('conv2', conv1, 2, 512, stride=2, norm=net.norm,
                       mode=net.mode)
        net.seg_net[scope + '_conv2'] = conv2

        conv3 = conv2d('conv3', conv2, 3, 256, stride=1, norm=net.norm,
                       mode=net.mode)
        net.seg_net[scope + '_conv3'] = conv3

        conv4 = conv2d('conv4', conv3, 3, 256, stride=1, norm=net.norm,
                       mode=net.mode)
        # The recorded conv4 entry is the post-dropout tensor.
        conv4 = dropout(conv4, net.keep_prob)
        net.seg_net[scope + '_conv4'] = conv4

        deconv1 = deconv2d('deconv1', conv4, 2, 256, stride=2, norm=net.norm,
                           mode=net.mode)
        net.seg_net[scope + '_deconv1'] = deconv1

        out = conv2d('out', deconv1, 1, 1, stride=1, norm=net.norm,
                     mode=net.mode)
        net.seg_net[scope + '_out'] = out
        return out
def model_vlsm(net,
               im_net=im_unet,
               grid_net=grid_unet32,
               rnn=convgru,
               scope_name='MVNet'):
    ''' Voxel LSTM model.

    Wires up the graph on `net`: input placeholders, image features, the
    unprojected feature grid, recurrent fusion of the grids, and the 3D grid
    network that yields voxel predictions.

    Args:
        net: network container. Reads im_tensor_shape, K_tensor_shape,
            R_tensor_shape, batch_size and im_batch; receives the attributes
            ims, K, Rcam, im_feats, cost_grid, pool_grid, pred_vox, prob_vox.
        im_net: callable (net, images) producing image features.
        grid_net: callable (net, grid) producing voxel predictions.
        rnn: callable taking the cost grid and returning a 2-tuple whose
            first element is the fused grid.
        scope_name: name of the enclosing variable scope.

    Returns:
        The same `net` object, with the attributes above populated.
    '''
    with tf.variable_scope(scope_name):
        # Input placeholders: images, camera intrinsics and extrinsics
        net.ims = tf.placeholder(tf.float32, net.im_tensor_shape, name='ims')
        net.K = tf.placeholder(tf.float32, net.K_tensor_shape, name='K')
        net.Rcam = tf.placeholder(tf.float32, net.R_tensor_shape, name='R')

        # Image encoder over the collapsed image tensor
        flat_ims = collapse_dims(net.ims)
        net.im_feats = im_net(net, flat_ims)

        # Lift image features into the feature grid
        net.cost_grid = proj_splat(net, net.im_feats, net.K, net.Rcam)

        # Fuse grids with the recurrent unit (second output is unused)
        fused_grid = rnn(net.cost_grid)[0]
        net.pool_grid = fused_grid

        # 3D reasoning on the collapsed grid, then restore leading dims
        net.pool_grid = collapse_dims(net.pool_grid)
        vox_logits = grid_net(net, net.pool_grid)
        net.pred_vox = vox_logits
        net.pred_vox = uncollapse_dims(net.pred_vox, net.batch_size,
                                       net.im_batch)
        net.prob_vox = tf.nn.sigmoid(net.pred_vox)

    return net
def _skip_unet(d_f, im_f):
    ''' Concatenate repeated image features onto depth features (skip link).

    Layout as stated by the original author:
        im_f: bs x im_bs x ... ; d_f: bs x t x im_bs ...

    The image features are uncollapsed, repeated along dim 1 by the ratio of
    the leading static dims of d_f and im_f, reshaped to d_f's shape with
    im_f's channel count, and concatenated in front of d_f on the last axis.

    Args:
        d_f: depth feature tensor with a fully known leading static dim.
        im_f: image feature tensor (collapsed form on entry).

    Returns:
        Tensor shaped like d_f except for the last axis, which holds the
        image channels followed by the d_f channels.
    '''
    with tf.variable_scope('Skip'):
        d_shape = tf_static_shape(d_f)
        im_shape = tf_static_shape(im_f)

        # NOTE(review): `net` is not a parameter of this function; it has to
        # resolve from an enclosing or module-level scope -- confirm.
        im_f = uncollapse_dims(im_f, net.batch_size, net.im_batch)

        # Floor division keeps the repeat count an integer on Python 3 too;
        # for the integer static dims used here it equals Python 2's `/`.
        n_rep = d_shape[0] // im_shape[0]
        im_rep = repeat_tensor(im_f, n_rep, rep_dim=1)
        im_rep = tf.reshape(im_rep, d_shape[:-1] + [im_shape[-1]])
        return tf.concat([im_rep, d_f], axis=-1)
def model_dlsm(net,
               im_net=im_unet,
               grid_net=grid_unet32,
               rnn=convgru,
               ray_samples=64,
               proj_x=4,
               sepup=False,
               im_skip=True,
               proj_last=False):
    ''' Depth LSTM model.

    Builds the graph on `net`: placeholders, image features, unprojected
    feature grid, recurrent fusion, grid network, projection of the grid
    features into ray slices, and a depth network on those slices.

    Args:
        net: network container. Reads im_tensor_shape, K_tensor_shape,
            R_tensor_shape, batch_size, im_batch, im_h, im_w and the
            'deconv3' entry of net.grid_net (presumably filled in by
            `grid_net` -- confirm); receives ims, K, Rcam, im_feats,
            cost_grid, pool_grid, pred_vox, proj_vox, ray_slices, depth_in
            and depth_out.
        im_net: callable (net, images) producing image features.
        grid_net: callable (net, grid) producing voxel predictions.
        rnn: callable taking the cost grid and returning a 2-tuple whose
            first element is the fused grid.
        ray_samples: forwarded to proj_slice as `samples`; also the factor
            by which the feature dim is multiplied when flattening slices.
        proj_x: subsample ratio for the projection; must be 4 or 8.
        sepup: if True, use the *_sepup depth network variant.
        im_skip: forwarded to the depth network.
        proj_last: if True, project only the last entry along axis 1 of
            net.proj_vox instead of all of them.

    Returns:
        The same `net` object, with the attributes above populated.

    Raises:
        ValueError: if proj_x is neither 4 nor 8.
    '''
    with tf.variable_scope('MVNet'):
        # Setup placeholders for im, depth, extrinsic and intrinsic matrices
        net.ims = tf.placeholder(tf.float32, net.im_tensor_shape, name='ims')
        net.K = tf.placeholder(tf.float32, net.K_tensor_shape, name='K')
        net.Rcam = tf.placeholder(tf.float32, net.R_tensor_shape, name='R')

        # Compute image features
        net.im_feats = im_net(net, collapse_dims(net.ims))

        # Unproject feature grid
        net.cost_grid = proj_splat(net, net.im_feats, net.K, net.Rcam)

        # Combine grids with LSTM/GRU
        net.pool_grid, _ = rnn(net.cost_grid)

        # Grid network
        net.pool_grid = collapse_dims(net.pool_grid)
        net.pred_vox = grid_net(net, net.pool_grid)
        net.proj_vox = uncollapse_dims(net.grid_net['deconv3'],
                                       net.batch_size, net.im_batch)

        # Projection
        proj_vox_in = (net.proj_vox
                       if not proj_last else net.proj_vox[:, -1:, ...])
        # Floor division keeps proj_size an integer on Python 3 as well; for
        # integer im_h / proj_x it matches Python 2's `/`.
        net.ray_slices, z_samples = proj_slice(
            net,
            proj_vox_in,
            net.K,
            net.Rcam,
            proj_size=net.im_h // proj_x,
            samples=ray_samples)

        bs, im_bs, ks, im_sz1, im_sz2, fdim, _ = tf_static_shape(
            net.ray_slices)
        # Fold the three leading dims together and merge the trailing sample
        # axis into the feature axis.
        net.depth_in = tf.reshape(
            net.ray_slices,
            [bs * im_bs * ks, im_sz1, im_sz2, fdim * ray_samples])

        # Depth network
        if proj_x == 4:
            if not sepup:
                net.depth_out = depth_net_x4(net, net.depth_in, im_skip)
            else:
                net.depth_out = depth_net_x4_sepup(net, net.depth_in,
                                                   im_skip)
        elif proj_x == 8:
            if not sepup:
                net.depth_out = depth_net_x8(net, net.depth_in, im_skip)
            else:
                net.depth_out = depth_net_x8_sepup(net, net.depth_in,
                                                   im_skip)
        else:
            logger = logging.getLogger('mview3d.' + __name__)
            logger.error(
                'Unsupported subsample ratio for projection. Use {4, 8}')
            # Fail here with a clear error: no depth network was built, so
            # the reshape below would otherwise trip over a missing
            # net.depth_out.
            raise ValueError(
                'Unsupported subsample ratio for projection. Use {4, 8}')

        net.depth_out = tf.reshape(net.depth_out,
                                   [bs, im_bs, ks, net.im_h, net.im_w, 1])
        return net