def normal_est_net(grid_fisher, bn_decay, is_training, weight_decay, scope_str):
    """Build the normal-estimation network: an inception CNN followed by an FC head.

    The convolutional stack is fixed: three inception modules, a max-pool,
    two inception modules and a final max-pool. According to the original
    author's note the architecture currently supports only an 8 x 8 x 8
    grid of Gaussians.

    :param grid_fisher: input tensor; its static first dimension is used as the batch size.
        NOTE(review): presumably the 3DmFV grid in [b, res, res, res, c] layout - confirm with the caller.
    :param bn_decay: batch-norm decay forwarded to the inception modules and to fc1-fc3
    :param is_training: training flag forwarded to every layer
    :param weight_decay: forwarded to the fully connected layers (via tf_util's ``weigth_decay`` keyword)
    :param scope_str: suffix appended to every scope name created here
    :return: output of the final 3-unit linear layer, passed through ``tf.squeeze``
    """
    batch_size = grid_fisher.get_shape()[0].value

    # CNN stack in execution order. The 1-based position of each entry is part
    # of the scope name, e.g. 'inception1', 'maxpool4', 'maxpool7'.
    cnn_spec = (
        ('inception', 128),
        ('inception', 256),
        ('inception', 256),
        ('maxpool', None),
        ('inception', 512),
        ('inception', 512),
        ('maxpool', None),
    )
    net = grid_fisher
    for position, (kind, n_filters) in enumerate(cnn_spec, start=1):
        scope = kind + str(position) + scope_str
        if kind == 'inception':
            net = inception_module(net, n_filters=n_filters, kernel_sizes=[3, 5],
                                   is_training=is_training, bn_decay=bn_decay, scope=scope)
        else:
            net = tf_util.max_pool3d(net, [2, 2, 2], scope=scope, stride=[2, 2, 2], padding='SAME')

    # Flatten everything except the batch axis.
    global_feature = tf.reshape(net, [batch_size, -1])

    # FC head: three batch-normalised hidden layers, then a linear 3-unit output.
    net = global_feature
    for fc_index, width in enumerate((1024, 256, 128), start=1):
        net = tf_util.fully_connected(net, width, bn=True, is_training=is_training,
                                      scope='fc' + str(fc_index) + scope_str,
                                      bn_decay=bn_decay, weigth_decay=weight_decay)
    net = tf_util.fully_connected(net, 3, activation_fn=None, scope='fc4' + scope_str,
                                  is_training=is_training, weigth_decay=weight_decay)

    # NOTE: tf.squeeze drops every size-1 axis, so a batch of one loses its batch axis here.
    return tf.squeeze(net)
def get_model(sequence, is_training, num_classes=10, bn_decay=0.999, weight_decay=0.0001, sn=4, pool_t=False, pool_first=False, freeze_bn=False):
    """Build a residual 3D-conv classifier with optional ``senot_module`` branches.

    Per the original docstring the input is B x T x H x W x 3. The returned
    logits have ``num_classes`` units (the last layer is a linear FC layer of
    that width).

    :param sequence: input tensor
    :param is_training: training flag forwarded to the layers
    :param num_classes: width of the final fully connected layer
    :param bn_decay: batch-norm decay forwarded to the conv layers and senot modules
    :param weight_decay: weight decay forwarded to conv / senot / fc layers
    :param sn: ``k`` passed to ``net_utils.senot_module`` in residual groups 1 and 2
    :param pool_t: not used in this function
    :param pool_first: when true, the stem convolution uses stride 2 on its last two kernel axes
    :param freeze_bn: forwarded to the conv layers and senot modules
    :return: (logits, end_points) where end_points maps names to intermediate tensors
    """
    bsize = sequence.get_shape()[0].value  # not used below; retained from the original
    end_points = {}

    # Per residual group: (output channels, stride of the group's first block),
    # number of blocks, senot `k` (None disables the branch) and shrink ratio.
    group_specs = [(64, 1), (128, 2), (256, 2), (512, 2)]
    blocks_per_group = [2, 2, 2, 2]
    topks = [None, sn, sn, None]
    shrink_ratios = [None, 2, None, None]

    def _conv_bn(inputs, n_out, kernel, conv_stride, scope, **extra):
        # Batch-normalised conv3d sharing the hyper-parameters used throughout this model.
        return tf_util.conv3d(inputs, n_out, kernel, stride=conv_stride, bn=True, bn_decay=bn_decay,
                              is_training=is_training, weight_decay=weight_decay,
                              freeze_bn=freeze_bn, scope=scope, **extra)

    stem_stride = 2 if pool_first else 1
    net = _conv_bn(sequence, 64, [1, 3, 3], [1, stem_stride, stem_stride], 'conv0')
    net = tf_util.max_pool3d(net, [1, 3, 3], stride=[1, 2, 2], scope='pool0', padding='SAME')

    per_group = zip(group_specs, blocks_per_group, topks, shrink_ratios)
    for gp, ((n_channels, stride), n_blocks, topk, shrink_ratio) in enumerate(per_group):
        with tf.variable_scope('group{}'.format(gp)):
            for i in range(n_blocks):
                with tf.variable_scope('block{}'.format(i)):
                    end_points['res{}_{}_in'.format(gp, i)] = net

                    # Only the first block of a group applies the group's stride.
                    first_stride = stride if i == 0 else 1
                    branch = _conv_bn(net, n_channels, [1, 3, 3], [1, first_stride, first_stride], 'conv1')
                    branch = _conv_bn(branch, n_channels, [1, 3, 3], [1, 1, 1], 'conv2', activation_fn=None)

                    # Project the shortcut when the channel count changes.
                    if net.get_shape()[-1].value != n_channels:
                        net = _conv_bn(net, n_channels, [1, 1, 1], [1, stride, stride], 'convshortcut',
                                       activation_fn=None)
                    net = net + branch
                    end_points['res{}_{}_mid'.format(gp, i)] = net

                    if topk is not None:
                        c = net.get_shape()[-1].value
                        net_pointnet, end_point = net_utils.senot_module(
                            net, k=topk, mlp=[c // 4, c // 2], scope='pointnet',
                            is_training=is_training, bn_decay=bn_decay, weight_decay=weight_decay,
                            distance='l2', activation_fn=None, freeze_bn=freeze_bn,
                            shrink_ratio=shrink_ratio)
                        net = net + net_pointnet
                        end_points['pointnet{}_{}'.format(gp, i)] = end_point
                        end_points['after_pointnet{}_{}'.format(gp, i)] = net

                    net = tf.nn.relu(net)
                    end_points['res{}_{}_out'.format(gp, i)] = net

    # Average over axes 1-3, then classify.
    net = tf.reduce_mean(net, [1, 2, 3])
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp')
    net = tf_util.fully_connected(net, num_classes, activation_fn=None, weight_decay=weight_decay, scope='fc')
    return net, end_points
def noise_est_net(grid_fisher, bn_decay, is_training, weight_decay, scope_str):
    """Build the noise-estimation network: an inception CNN followed by an FC head.

    The convolutional stack is three inception modules, a max-pool, two
    inception modules and a max-pool. The head ends in a single ReLU unit.

    :param grid_fisher: input tensor; its static first dimension is used as the batch size.
        NOTE(review): presumably the 3DmFV grid in [b, res, res, res, c] layout - confirm with the caller.
    :param bn_decay: batch-norm decay forwarded to the inception modules and to fc1-fc3
    :param is_training: training flag forwarded to every layer
    :param weight_decay: forwarded to the fully connected layers (via tf_util's ``weigth_decay`` keyword)
    :param scope_str: suffix appended to every scope name created here
    :return: output of the final 1-unit ReLU layer, passed through ``tf.squeeze``
    """
    batch_size = grid_fisher.get_shape()[0].value

    def _inception(inputs, n_filters, layer_no):
        return inception_module(inputs, n_filters=n_filters, kernel_sizes=[3, 5],
                                is_training=is_training, bn_decay=bn_decay,
                                scope='inception' + str(layer_no) + scope_str)

    def _halve(inputs, layer_no):
        return tf_util.max_pool3d(inputs, [2, 2, 2], scope='maxpool' + str(layer_no) + scope_str,
                                  stride=[2, 2, 2], padding='SAME')

    def _hidden_fc(inputs, width, name):
        return tf_util.fully_connected(inputs, width, bn=True, is_training=is_training,
                                       scope=name + scope_str, bn_decay=bn_decay,
                                       weigth_decay=weight_decay)

    # CNN stack; the layer number is part of each scope name.
    net = _inception(grid_fisher, 128, 1)
    net = _inception(net, 256, 2)
    net = _inception(net, 256, 3)
    net = _halve(net, 4)
    net = _inception(net, 512, 5)
    net = _inception(net, 512, 6)
    net = _halve(net, 7)

    # Flatten everything except the batch axis.
    global_feature = tf.reshape(net, [batch_size, -1])

    # Noise-estimation head.
    net = _hidden_fc(global_feature, 1024, 'fc1')
    net = _hidden_fc(net, 256, 'fc2')
    net = _hidden_fc(net, 128, 'fc3')
    net = tf_util.fully_connected(net, 1, activation_fn=tf.nn.relu, scope='fc4' + scope_str,
                                  is_training=is_training, weigth_decay=weight_decay)

    # NOTE: tf.squeeze drops every size-1 axis, including the batch axis when the batch size is 1.
    return tf.squeeze(net)
def get_model(point_cloud, is_training, bn_decay=None, num_classes=10):
    """Build a small 3D-CNN classifier over a volumetric input.

    A trailing channel axis is added to the input before two strided 3D
    convolutions, a max-pool and a two-layer fully connected classifier.

    :param point_cloud: input tensor with a static batch size.
        NOTE(review): despite the name this appears to be a voxel grid of
        shape [B, vx, vy, vz] (a channel axis is appended and the result is
        fed to conv3d) - confirm with the caller.
    :param is_training: training flag forwarded to the conv / fc / dropout layers
    :param bn_decay: batch-norm decay forwarded to the batch-normalised layers
    :param num_classes: number of output logits. Defaults to 10, the width
        that was previously hard-coded.
    :return: (logits, end_points); logits has ``num_classes`` units and
        end_points is an empty dict
    """
    batch_size = point_cloud.get_shape()[0].value
    end_points = {}

    # conv3d needs an explicit channel axis.
    input_image = tf.expand_dims(point_cloud, -1)

    net = tf_util.conv3d(input_image, 32, [5, 5, 5], scope='conv1', stride=[2, 2, 2],
                         bn=True, is_training=is_training, padding='SAME', bn_decay=bn_decay)
    net = tf_util.conv3d(net, 32, [3, 3, 3], scope='conv2', stride=[2, 2, 2],
                         bn=True, is_training=is_training, padding='SAME', bn_decay=bn_decay)
    net = tf_util.max_pool3d(net, [2, 2, 2], padding='VALID', scope='maxpool')

    # Classifier on the flattened feature volume.
    net = tf.reshape(net, [batch_size, -1])
    net = tf_util.fully_connected(net, 128, bn=True, is_training=is_training,
                                  scope='fc1', bn_decay=bn_decay)
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp1')
    net = tf_util.fully_connected(net, num_classes, activation_fn=None, scope='fc2')

    return net, end_points
def get_model(points, w, mu, sigma, is_training, bn_decay=None, weigth_decay=0.005, add_noise=False, num_classes=40):
    """Build a classifier: ``get_fv_minmax`` representation fed to a small 3D CNN.

    :param points: batch of point clouds with a static batch size
        (the original docstring gives the input as B x N x 3)
    :param w: GMM weights; its static first dimension is the number of Gaussians
    :param mu: GMM means, forwarded to ``tf_util.get_fv_minmax``
    :param sigma: GMM sigmas, forwarded to ``tf_util.get_fv_minmax``
    :param is_training: training flag; also selects the noise branch in ``tf.cond``
    :param bn_decay: batch-norm decay forwarded to the batch-normalised layers
    :param weigth_decay: weight decay forwarded to the fully connected layers
    :param add_noise: when true, Gaussian noise (stddev 0.01) is added to the
        representation during training and zeros otherwise
    :param num_classes: number of output logits
    :return: (logits, fv) where fv is the (possibly noised) representation
    """
    batch_size = points.get_shape()[0].value
    n_points = points.get_shape()[1].value  # not used below; retained from the original
    n_gaussians = w.shape[0].value
    # Grid resolution: cube root of the number of Gaussians.
    res = int(np.round(np.power(n_gaussians, 1.0 / 3.0)))

    fv = tf_util.get_fv_minmax(points, w, mu, sigma, flatten=False)

    if add_noise:
        def _training_noise():
            return tf.random_normal(shape=tf.shape(fv), mean=0.0, stddev=0.01, dtype=tf.float32)

        def _no_noise():
            return tf.zeros(shape=tf.shape(fv))

        fv = fv + tf.cond(is_training, _training_noise, _no_noise)

    # Reshape to [b, c, res, res, res] and move the channel axis last.
    grid_fisher = tf.transpose(tf.reshape(fv, [batch_size, -1, res, res, res]), [0, 2, 3, 4, 1])

    # 3D CNN; the numeric suffix of each scope is the layer number.
    net = tf_util.conv3d(grid_fisher, 32, [5, 5, 5], scope='conv' + str(1), stride=[2, 2, 2],
                         padding='SAME', bn=True, bn_decay=bn_decay, is_training=is_training)
    net = tf_util.conv3d(net, 32, [3, 3, 3], scope='conv' + str(2), stride=[1, 1, 1],
                         padding='SAME', bn=True, bn_decay=bn_decay, is_training=is_training)
    net = tf_util.max_pool3d(net, [2, 2, 2], scope='maxpool' + str(3), stride=[2, 2, 2], padding='SAME')

    # Classifier on the flattened features.
    net = tf.reshape(net, [batch_size, -1])
    net = tf_util.fully_connected(net, 128, bn=True, is_training=is_training, scope='fc1',
                                  bn_decay=bn_decay, weigth_decay=weigth_decay)
    net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training, scope='dp1')
    net = tf_util.fully_connected(net, num_classes, activation_fn=None, scope='fc4',
                                  is_training=is_training, weigth_decay=weigth_decay)

    return net, fv
def get_model(source_point_cloud, template_point_cloud, is_training, bn_decay=None):
    """Extract global features for a source and a template input with shared weights.

    Both inputs are concatenated along the batch axis, pushed through the
    same two 3D convolutions and a max-pool, flattened, and split back into
    the source half and the template half.

    :param source_point_cloud: source input tensor with a static batch size.
        NOTE(review): presumably a voxel grid [B, vx, vy, vz] (a channel axis
        is appended and the result is fed to conv3d) - confirm with the caller.
    :param template_point_cloud: template input; concatenated after the source
        on axis 0, so it is expected to have the same batch size
    :param is_training: training flag forwarded to the conv layers
    :param bn_decay: batch-norm decay forwarded to the conv layers (bn is disabled)
    :return: (source_global_feature, template_global_feature), each holding
        half of the concatenated batch and the full flattened feature width
    """
    point_cloud = tf.concat([source_point_cloud, template_point_cloud], 0)
    batch_size = point_cloud.get_shape()[0].value

    # conv3d needs an explicit channel axis.
    input_image = tf.expand_dims(point_cloud, -1)

    net = tf_util.conv3d(input_image, 32, [5, 5, 5], padding='VALID', stride=[2, 2, 2],
                         bn=False, is_training=is_training, scope='conv1', bn_decay=bn_decay)
    net = tf_util.conv3d(net, 32, [3, 3, 3], padding='VALID', stride=[1, 1, 1],
                         bn=False, is_training=is_training, scope='conv2', bn_decay=bn_decay)
    net = tf_util.max_pool3d(net, [2, 2, 2], padding='VALID', scope='maxpool')

    net = tf.reshape(net, [batch_size, -1])
    print(net)

    # Integer halving; a slice size of -1 takes every remaining feature, so the
    # split no longer depends on a hard-coded feature width of 6912.
    half = batch_size // 2
    source_global_feature = tf.slice(net, [0, 0], [half, -1])
    template_global_feature = tf.slice(net, [half, 0], [half, -1])
    return source_global_feature, template_global_feature
def get_model(points, w, mu, sigma, is_training, bn_decay=None, weigth_decay=0.005, add_noise=False, num_classes=40):
    """Build a classifier: 3DmFV representation fed to an inception CNN and an FC head.

    :param points: batch of point clouds with a static batch size
        (the original docstring gives the input as B x N x 3)
    :param w: GMM weights; its static first dimension is the number of Gaussians
    :param mu: GMM means, forwarded to ``tf_util.get_3dmfv``
    :param sigma: GMM sigmas, forwarded to ``tf_util.get_3dmfv``
    :param is_training: training flag; also selects the noise branch in ``tf.cond``
    :param bn_decay: batch-norm decay forwarded to the batch-normalised layers
    :param weigth_decay: weight decay forwarded to the fully connected layers
    :param add_noise: when true, Gaussian noise (stddev 0.01) is added to the
        representation during training and zeros otherwise
    :param num_classes: number of output logits
    :return: (logits, end_points) with end_points['Logits'] and
        end_points['Probabilities'] (softmax of the logits)
    """
    batch_size = points.get_shape()[0].value
    n_points = points.get_shape()[1].value  # not used below; retained from the original
    n_gaussians = w.shape[0].value
    # Grid resolution: cube root of the number of Gaussians.
    res = int(np.round(np.power(n_gaussians, 1.0 / 3.0)))
    end_points = {}

    fv = tf_util.get_3dmfv(points, w, mu, sigma, flatten=False)

    if add_noise:
        def _training_noise():
            return tf.random_normal(shape=tf.shape(fv), mean=0.0, stddev=0.01, dtype=tf.float32)

        def _no_noise():
            return tf.zeros(shape=tf.shape(fv))

        fv = fv + tf.cond(is_training, _training_noise, _no_noise)

    # Reshape to [b, c, res, res, res] and move the channel axis last.
    grid_fisher = tf.transpose(tf.reshape(fv, [batch_size, -1, res, res, res]), [0, 2, 3, 4, 1])

    # Inception stack in execution order; the 1-based position of each entry
    # is the numeric suffix of its scope ('inception1' ... 'maxpool7').
    cnn_spec = (
        ('inception', 64),
        ('inception', 128),
        ('inception', 256),
        ('maxpool', None),
        ('inception', 256),
        ('inception', 512),
        ('maxpool', None),
    )
    net = grid_fisher
    for position, (kind, n_filters) in enumerate(cnn_spec, start=1):
        scope = kind + str(position)
        if kind == 'inception':
            net = inception_module(net, n_filters=n_filters, kernel_sizes=[3, 5],
                                   is_training=is_training, bn_decay=bn_decay, scope=scope)
        else:
            net = tf_util.max_pool3d(net, [2, 2, 2], scope=scope, stride=[2, 2, 2], padding='SAME')

    net = tf.reshape(net, [batch_size, -1])

    # Classifier: three (fc + dropout) pairs, then the linear output layer.
    for index, width in enumerate((1024, 256, 128), start=1):
        net = tf_util.fully_connected(net, width, bn=True, is_training=is_training,
                                      scope='fc' + str(index), bn_decay=bn_decay,
                                      weigth_decay=weigth_decay)
        net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training, scope='dp' + str(index))
    net = tf_util.fully_connected(net, num_classes, activation_fn=None, scope='fc4',
                                  is_training=is_training, weigth_decay=weigth_decay)

    end_points['Logits'] = net
    end_points['Probabilities'] = tf.nn.softmax(net, name='Probabilities')

    return net, end_points
def get_model(point_cloud, input_label, is_training, cat_num, part_num,
              batch_size, num_point, weight_decay, bn_decay=None):
    """Build a joint classification / part-segmentation network using KNN grouping and a VLAD layer.

    :param point_cloud: input points (the original docstring gives B x N x 3)
    :param input_label: category label tensor, reshaped to [batch_size, 1, 1, cat_num]
    :param is_training: training flag forwarded to the layers
    :param cat_num: number of object categories (width of the classification output)
    :param part_num: number of part labels (width of the segmentation output)
    :param batch_size: batch size used for the reshapes
    :param num_point: number of points per cloud, used for pooling / tiling / reshaping
    :param weight_decay: weight decay forwarded to the segmentation convolutions
    :param bn_decay: batch-norm decay forwarded to the batch-normalised layers
    :return: (net, net2, end_points): classification logits, segmentation
        logits reshaped to [batch_size, num_point, part_num], and an empty dict
    """
    end_points = {}
    KNN = 12

    def _conv2d_bn(inputs, n_out, kernel, scope):
        # Batch-normalised 2D conv used by the feature extractor.
        return tf_util.conv2d(inputs, n_out, kernel, padding='VALID', stride=[1, 1], bn=True,
                              is_training=is_training, scope=scope, bn_decay=bn_decay)

    def _conv3d_bn(inputs, n_out, kernel, scope):
        # Batch-normalised 3D conv used on the KNN-grouped points.
        return tf_util.conv3d(inputs, n_out, kernel, padding='VALID', stride=[1, 1, 1], bn=True,
                              is_training=is_training, scope=scope, bn_decay=bn_decay)

    def _seg_conv_bn(inputs, n_out, scope):
        # Batch-normalised 1x1 conv with weight decay used by the segmentation head.
        return tf_util.conv2d(inputs, n_out, [1, 1], padding='VALID', stride=[1, 1],
                              bn_decay=bn_decay, bn=True, is_training=is_training,
                              scope=scope, weight_decay=weight_decay)

    # Input transform.
    with tf.variable_scope('transform_net1'):
        transform = get_transform(point_cloud, is_training, bn_decay, K=3)
    aligned = tf.matmul(point_cloud, transform)

    # Neighbourhood grouping; a trailing channel axis is added for the convolutions.
    knn_point = KNN_search(aligned, KNN=KNN, name_scope='KNN_search')
    knn_point = tf.expand_dims(knn_point, axis=-1)
    aligned = tf.expand_dims(aligned, axis=-1)

    # Per-point feature, repeated KNN times so it can be concatenated with
    # the per-neighbour features.
    out0 = _conv2d_bn(aligned, 64, [1, 3], 'conv')
    out0_tile = tf.tile(out0, multiples=[1, 1, KNN, 1])
    out0_tile = tf.expand_dims(out0_tile, axis=-2)

    out1 = _conv3d_bn(knn_point, 64, [1, 1, 3], 'conv1')
    neighbour_features = tf.concat(values=[out1, out0_tile], axis=-1)
    out2 = _conv3d_bn(neighbour_features, 128, [1, 1, 1], 'conv2')
    out3 = _conv3d_bn(out2, 128, [1, 1, 1], 'conv3')

    # Pool over the neighbour axis and drop it.
    pool_k = tf_util.max_pool3d(out3, kernel_size=[1, KNN, 1], stride=[1, 2, 2],
                                padding='VALID', scope='pool_k')
    pool_k = tf.squeeze(pool_k, axis=2)

    # VLAD layer.
    vlad_out, _ = VLAD(pool_k, 16, is_training, bn_decay, layer_name='VLAD')

    out4 = _conv2d_bn(vlad_out, 512, [1, 1], 'vlad_conv3')
    out5 = _conv2d_bn(out4, 2048, [1, 1], 'vlad_conv4')

    # Global feature: max over all points.
    out_max = tf.nn.max_pool(out5, ksize=[1, num_point, 1, 1], strides=[1, 2, 2, 1], padding='VALID')

    # Classification network.
    net = tf.reshape(out_max, [batch_size, -1])
    net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training,
                                  scope='cla/fc1', bn_decay=bn_decay)
    net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training,
                                  scope='cla/fc2', bn_decay=bn_decay)
    net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training, scope='cla/dp1')
    net = tf_util.fully_connected(net, cat_num, activation_fn=None, scope='cla/fc3')

    # Segmentation network: the global feature plus the category label is
    # tiled to every point and concatenated with the per-point features.
    one_hot_label_expand = tf.reshape(input_label, [batch_size, 1, 1, cat_num])
    out_max = tf.concat(axis=3, values=[out_max, one_hot_label_expand])
    expand = tf.tile(out_max, [1, num_point, 1, 1])
    seg_input = tf.concat(axis=3, values=[expand, out0, pool_k, vlad_out, out4, out5])

    net2 = _seg_conv_bn(seg_input, 256, 'seg/conv1')
    net2 = tf_util.dropout(net2, keep_prob=0.8, is_training=is_training, scope='seg/dp1')
    net2 = _seg_conv_bn(net2, 256, 'seg/conv2')
    net2 = tf_util.dropout(net2, keep_prob=0.8, is_training=is_training, scope='seg/dp2')
    net2 = _seg_conv_bn(net2, 128, 'seg/conv3')
    net2 = tf_util.conv2d(net2, part_num, [1, 1], padding='VALID', stride=[1, 1],
                          activation_fn=None, bn=False, scope='seg/conv4',
                          weight_decay=weight_decay)
    net2 = tf.reshape(net2, [batch_size, num_point, part_num])

    return net, net2, end_points
3D expert convolutional architecture for 8 Gaussians """ batch_size = grid_3dmfv.get_shape()[0].value layer = 1 net = inception_module(grid_3dmfv, n_filters=128, kernel_sizes=[3, 5], is_training=is_training, bn_decay=bn_decay, scope='inception' + str(layer) + scope_str) layer = layer + 1 net = inception_module(net, n_filters=256, kernel_sizes=[3, 5], is_training=is_training, bn_decay=bn_decay, scope='inception' + str(layer) + scope_str) layer = layer + 1 net = inception_module(net, n_filters=256, kernel_sizes=[3, 5], is_training=is_training, bn_decay=bn_decay, scope='inception' + str(layer) + scope_str) layer = layer + 1 net = tf_util.max_pool3d(net, [2, 2, 2], scope='maxpool' + str(layer) + scope_str, stride=[2, 2, 2], padding='SAME') layer = layer + 1 net = inception_module(net, n_filters=512, kernel_sizes=[2, 4], is_training=is_training, bn_decay=bn_decay, scope='inception' + str(layer) + scope_str) layer = layer + 1 net = inception_module(net, n_filters=512, kernel_sizes=[2, 4], is_training=is_training, bn_decay=bn_decay, scope='inception' + str(layer) + scope_str) layer = layer + 1 net = tf_util.max_pool3d(net, [2, 2, 2], scope='maxpool' + str(layer) + scope_str, stride=[2, 2, 2], padding='SAME') layer = layer + 1 net = inception_module(net, n_filters=512, kernel_sizes=[1, 2], is_training=is_training, bn_decay=bn_decay, scope='inception' + str(layer) + scope_str) layer = layer + 1 net = tf_util.max_pool3d(net, [2, 2, 2], scope='maxpool' + str(layer) + scope_str, stride=[2, 2, 2], padding='SAME') global_feature = tf.reshape(net, [batch_size, -1])
def get_model(points, w, mu, sigma, is_training, radius, bn_decay=None, weight_decay=0.005, original_n_points=None):
    """
    Normal estimation architecture for multi-scale and single-scale (ms, ss) Nesti-Net Ablations

    The points of all scales are stored back to back along axis 1 of
    ``points``; each scale gets an equal share of that axis. A 3DmFV
    representation is computed per scale, the representations are
    concatenated on the channel axis, and the result is fed to a CNN chosen
    by the number of Gaussians, followed by a fully connected head.

    :param points: a batch of point clouds with xyz coordinates [b x n x 3]
    :param w: GMM weights
    :param mu: GMM means
    :param sigma: GMM std
    :param is_training: true / false indicating training or testing
    :param radius: list of floats indicating radius as percentage of bounding box;
        only its length (the number of scales) is used here
    :param bn_decay: batch-norm decay forwarded to the batch-normalised layers
    :param weight_decay: weight decay forwarded to the fully connected layers
    :param original_n_points: The original number of points in the vicinity of the query point
        (used for compensating in the 3dmfv representation); column ``s`` is used for scale ``s``
    :return:
            net_n_est: output of the final 3-unit linear layer after ``tf.squeeze``;
                       the batch axis is restored when the batch size is 1
            grid_fisher: 3dmfv representation of each point cloud in the batch
    :raises ValueError: when the number of Gaussians is neither 8*8*8 nor 3*3*3
    """
    batch_size = points.get_shape()[0].value
    n_rads = len(radius)
    # Floor division: under Python 3 true division would give a float here,
    # which cannot be used as a slice bound below.
    n_points = points.get_shape()[1].value // n_rads
    n_gaussians = w.shape[0].value
    # Grid resolution: cube root of the number of Gaussians.
    res = int(np.round(np.power(n_gaussians, 1.0 / 3.0)))

    for s in range(n_rads):
        start = s * n_points
        end = start + n_points

        n_original = None if original_n_points is None else original_n_points[:, s]
        fv = tf_util.get_3dmfv_n_est(points[:, start:end, :], w, mu, sigma, flatten=True,
                                     n_original_points=n_original)

        fv_grid = tf.reshape(fv, [batch_size, -1, res, res, res])
        if s == 0:
            grid_fisher = fv_grid
        else:
            # Stack the scales on the channel axis.
            grid_fisher = tf.concat([grid_fisher, fv_grid], axis=1)
    # Move the channel axis last.
    grid_fisher = tf.transpose(grid_fisher, [0, 2, 3, 4, 1])

    # CNN architecture - adjust architecture to number of gaussians.
    # NOTE: `s` is the index of the last scale; it is part of every scope name.
    if n_gaussians == 8 * 8 * 8:
        layer = 1
        net = inception_module(grid_fisher, n_filters=128, kernel_sizes=[3, 5], is_training=is_training,
                               bn_decay=bn_decay, scope='inception_s' + str(s) + '_l_' + str(layer))
        layer = layer + 1
        net = inception_module(net, n_filters=256, kernel_sizes=[3, 5], is_training=is_training,
                               bn_decay=bn_decay, scope='inception_s' + str(s) + '_l_' + str(layer))
        layer = layer + 1
        net = inception_module(net, n_filters=256, kernel_sizes=[3, 5], is_training=is_training,
                               bn_decay=bn_decay, scope='inception_s' + str(s) + '_l_' + str(layer))
        layer = layer + 1
        net = tf_util.max_pool3d(net, [2, 2, 2], scope='maxpool_s' + str(s) + '_l_' + str(layer),
                                 stride=[2, 2, 2], padding='SAME')
        layer = layer + 1
        net = inception_module(net, n_filters=512, kernel_sizes=[3, 4], is_training=is_training,
                               bn_decay=bn_decay, scope='inception_s' + str(s) + '_l_' + str(layer))
        layer = layer + 1
        net = inception_module(net, n_filters=512, kernel_sizes=[3, 4], is_training=is_training,
                               bn_decay=bn_decay, scope='inception_s' + str(s) + '_l_' + str(layer))
        layer = layer + 1
        net = tf_util.max_pool3d(net, [2, 2, 2], scope='maxpool_s' + str(s) + '_l_' + str(layer),
                                 stride=[2, 2, 2], padding='SAME')
        global_feature = tf.reshape(net, [batch_size, -1])
    elif n_gaussians == 3 * 3 * 3:
        layer = 1
        net = inception_module(grid_fisher, n_filters=128, kernel_sizes=[2, 3], is_training=is_training,
                               bn_decay=bn_decay, scope='inception_s' + str(s) + '_l_' + str(layer))
        layer = layer + 1
        net = inception_module(net, n_filters=256, kernel_sizes=[2, 3], is_training=is_training,
                               bn_decay=bn_decay, scope='inception_s' + str(s) + '_l_' + str(layer))
        layer = layer + 1
        net = inception_module(net, n_filters=256, kernel_sizes=[1, 2], is_training=is_training,
                               bn_decay=bn_decay, scope='inception_s' + str(s) + '_l_' + str(layer))
        layer = layer + 1
        net = inception_module(net, n_filters=512, kernel_sizes=[1, 2], is_training=is_training,
                               bn_decay=bn_decay, scope='inception_s' + str(s) + '_l_' + str(layer))
        layer = layer + 1
        net = tf_util.max_pool3d(net, [3, 3, 3], scope='maxpool_s' + str(s) + '_l_' + str(layer),
                                 stride=[2, 2, 2], padding='SAME')
        global_feature = tf.reshape(net, [batch_size, -1])
    else:
        # Unsupported grid size: the architecture has to be adapted by hand.
        raise ValueError('Unsupported number of Gaussians - you should change the architecture accordingly')

    # FC architecture - normal estimation network
    net = tf_util.fully_connected(global_feature, 1024, bn=True, is_training=is_training, scope='fc1',
                                  bn_decay=bn_decay, weigth_decay=weight_decay)
    net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training, scope='dp1')
    net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training, scope='fc2',
                                  bn_decay=bn_decay, weigth_decay=weight_decay)
    net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training, scope='dp2')
    net = tf_util.fully_connected(net, 128, bn=True, is_training=is_training, scope='fc3',
                                  bn_decay=bn_decay, weigth_decay=weight_decay)
    net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training, scope='dp3')
    net_n_est = tf_util.fully_connected(net, 3, activation_fn=None, scope='fc4',
                                        is_training=is_training, weigth_decay=weight_decay)

    net_n_est = tf.squeeze(net_n_est)
    if batch_size == 1:
        # tf.squeeze also removed the batch axis; put it back.
        net_n_est = tf.expand_dims(net_n_est, axis=0)

    return net_n_est, grid_fisher
def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, **kwargs):
    """Build the policy graph: a 3D-conv feature extractor with separate policy and value heads.

    The observation placeholder is cast to float32 and passed through four
    conv3d / max_pool3d stages, flattened, and fed to two independent
    8-unit ReLU layers (policy latent and value latent). The static shape
    of every intermediate tensor is printed while the graph is built.

    Arguments are forwarded to the parent constructor (with ``scale=True``);
    ``reuse`` is also used for the "model" variable scope. ``kwargs`` is
    accepted but not used here.
    """
    super(Policy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch,
                                 reuse=reuse, scale=True)

    print(self.obs_ph.get_shape())
    net = tf.cast(self.obs_ph, tf.float32)
    print(net.get_shape())

    # Per stage: (conv output channels, cubic conv kernel size, pool stride).
    # Every conv uses stride 1 and every pool a 3x3x3 window, all 'VALID'.
    stages = ((16, 6, 2), (32, 5, 2), (64, 3, 2), (64, 2, 1))

    with tf.variable_scope("model", reuse=reuse):
        for stage_no, (channels, kernel, pool_stride) in enumerate(stages, start=1):
            net = tu.conv3d(inputs=net, num_output_channels=channels,
                            kernel_size=[kernel, kernel, kernel],
                            scope="conv" + str(stage_no),
                            stride=[1, 1, 1], padding="VALID")
            print(net.get_shape())
            net = tu.max_pool3d(inputs=net, kernel_size=[3, 3, 3],
                                scope="pool" + str(stage_no),
                                stride=[pool_stride, pool_stride, pool_stride],
                                padding="VALID")
            print(net.get_shape())

        net = tf.layers.flatten(inputs=net)
        print(net.get_shape())

        # Policy latent.
        with tf.name_scope("pi_h_fc1"):
            pi_h = tf.layers.dense(
                net, 8, activation=tf.nn.relu,
                kernel_initializer=tf.truncated_normal_initializer(stddev=1e-3))
            print(pi_h.get_shape())
        pi_latent = pi_h

        # Value latent.
        with tf.name_scope("vf_h_fc1"):
            vf_h = tf.layers.dense(
                net, 8, activation=tf.nn.relu,
                kernel_initializer=tf.truncated_normal_initializer(stddev=1e-3))
            print(vf_h.get_shape())
        value_fn = tf.layers.dense(vf_h, 1, name="vf")
        vf_latent = vf_h

        distribution_outputs = self.pdtype.proba_distribution_from_latent(
            pi_latent, vf_latent, init_scale=0.01)
        self._proba_distribution, self._policy, self.q_value = distribution_outputs

    self._value_fn = value_fn
    self._setup_init()
def pointnet_sa_module(cascade_id, xyz, points, bidmap, mlp_configs, block_bottom_center_mm, configs, sgf_config_pls, is_training, bn_decay, scope, bn=True, tnet_spec=None, use_xyz=True, IsShowModel=False): ''' Input cascade_id==0: xyz is grouped_points: (batch_size,nsubblock0,npoint_subblock0,6) points: None bidmap: None Input cascade_id==1: xyz: (batch_size,nsubblock0,3) points: (batch_size,nsubblock0,channel) bidmap: (batch_size,nsubblock1,npoint_subblock1) Medium cascade_id==1: grouped_xyz: (batch_size,nsubblock1,npoint_subblock1,3) new_xyz: (batch_size,nsubblock1,3) group_points: (batch_size,nsubblock1,npoint_subblock1,channel) output cascade_id==1: new_xyz: (batch_size,nsubblock1,3) new_points: (batch_size,nsubblock1,channel) ''' block_bottom_center_mm = tf.cast( block_bottom_center_mm, tf.float32, name='block_bottom_center_mm' ) # gpu_0/sa_layer3/block_bottom_center_mm:0 batch_size = xyz.get_shape()[0].value with tf.variable_scope(scope) as sc: cascade_num = configs['flatten_bm_extract_idx'].shape[ 0] - 1 # include global here (Note: cascade_num does not include global in block_pre_util ) assert configs['sub_block_step_candis'].size == cascade_num - 1 if cascade_id == 0: indrop_keep_mask = tf.get_default_graph().get_tensor_by_name( 'indrop_keep_mask:0') # indrop_keep_mask:0 assert len(xyz.shape) == 3 if bidmap == None: grouped_xyz = tf.expand_dims(xyz, 1) grouped_points = tf.expand_dims(points, 1) new_xyz = None valid_mask = None else: batch_idx = tf.reshape(tf.range(batch_size), [batch_size, 1, 1, 1]) nsubblock = bidmap.get_shape()[1].value npoint_subblock = bidmap.get_shape()[2].value batch_idx_ = tf.tile(batch_idx, [1, nsubblock, npoint_subblock, 1]) bidmap = tf.expand_dims(bidmap, axis=-1, name='bidmap') bidmap_concat = tf.concat( [batch_idx_, bidmap], axis=-1, name='bidmap_concat') # gpu_0/sa_layer0/bidmap_concat:0 # The value for invalid item in bidmap is -17. # On GPU, the responding grouped_xyz and grouped_points is 0. # NOT WORK on CPU !!! 
# invalid indices comes from merge_blocks_while_fix_bmap # set point_indices_f for invalid points as # NETCONFIG['redundant_points_in_block'] ( shoud be set < -500) valid_mask = tf.greater(bidmap, tf.constant( -500, tf.int32), 'valid_mask') # gpu_0/sa_layer0/valid_mask:0 grouped_xyz = tf.gather_nd( xyz, bidmap_concat, name='grouped_xyz') # gpu_0/sa_layer0/grouped_xyz:0 grouped_points = tf.gather_nd(points, bidmap_concat, name='group_points') if cascade_id == 0 and len(indrop_keep_mask.get_shape()) != 0: grouped_indrop_keep_mask = tf.gather_nd( indrop_keep_mask, bidmap_concat, name='grouped_indrop_keep_mask' ) # gpu_0/sa_layer0/grouped_indrop_keep_mask:0 # new_xyz is the "voxel center" or "mean position of points in the voxel" if configs['mean_grouping_position'] and ( not mlp_configs['block_learning'] == '3DCNN'): new_xyz = tf.reduce_mean(grouped_xyz, -2) else: new_xyz = block_bottom_center_mm[:, :, 3:6] * tf.constant( 0.001, tf.float32) # the mid can be mean or block center, decided by configs['mean_grouping_position'] sub_block_mid = tf.expand_dims( new_xyz, -2, name='sub_block_mid') # gpu_1/sa_layer0/sub_block_mid global_block_mid = tf.reduce_mean(sub_block_mid, 1, keepdims=True, name='global_block_mid') grouped_xyz_submid = grouped_xyz - sub_block_mid grouped_xyz_glomid = grouped_xyz - global_block_mid grouped_xyz_feed = [] if 'raw' in configs['xyz_elements']: grouped_xyz_feed.append(grouped_xyz) if 'sub_mid' in configs['xyz_elements']: grouped_xyz_feed.append(grouped_xyz_submid) if 'global_mid' in configs['xyz_elements']: grouped_xyz_feed.append(grouped_xyz_glomid) grouped_xyz_feed = tf.concat(grouped_xyz_feed, -1) if cascade_id == 0: # xyz must be at the first in feed_data_elements !!!! 
grouped_points = tf.concat( [grouped_xyz_feed, grouped_points[..., 3:]], -1) if len(indrop_keep_mask.get_shape()) != 0: if InDropMethod == 'set1st': # set all the dropped item as the first item tmp1 = tf.multiply(grouped_points, grouped_indrop_keep_mask) points_1st = grouped_points[:, :, 0:1, :] points_1st = tf.tile(points_1st, [1, 1, grouped_points.shape[2], 1]) indrop_mask_inverse = 1 - grouped_indrop_keep_mask tmp2 = indrop_mask_inverse * points_1st grouped_points = tf.add( tmp1, tmp2, name='grouped_points_droped' ) # gpu_0/sa_layer0/grouped_points_droped #tf.add_to_collection( 'check', grouped_points ) elif InDropMethod == 'set0': valid_mask = tf.logical_and( valid_mask, tf.equal(grouped_indrop_keep_mask, 0), name='valid_mask_droped' ) # gpu_1/sa_layer0/valid_mask_droped elif use_xyz: grouped_points = tf.concat([grouped_xyz_feed, grouped_points], axis=-1) tf.add_to_collection('grouped_xyz', grouped_xyz) tf.add_to_collection('grouped_xyz_submid', grouped_xyz_submid) tf.add_to_collection('grouped_xyz_glomid', grouped_xyz_glomid) if cascade_id > 0 and use_xyz and (not cascade_id == cascade_num - 1): grouped_points = tf.concat([grouped_xyz_feed, grouped_points], axis=-1) nsample = grouped_points.get_shape()[2].value # the conv kernel size if IsShowModel: print( '\n\npointnet_sa_module cascade_id:%d\n xyz:%s\n grouped_xyz:%s\n new_xyz:%s\n grouped_points:%s\n nsample:%d' % (cascade_id, shape_str([xyz]), shape_str([grouped_xyz]), shape_str([new_xyz]), shape_str([grouped_points]), nsample)) new_points = grouped_points if valid_mask != None: new_points = new_points * tf.cast(valid_mask[:, :, :, 0:1], tf.float32) if 'growth_rate' in mlp_configs['point_encoder'][cascade_id]: new_points = tf_util.dense_net( new_points, mlp_configs['point_encoder'][cascade_id], bn, is_training, bn_decay,\ scope = 'dense_cascade_%d_point_encoder'%(cascade_id) , is_show_model = IsShowModel ) else: for i, num_out_channel in enumerate( mlp_configs['point_encoder'][cascade_id]): new_points = 
tf_util.conv2d(new_points, num_out_channel, [1, 1], padding='VALID', stride=[1, 1], bn=bn, is_training=is_training, scope='conv%d' % (i), bn_decay=bn_decay) if configs['Cnn_keep_prob'] < 1: if (not configs['only_last_layer_ineach_cascade'] ) or i == len( mlp_configs['point_encoder'][cascade_id]) - 1: new_points = tf_util.dropout( new_points, keep_prob=configs['Cnn_keep_prob'], is_training=is_training, scope='dropout', name='cnn_dp%d' % (i)) if IsShowModel: print('point encoder1 %d, new_points:%s' % (i, shape_str([new_points]))) if cascade_id == 0: root_point_features = new_points #if InDropMethod == 'set0': # if len(indrop_keep_mask.get_shape()) != 0: # new_points = tf.identity(new_points,'points_before_droped') # gpu_0/sa_layer0/points_before_droped:0 # new_points = tf.multiply( new_points, grouped_indrop_keep_mask, name='droped_points' ) # gpu_0/sa_layer0/droped_points:0 else: root_point_features = None pooling = mlp_configs['block_learning'] if pooling == '3DCNN' and (cascade_id == 0): pooling = 'max' #if pooling=='avg': # new_points = tf_util.avg_pool2d(new_points, [1,nsample], stride=[1,1], padding='VALID', scope='avgpool1') #elif pooling=='weighted_avg': # with tf.variable_scope('weighted_avg1'): # dists = tf.norm(grouped_xyz,axis=-1,ord=2,keep_dims=True) # exp_dists = tf.exp(-dists * 5) # weights = exp_dists/tf.reduce_sum(exp_dists,axis=2,keep_dims=True) # (batch_size, npoint, nsample, 1) # new_points *= weights # (batch_size, npoint, nsample, mlps_0[-1]) # new_points = tf.reduce_sum(new_points, axis=2, keep_dims=True) if pooling == 'max': # Even the grouped_points and grouped_xyz are 0 for invalid points, the # vaule after mlp will not be. It has to be set as 0 forcely before # pooling. 
if valid_mask != None: new_points = new_points * tf.cast(valid_mask[:, :, :, 0:1], tf.float32) new_points = tf.identity( new_points, 'points_before_max') # gpu_0/sa_layer0/points_before_max new_points = tf.reduce_max(new_points, axis=[2], keepdims=True, name='points_after_max') #elif pooling=='min': # new_points = tf_util.max_pool2d(-1*new_points, [1,nsample], stride=[1,1], padding='VALID', scope='minpool1') #elif pooling=='max_and_avg': # avg_points = tf_util.max_pool2d(new_points, [1,nsample], stride=[1,1], padding='VALID', scope='maxpool1') # max_points = tf_util.avg_pool2d(new_points, [1,nsample], stride=[1,1], padding='VALID', scope='avgpool1') # new_points = tf.concat([avg_points, max_points], axis=-1) elif pooling == '3DCNN': new_points = grouped_points_to_voxel_points( cascade_id, new_points, valid_mask, block_bottom_center_mm, configs, grouped_xyz, IsShowVoxelModel=IsShowModel) if IsShowModel: print('voxel points:%s' % (shape_str([new_points]))) for i, num_out_channel in enumerate( mlp_configs['voxel_channels'][cascade_id]): kernel_i = [mlp_configs['voxel_kernels'][cascade_id][i]] * 3 stride_i = [mlp_configs['voxel_strides'][cascade_id][i]] * 3 if new_points.shape[1] % 2 == 0: padding_i = np.array([[0, 0], [1, 0], [1, 0], [1, 0], [ 0, 0 ]]) * mlp_configs['voxel_paddings'][cascade_id][i] else: padding_i = np.array([[0, 0], [1, 1], [1, 1], [1, 1], [ 0, 0 ]]) * mlp_configs['voxel_paddings'][cascade_id][i] new_points = tf.pad(new_points, padding_i, "CONSTANT") if type(num_out_channel) == int: new_points = tf_util.conv3d(new_points, num_out_channel, kernel_i, scope='3dconv_%d' % (i), stride=stride_i, padding='VALID', bn=bn, is_training=is_training, bn_decay=bn_decay, name='points_3dcnn_%d' % (i)) if IsShowModel: print('block learning by 3dcnn %d, new_points:%s' % (i, shape_str([new_points]))) elif num_out_channel == 'max': new_points = tf_util.max_pool3d(new_points, kernel_i, scope='3dmax_%d' % (i), stride=stride_i, padding='VALID') if IsShowModel: print('block 
learning max pooling %d, new_points:%s' % (i, shape_str([new_points]))) elif num_out_channel == 'avg': new_points = tf_util.avg_pool3d(new_points, kernel_i, scope='3dmax_%d' % (i), stride=stride_i, padding='VALID') if IsShowModel: print('block learning avg pooling %d, new_points:%s' % (i, shape_str([new_points]))) # gpu_0/sa_layer1/3dconv_0/points_3dcnn_0:0 if configs['Cnn_keep_prob'] < 1: if (not configs['only_last_layer_ineach_cascade'] ) or i == len( mlp_configs['voxel_channels'][cascade_id]) - 1: new_points = tf_util.dropout( new_points, keep_prob=configs['Cnn_keep_prob'], is_training=is_training, scope='dropout', name='3dcnn_dp%d' % (i)) # gpu_0/sa_layer4/3dconv_0/points_3dcnn_0:0 new_points = tf.squeeze(new_points, [1, 2, 3]) new_points = tf.reshape( new_points, [batch_size, -1, 1, new_points.shape[-1].value]) if IsShowModel: print('after %s, new_points:%s' % (pooling, shape_str([new_points]))) if 'growth_rate' in mlp_configs['block_encoder'][cascade_id]: new_points = tf_util.dense_net( new_points, mlp_configs['block_encoder'][cascade_id], bn, is_training, bn_decay, scope='dense_cascade_%d_block_encoder' % (cascade_id), is_show_model=IsShowModel) else: for i, num_out_channel in enumerate( mlp_configs['block_encoder'][cascade_id]): new_points = tf_util.conv2d(new_points, num_out_channel, [1, 1], padding='VALID', stride=[1, 1], bn=bn, is_training=is_training, scope='conv_post_%d' % (i), bn_decay=bn_decay) if configs['Cnn_keep_prob'] < 1: if (not configs['only_last_layer_ineach_cascade'] ) or i == len( mlp_configs['block_encoder'][cascade_id]) - 1: new_points = tf_util.dropout( new_points, keep_prob=configs['Cnn_keep_prob'], is_training=is_training, scope='dropout', name='cnn_dp%d' % (i)) if IsShowModel: print('block encoder %d, new_points:%s' % (i, shape_str([new_points]))) # (2, 512, 1, 64) new_points = tf.squeeze(new_points, [2]) # (batch_size, npoints, mlps_1[-1]) if IsShowModel: print( 'pointnet_sa_module return\n new_xyz: %s\n new_points:%s\n\n' % 
(shape_str([new_xyz]), shape_str([new_points]))) #import pdb;pdb.set_trace() # (2, 512, 64) return new_xyz, new_points, root_point_features
def get_model(points,
              w,
              mu,
              sigma,
              is_training,
              bn_decay=None,
              weight_decay=0.005,
              original_n_points=None,
              labels=None):
    """Build the 3-output regression graph on top of a 3DmFV grid.

    The input is turned into a Fisher-vector representation by
    ``tf_util.get_3dmfv_n_est``, reshaped into a ``res x res x res`` grid
    (``res`` is the rounded cube root of the number of Gaussians), passed
    through a stack of inception modules and 3D max-pooling layers, and
    finally through a fully connected head that ends in 3 linear units.

    Args:
        points: input point tensor; its first two static dimensions are read
            as batch size and point count (the original docstring states the
            input is BxNx3 -- TODO confirm against callers).
        w, mu, sigma: Gaussian mixture parameters forwarded unchanged to
            ``tf_util.get_3dmfv_n_est``; ``w.shape[0]`` is taken as the
            number of Gaussians.
        is_training: forwarded to every layer that accepts it.
        bn_decay: batch-norm decay forwarded to the layers.
        weight_decay: forwarded to the fully connected layers (through the
            ``weigth_decay`` keyword that ``tf_util`` exposes).
        original_n_points: forwarded as ``n_original_points`` to
            ``tf_util.get_3dmfv_n_est``.
        labels: not used by this function.

    Returns:
        A ``(net_n_est, grid_fisher)`` tuple: the output of the final
        3-unit layer (squeezed, with the leading axis restored when the
        static batch size is 1) and the grid tensor fed to the first
        inception module.
    """
    static_shape = points.get_shape()
    batch_size = static_shape[0].value
    n_points = static_shape[1].value  # read here, not used further below
    n_gaussians = w.shape[0].value
    res = int(np.round(np.power(n_gaussians, 1.0 / 3.0)))

    # A missing original_n_points is simply forwarded as None.
    fv = tf_util.get_3dmfv_n_est(points,
                                 w,
                                 mu,
                                 sigma,
                                 flatten=True,
                                 n_original_points=original_n_points)

    # [batch, features, res, res, res] -> [batch, res, res, res, features]
    grid_fisher = tf.transpose(
        tf.reshape(fv, [batch_size, -1, res, res, res]), [0, 2, 3, 4, 1])

    # Convolutional trunk. An integer entry is the n_filters of an inception
    # module; None marks a 2x2x2 max-pool. The 1-based position of each entry
    # is the suffix of its variable scope.
    trunk_spec = (128, 256, 256, None, 512, 512, None)
    net = grid_fisher
    for position, n_filters in enumerate(trunk_spec, start=1):
        if n_filters is None:
            net = tf_util.max_pool3d(net, [2, 2, 2],
                                     scope='maxpool' + str(position),
                                     stride=[2, 2, 2],
                                     padding='SAME')
        else:
            net = inception_module(net,
                                   n_filters=n_filters,
                                   kernel_sizes=[3, 5],
                                   is_training=is_training,
                                   bn_decay=bn_decay,
                                   scope='inception' + str(position))

    # Flatten everything but the batch axis into one feature vector per item.
    net = tf.reshape(net, [batch_size, -1])

    # Fully connected head: three (fc + batch norm, dropout) stages.
    for stage, width in enumerate((1024, 256, 128), start=1):
        net = tf_util.fully_connected(net,
                                      width,
                                      bn=True,
                                      is_training=is_training,
                                      scope='fc' + str(stage),
                                      bn_decay=bn_decay,
                                      weigth_decay=weight_decay)
        net = tf_util.dropout(net,
                              keep_prob=0.7,
                              is_training=is_training,
                              scope='dp' + str(stage))

    # TODO: add a constraint that enforces unit length on the output.
    net_n_est = tf_util.fully_connected(net,
                                        3,
                                        activation_fn=None,
                                        scope='fc4',
                                        is_training=is_training,
                                        weigth_decay=weight_decay)

    # squeeze() without an axis also drops a batch axis of size 1, so that
    # axis is put back for single-item batches.
    net_n_est = tf.squeeze(net_n_est)
    if batch_size == 1:
        net_n_est = tf.expand_dims(net_n_est, axis=0)

    return net_n_est, grid_fisher