def backbone_local_dilate(points, featdim, knn_ind, dilate2=8, **unused):
    nn_8 = knn_ind[:, 0:8, :]

    # conv1d
    init_features = convolution_pointset_withBatchnorm(
        tf.transpose(points, perm=[0, 2, 1]), nn_8, 32, name='initconv')
    init_features = flex_pooling(init_features, nn_8, name='init_pool')
    init_features = tf.transpose(init_features, perm=[0, 2, 1])

    # stage 1
    newpoints1, x1 = flex_conv_dilate(points, init_features, dilate=1, knn=8,
                                      outdims=[64, 64], scope='stage1',
                                      knn_indices=nn_8, concat=False,
                                      add_se='max_pool')

    # stage 2
    x2 = feature_conv1d_1(x1, 64, name='before_stage2_conv1d',
                          c_last=True, ac_func=BNReLU)
    newpoints2, x2 = flex_conv_dilate(newpoints1, x2, dilate=dilate2, knn=8,
                                      outdims=[128, 128], scope='stage2',
                                      knn_indices=None, concat=True,
                                      add_se='max_pool')

    # combine: stage-1 shortcut (lifted to 128 channels) + stage-2 output
    feat = feature_conv1d_1(x1, 128, 'local_stage1_shortcut',
                            c_last=True, ac_func=BNReLU) + x2
    if featdim < 128:
        feat = feature_conv1d_1(feat, featdim, 'final_fc', c_last=True)
    return newpoints2, feat
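# A minimal usage sketch (not part of the code base): the batch size, point
# count and k=16 neighborhood are assumptions; knn_bruteforce is assumed to
# return (indices, distances) as it does in flex_conv_dilate below.
B, N = 2, 8192
pts = tf.placeholder(tf.float32, [B, N, 3], name='pointcloud')          # B x N x 3
knn_ind, _ = knn_bruteforce(tf.transpose(pts, perm=[0, 2, 1]), k=16)    # B x 16 x N
out_pts, local_feat = backbone_local_dilate(pts, featdim=128,
                                            knn_ind=knn_ind, dilate2=8)
# out_pts are the points the descriptors live on; local_feat holds the
# per-point local features with featdim channels (channels-last).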
import numpy as np
import tensorflow as tf

# flex_convolution, flex_pooling and flex_convolution_transpose are assumed to
# be imported from the Flex-Convolution code base.

# example sizes (assumed values for this sketch; any consistent choice works)
B, Dp = 2, 3                    # batch size, position dimension
Din, Dout, Dout2 = 4, 32, 64    # feature dimensions
N, N2 = 1024, 256               # number of points before / after sub-sampling
K, K2 = 8, 8                    # neighborhood sizes

features = np.random.randn(B, Din, N).astype(np.float32)
positions = np.random.randn(B, Dp, N).astype(np.float32)
neighbors = np.random.randint(0, N, [B, K, N]).astype(np.int32)
# neighbor indices for the second stage must address the sub-sampled point set
neighbors2 = np.random.randint(0, N2, [B, K2, N2]).astype(np.int32)

features = tf.convert_to_tensor(features, name='features')
positions = tf.convert_to_tensor(positions, name='positions')
neighbors = tf.convert_to_tensor(neighbors, name='neighbors')
neighbors2 = tf.convert_to_tensor(neighbors2, name='neighbors2')

net = [features]

# use our FlexConv similar to a traditional convolution layer
net.append(flex_convolution(net[-1], positions, neighbors, Dout))

# pooling and sub-sampling are different operations
net.append(flex_pooling(net[-1], neighbors))

# when ordering the points beforehand, sub-sampling is simply a slice
features = net[-1][:, :, :N2]
positions = positions[:, :, :N2]
net.append(features)

# we did not notice any improvements using the transposed version vs. pooling
net.append(flex_convolution_transpose(net[-1], positions, neighbors2, Dout2))

# of course, any commonly used arguments work here as well
net.append(flex_convolution(net[-1], positions, neighbors2, Dout2, trainable=False))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    outputs = sess.run(net[-1])
def flex_conv_dilate(xyz, feat, dilate, knn, outdims, scope, knn_indices=None,
                     concat=True, add_se='max_pool', upsample=True, **unused):
    num_point = xyz.get_shape()[1]
    npoint = num_point // dilate
    with tf.variable_scope(scope) as sc:
        if dilate > 1:
            points_sampled, feat_sampled, kp_indices = subsample(
                xyz, feat, npoint, kp_idx=None)
        else:
            points_sampled, feat_sampled = xyz, feat

        feats_T = tf.transpose(feat_sampled, perm=[0, 2, 1])
        points_T = tf.transpose(points_sampled, perm=[0, 2, 1])
        if knn_indices is None:
            # B, knn, numpts
            knn_indices, distances = knn_bruteforce(points_T, k=knn)

        x = feats_T
        for i, d in enumerate(outdims):
            x = flexconv_withBatchnorm(x, points_T, knn_indices, d,
                                       name='flexconv_{}'.format(i))

        if add_se == 'max_pool':
            x_pool = flex_pooling(x, knn_indices, name='se_maxpool')
            newx = se_res_bottleneck(x, x_pool, outdims[-1], "se")  # B, 64, N
        elif add_se == 'avg_pool':
            x_pool = flex_avg(x, points_T, knn_indices, outdims[-1], name='se_avgpool')
            x_pool = x_pool * (1.0 / knn)
            newx = se_res_bottleneck(x, x_pool, outdims[-1], "se")  # B, 64, N
        else:
            newx = x
        new_feat = tf.transpose(newx, perm=[0, 2, 1])  # B, N, outdim

        # upsampling
        if upsample and dilate > 1:
            dist, idx = three_nn(xyz, points_sampled)
            dist = tf.maximum(dist, 1e-10)
            norm = tf.reduce_sum((1.0 / dist), axis=2, keep_dims=True)
            norm = tf.tile(norm, [1, 1, 3])
            weight = (1.0 / dist) / norm
            new_feat = three_interpolate(new_feat, idx, weight)

        if concat:
            new_feat = tf.concat(axis=2, values=[new_feat, feat])
            new_feat = feature_conv1d_1(new_feat, outdims[-1], name='concat_conv1d',
                                        c_last=True, ac_func=BNReLU)

        return xyz, new_feat
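# Minimal usage sketch for a single dilated stage (shapes and sizes are
# assumptions; the helper ops come from the surrounding code base):
B, N, C = 2, 4096, 64
xyz = tf.placeholder(tf.float32, [B, N, 3], name='xyz')    # point coordinates
feat = tf.placeholder(tf.float32, [B, N, C], name='feat')  # per-point features

new_xyz, new_feat = flex_conv_dilate(xyz, feat, dilate=4, knn=8,
                                     outdims=[64, 64], scope='example_stage',
                                     knn_indices=None, concat=True,
                                     add_se='max_pool')
# With dilate=4 the stage runs on N/4 sub-sampled points; since upsample=True
# by default, the result is interpolated back to all N points
# (three_nn / three_interpolate) and, with concat=True, fused with the input
# features through a 1x1 convolution, so new_feat is B x N x 64.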
def build_graph(self, positions, label):
    positions = positions / 16. - 1

    # initial features are the positions themselves
    features = positions
    neighbors = knn_bruteforce(positions, k=16)

    x = features

    def subsample(x):
        # probably too simplistic: just kick out 3 of 4 points randomly
        # see the IDISS approach in our paper for better sub-sampling
        n = x.shape.as_list()[-1]
        return x[:, :, :n // 4]

    # similar to traditional networks
    for stage in range(4):
        if stage > 0:
            x = flex_pooling(x, neighbors)
            x = subsample(x)
            positions = subsample(positions)
            neighbors = knn_bruteforce(positions, k=16)

        x = flex_convolution(x, positions, neighbors, 64 * (stage + 1),
                             activation=tf.nn.relu)
        x = flex_convolution(x, positions, neighbors, 64 * (stage + 1),
                             activation=tf.nn.relu)

    if USE_POOLING:
        # either do max-pooling over all remaining points ...
        x = tf.expand_dims(x, axis=-1)
        x = tf.layers.max_pooling2d(x, [1, 16], [1, 16])
    else:
        # ... or do a flex-conv in (0, 0, 0) with all points as neighbors
        positions = tf.concat([positions, positions[:, :, :1] * 0], axis=-1)
        x = tf.concat([x, x[:, :, :1] * 0], axis=-1)
        K = positions.shape.as_list()[-1]
        neighbors = knn_bruteforce(positions, k=K)
        x = flex_convolution(x, positions, neighbors, 1024, activation=tf.nn.relu)
        x = x[:, :, -1:]

    # from now on, just the code part we copied from the Tensorpack framework
    x = tf.layers.flatten(x)
    x = tf.layers.dense(x, 512, activation=tf.nn.relu, name='fc0')
    logits = tf.layers.dense(x, 10, activation=tf.identity, name='fc1')

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')

    correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32, name='correct')
    accuracy = tf.reduce_mean(correct, name='accuracy')

    train_error = tf.reduce_mean(1 - correct, name='train_error')
    summary.add_moving_summary(train_error, accuracy)
    return cost
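# Hedged sketch of the Tensorpack ModelDesc that wraps build_graph; it is not
# copied from the example script. NUM_POINTS, DP and USE_POOLING are assumed
# module-level constants (points per sample, position dimension, read-out variant).
from tensorpack import ModelDesc
from tensorpack.tfutils import summary

NUM_POINTS, DP = 256, 3
USE_POOLING = True

class Model(ModelDesc):
    def inputs(self):
        return [tf.TensorSpec((None, DP, NUM_POINTS), tf.float32, 'positions'),
                tf.TensorSpec((None,), tf.int32, 'label')]

    def optimizer(self):
        return tf.train.AdamOptimizer(1e-3)

    # build_graph(self, positions, label) is the method shown above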