def dilated_res_block(self, feature, xyz, neigh_idx, d_out, name, is_training):
    # Follows the Dilated Residual Block structure diagram.
    # Shared MLP (N, d_out/2)
    f_pc = helper_tf_util.conv2d(feature, d_out // 2, [1, 1], name + 'mlp1', [1, 1], 'VALID', True, is_training)
    # Local feature aggregation module (LocSE + Attentive Pooling)
    f_pc = self.building_block(xyz, f_pc, neigh_idx, d_out, name + 'LFA', is_training)
    # Shared MLP (N, 2*d_out)
    f_pc = helper_tf_util.conv2d(f_pc, d_out * 2, [1, 1], name + 'mlp2', [1, 1], 'VALID', True, is_training,
                                 activation_fn=None)
    # Shared MLP (N, 2*d_out) on the shortcut branch
    shortcut = helper_tf_util.conv2d(feature, d_out * 2, [1, 1], name + 'shortcut', [1, 1], 'VALID',
                                     activation_fn=None, bn=True, is_training=is_training)
    # Sum + LeakyReLU
    return tf.nn.leaky_relu(f_pc + shortcut)
def gt_res_block(self, feature, xyz, neigh_idx, d_out, name, is_training):
    # xyz       : pyt = b, n, 3    & tf = b, n, 3
    # feature   : pyt = b, c, n, 1 & tf = b, n, 1, c
    # neigh_idx : pyt = b, n, k    & tf = b, n, k
    f_pc = helper_tf_util.conv2d(feature, d_out // 2, [1, 1], name + 'mlp1', [1, 1], 'VALID', True, is_training)
    stored_f_pc, f_pc = self.gtmodule(xyz, f_pc, neigh_idx, d_out, name + 'LFA', is_training)
    f_pc = helper_tf_util.conv2d(f_pc, d_out * 2, [1, 1], name + 'mlp2', [1, 1], 'VALID', True, is_training,
                                 activation_fn=None)
    stored_f_pc = helper_tf_util.conv2d(stored_f_pc, d_out * 2, [1, 1], name + 'mlp2_for_store', [1, 1], 'VALID',
                                        True, is_training, activation_fn=None)
    # stored_value = helper_tf_util.conv2d(localvalue, d_out, [1, 1], name + 'conv_for_store', [1, 1], 'VALID', True, is_training)
    shortcut = helper_tf_util.conv2d(feature, d_out * 2, [1, 1], name + 'shortcut', [1, 1], 'VALID',
                                     activation_fn=None, bn=True, is_training=is_training)
    return tf.nn.leaky_relu(stored_f_pc + shortcut), tf.nn.leaky_relu(f_pc + shortcut)
def building_block(self, xyz, feature, neigh_idx, d_out, name, is_training):
    d_in = feature.get_shape()[-1].value
    f_xyz = self.relative_pos_encoding(xyz, neigh_idx)
    f_xyz = helper_tf_util.conv2d(f_xyz, d_in, [1, 1], name + 'mlp1', [1, 1], 'VALID', True, is_training)
    f_neighbours = self.gather_neighbour(tf.squeeze(feature, axis=2), neigh_idx)
    f_concat = tf.concat([f_neighbours, f_xyz], axis=-1)
    f_pc_agg = self.att_pooling(f_concat, d_out // 2, name + 'att_pooling_1', is_training)

    f_xyz = helper_tf_util.conv2d(f_xyz, d_out // 2, [1, 1], name + 'mlp2', [1, 1], 'VALID', True, is_training)
    f_neighbours = self.gather_neighbour(tf.squeeze(f_pc_agg, axis=2), neigh_idx)
    f_concat = tf.concat([f_neighbours, f_xyz], axis=-1)
    f_pc_agg = self.att_pooling(f_concat, d_out, name + 'att_pooling_2', is_training)
    return f_pc_agg
def inference(self, inputs, is_training):
    d_out = self.config.d_out
    feature = inputs['features']
    feature = tf.layers.dense(feature, 8, activation=None, name='fc0')
    feature = tf.nn.leaky_relu(tf.layers.batch_normalization(feature, -1, 0.99, 1e-6, training=is_training))
    feature = tf.expand_dims(feature, axis=2)

    # ###########################Encoder############################
    f_encoder_list = []
    for i in range(self.config.num_layers):
        # ES TODO: check 'self.dilated_res_block' and convert to our implementation, GT_res_blocks.
        # f_encoder_i = self.dilated_res_block(feature, inputs['xyz'][i], inputs['neigh_idx'][i], d_out[i],
        #                                      'Encoder_layer_' + str(i), is_training)
        s_encoder_i, f_encoder_i = self.gt_res_block(feature, inputs['xyz'][i], inputs['neigh_idx'][i], d_out[i],
                                                     'Encoder_layer_' + str(i), is_training, i)
        # print("{} : {}".format(i, f_encoder_i.shape))
        f_sampled_i = self.random_sample(f_encoder_i, inputs['sub_idx'][i])
        s_sampled_i = self.random_sample(s_encoder_i, inputs['sub_idx'][i])
        feature = s_sampled_i
        if i == 0:
            f_encoder_list.append(f_encoder_i)
        f_encoder_list.append(f_sampled_i)
    # ###########################Encoder############################

    feature = helper_tf_util.conv2d(f_encoder_list[-1], f_encoder_list[-1].get_shape()[3].value, [1, 1],
                                    'decoder_0', [1, 1], 'VALID', True, is_training)

    # ###########################Decoder############################
    f_decoder_list = []
    for j in range(self.config.num_layers):
        f_interp_i = self.nearest_interpolation(feature, inputs['interp_idx'][-j - 1])
        f_decoder_i = helper_tf_util.conv2d_transpose(tf.concat([f_encoder_list[-j - 2], f_interp_i], axis=3),
                                                      f_encoder_list[-j - 2].get_shape()[-1].value, [1, 1],
                                                      'Decoder_layer_' + str(j), [1, 1], 'VALID', bn=True,
                                                      is_training=is_training)
        feature = f_decoder_i
        f_decoder_list.append(f_decoder_i)
    # ###########################Decoder############################

    f_layer_fc1 = helper_tf_util.conv2d(f_decoder_list[-1], 64, [1, 1], 'fc1', [1, 1], 'VALID', True, is_training)
    f_layer_fc2 = helper_tf_util.conv2d(f_layer_fc1, 32, [1, 1], 'fc2', [1, 1], 'VALID', True, is_training)
    f_layer_drop = helper_tf_util.dropout(f_layer_fc2, keep_prob=0.5, is_training=is_training, scope='dp1')
    f_layer_fc3 = helper_tf_util.conv2d(f_layer_drop, self.config.num_classes, [1, 1], 'fc', [1, 1], 'VALID',
                                        False, is_training, activation_fn=None)
    f_out = tf.squeeze(f_layer_fc3, [2])
    return f_out
def building_block(self, xyz, feature, K_points, neigh_idx, d_out, name, is_training):
    d_in = feature.get_shape()[-1].value
    num_k = K_points.get_shape()[0].value

    xyz_neighbours = self.gather_neighbour(xyz, neigh_idx)
    xyz_neighbours = xyz_neighbours - tf.expand_dims(xyz, 2)
    xyz_neighbours = tf.expand_dims(xyz_neighbours, 3)
    xyz_neighbours = tf.tile(xyz_neighbours, [1, 1, 1, num_k, 1])
    differences = xyz_neighbours - K_points
    # Get the square distances [n_points, n_neighbors, n_kpoints]
    sq_distances = tf.reduce_sum(tf.square(differences), axis=4)
    all_weights = tf.maximum(1 - tf.sqrt(sq_distances), 0.2)

    f_xyz = self.relative_pos_encoding(xyz, neigh_idx)
    f_xyz = helper_tf_util.conv2d(f_xyz, d_in, [1, 1], name + 'mlp1', [1, 1], 'VALID', True, is_training)

    num_points = tf.shape(f_xyz)[1]
    num_f = f_xyz.get_shape()[3]
    f_xyz = tf.transpose(f_xyz, perm=[0, 1, 3, 2])
    f_xyz = tf.einsum("uvik,uvkj->uvij", f_xyz, all_weights)  # f_xyz = tf.matmul(f_xyz, all_weights)
    f_xyz = tf.transpose(f_xyz, perm=[0, 1, 3, 2])
    f_xyz = tf.reshape(f_xyz, shape=[-1, num_points, num_k * num_f])
    f_pc_agg = helper_tf_util.conv1d(f_xyz, d_out, 1, name + 'mlp2', 1, 'VALID', True, is_training)
    f_pc_agg = tf.reshape(f_pc_agg, [-1, num_points, 1, d_out])
    return f_pc_agg
def dualdis_att_pool(feature_set, f_dis, g_dis, d_out, name, is_training):
    """ DDAP """
    batch_size = tf.shape(feature_set)[0]
    num_points = tf.shape(feature_set)[1]
    num_neigh = tf.shape(feature_set)[2]
    d = feature_set.get_shape()[3].value
    d_dis = g_dis.get_shape()[3].value

    f_reshaped = tf.reshape(feature_set, shape=[-1, num_neigh, d])
    f_dis_reshaped = tf.reshape(f_dis, shape=[-1, num_neigh, d_dis]) * 0.1
    g_dis_reshaped = tf.reshape(g_dis, shape=[-1, num_neigh, d_dis])
    concat = tf.concat([g_dis_reshaped, f_dis_reshaped, f_reshaped], axis=-1)

    # weight learning
    att_activation = tf.layers.dense(concat, d, activation=None, use_bias=False, name=name + 'dis_self_fc')
    att_scores = tf.nn.softmax(att_activation, axis=1)
    # dot product
    f_lc = f_reshaped * att_scores
    # sum
    f_lc = tf.reduce_sum(f_lc, axis=1)
    f_lc = tf.reshape(f_lc, [batch_size, num_points, 1, d])
    # shared MLP
    f_lc = helper_tf_util.conv2d(f_lc, d_out, [1, 1], name + 'mlp', [1, 1], 'VALID', True, is_training)
    return f_lc
def building_block(self, xyz, feature, neigh_idx, d_out, name, is_training):
    d_in = feature.get_shape()[-1].value
    print(" check building block")
    print(" xyz", xyz.get_shape())          # ?, ?, 3
    print(" feature", feature.get_shape())  # ?, ?, 1, c
    print(" neigh", neigh_idx)

    f_xyz = self.relative_pos_encoding(xyz, neigh_idx)
    print(" f_xyz 1", f_xyz.get_shape())    # ?, ?, ?, 10
    f_xyz = helper_tf_util.conv2d(f_xyz, d_in, [1, 1], name + 'mlp1', [1, 1], 'VALID', True, is_training)
    print(" f_xyz 2", f_xyz.get_shape())    # ?, ?, ?, c
    f_neighbours = self.gather_neighbour(tf.squeeze(feature, axis=2), neigh_idx)
    f_concat = tf.concat([f_neighbours, f_xyz], axis=-1)
    f_pc_agg = self.att_pooling(f_concat, d_out // 2, name + 'att_pooling_1', is_training)

    f_xyz = helper_tf_util.conv2d(f_xyz, d_out // 2, [1, 1], name + 'mlp2', [1, 1], 'VALID', True, is_training)
    f_neighbours = self.gather_neighbour(tf.squeeze(f_pc_agg, axis=2), neigh_idx)
    f_concat = tf.concat([f_neighbours, f_xyz], axis=-1)
    f_pc_agg = self.att_pooling(f_concat, d_out, name + 'att_pooling_2', is_training)
    '''
    print(" check building block")
    print(" xyz", xyz.get_shape())                    # ?, ?, 3
    print(" feature", feature.get_shape())            # ?, ?, 1, c
    #print(" d_in 1", d_in)                           # 1, c
    #print(" f_xyz 1", f_xyz.get_shape())             # ?, ?, ?, 10
    #print(" f_xyz 2", f_xyz.get_shape())             # ?, ?, ?, c
    print(" f_neighbours", f_neighbours.get_shape())  # ?, ?, ?, c
    print(" f_concat", f_concat.get_shape())          # ?, ?, ?, 2c
    print(" f_pc_agg", f_pc_agg.get_shape())          # ?, ?, 1, 8
    print(" f_xyz 3", f_xyz.get_shape())              # ?, ?, ?, c
    print(" f_neighbours", f_neighbours.get_shape())  # ?, ?, ?, c
    print(" f_concat", f_concat.get_shape())          # ?, ?, ?, c
    print(" f_pc_agg", f_pc_agg.get_shape())          # ?, ?, 1, 2c
    '''
    return f_pc_agg
def building_block(self, xyz, feature, K_points, K_padding, neigh_idx, d_out, name, is_training):
    d_in = feature.get_shape()[-1]
    num_kpoints = K_points.get_shape()[0]
    batch_size = tf.shape(input=xyz)[0]
    num_points = tf.shape(input=xyz)[1]

    xyz_neighbours = self.gather_neighbour(xyz, neigh_idx)
    xyz_neighbours = xyz_neighbours - tf.expand_dims(xyz, 2)

    #### linear ####
    # xyz_neighbours = tf.expand_dims(xyz_neighbours, 3)
    # xyz_neighbours = tf.tile(xyz_neighbours, [1, 1, 1, num_kpoints, 1])
    # differences = xyz_neighbours - K_points
    # # Get the square distances [n_points, n_neighbors, n_kpoints]
    # sq_distances = tf.reduce_sum(input_tensor=tf.square(differences), axis=4)
    # all_weights = tf.maximum(1 - tf.sqrt(sq_distances), 0)

    #### cos ####
    all_weights = tf.tensordot(xyz_neighbours, K_points, [[3], [1]])
    all_weights = tf.maximum(all_weights, 0) + K_padding
    all_weights = all_weights / (tf.reduce_sum(all_weights, axis=2, keepdims=True) + 1e-6)
    all_weights = tf.square(all_weights)
    all_weights = all_weights / (tf.reduce_sum(all_weights, axis=2, keepdims=True) + 1e-6)
    all_weights = tf.where(all_weights < 0.1, 0., all_weights)
    # all_weights = tfa.activations.sparsemax(all_weights, axis=2)

    #### 1 ####
    f_xyz = self.relative_pos_encoding(xyz, neigh_idx)
    f_xyz = helper_tf_util.conv2d(f_xyz, d_in, [1, 1], name + 'mlp1', [1, 1], 'VALID', True, is_training)
    f_neighbours = self.gather_neighbour(tf.squeeze(feature, axis=2), neigh_idx)
    f_concat = tf.concat([f_neighbours, f_xyz], axis=-1)
    f_concat = tf.einsum("uvki,uvkj->uvij", f_concat, all_weights)  # f_xyz = tf.matmul(f_xyz, all_weights)
    f_concat = tf.reshape(f_concat, shape=[-1, num_points, num_kpoints * d_in * 2])
    f_pc_agg = helper_tf_util.conv1d(f_concat, d_out, 1, name + 'att_pooling_1', 1, 'VALID', True, is_training)
    f_pc_agg = tf.reshape(f_pc_agg, [-1, num_points, 1, d_out])

    # #### 2 ####
    # f_xyz = helper_tf_util.conv2d(f_xyz, d_out // 2, [1, 1], name + 'mlp2', [1, 1], 'VALID', True, is_training)
    # f_neighbours = self.gather_neighbour(tf.squeeze(f_pc_agg, axis=2), neigh_idx)
    # f_concat = tf.nn.leaky_relu(tf.concat([f_neighbours, f_xyz], axis=-1))
    # f_concat = tf.einsum("uvki,uvkj->uvij", f_concat, all_weights)  # f_xyz = tf.matmul(f_xyz, all_weights)
    # f_concat = tf.reshape(f_concat, shape=[-1, num_points, num_kpoints*d_out])
    # f_pc_agg = helper_tf_util.conv1d(f_concat, d_out, 1, name + 'att_pooling_2', 1, 'VALID', True, is_training)
    # f_pc_agg = tf.reshape(f_pc_agg, [-1, num_points, 1, d_out])

    return f_pc_agg  # , all_weights
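# For intuition, the '#### cos ####' branch above scores every neighbour offset against each
# kernel-point direction, rectifies and pads the scores, normalises them across the k neighbours,
# squares and renormalises them (which sharpens the distribution), and zeroes out weak
# assignments; the resulting (b, n, k, m) weights then pool neighbour features into
# per-kernel-point bins via the einsum. The demo below is a NumPy sketch of just that weighting
# and pooling step; the function name, shapes and the zero stand-in for K_padding are
# illustrative assumptions, not part of the model code.
def _cos_kernel_weighting_numpy_demo():
    import numpy as np

    b, n, k, m, c = 2, 100, 16, 13, 8          # batch, points, neighbours, kernel points, channels
    offsets = np.random.randn(b, n, k, 3)      # neighbour offsets, as in xyz_neighbours above
    kernel = np.random.randn(m, 3)             # stand-in for K_points
    padding = np.zeros(m)                      # stand-in for K_padding

    w = np.maximum(offsets @ kernel.T, 0) + padding   # b, n, k, m
    w = w / (w.sum(axis=2, keepdims=True) + 1e-6)     # normalise over the k neighbours
    w = np.square(w)                                  # sharpen
    w = w / (w.sum(axis=2, keepdims=True) + 1e-6)
    w = np.where(w < 0.1, 0.0, w)                     # drop weak assignments

    feats = np.random.randn(b, n, k, c)               # neighbour features (f_concat above)
    pooled = np.einsum("bnkc,bnkm->bncm", feats, w)   # features binned per kernel point
    return pooled.reshape(b, n, -1).shape             # (2, 100, 104), flattened as in the reshape above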
def scf_module(self, feature, xyz, neigh_idx, d_out, name, is_training):
    """ SCF """
    # Local Contextual Features
    # MLP 1
    f_pc = helper_tf_util.conv2d(feature, d_out // 2, [1, 1], name + 'mlp1', [1, 1], 'VALID', True, is_training)
    # Local Context Learning (LPR + DDAP)
    f_lc, lg_volume_ratio = self.local_context_learning(xyz, f_pc, neigh_idx, d_out, name + 'LFA', is_training)
    # MLP 2
    f_lc = helper_tf_util.conv2d(f_lc, d_out * 2, [1, 1], name + 'mlp2', [1, 1], 'VALID', True, is_training,
                                 activation_fn=None)
    # MLP Shortcut
    shortcut = helper_tf_util.conv2d(feature, d_out * 2, [1, 1], name + 'shortcut', [1, 1], 'VALID',
                                     activation_fn=None, bn=True, is_training=is_training)
    # Global Contextual Features
    f_gc = tf.expand_dims(tf.concat([xyz, lg_volume_ratio], axis=-1), -2)
    f_gc = helper_tf_util.conv2d(f_gc, d_out * 2, [1, 1], name + 'lg', [1, 1], 'VALID', activation_fn=None,
                                 bn=True, is_training=is_training)
    return tf.nn.leaky_relu(tf.concat([f_lc + shortcut, f_gc], axis=-1))
def att_pooling(feature_set, d_out, name, is_training):
    batch_size = tf.shape(feature_set)[0]
    num_points = tf.shape(feature_set)[1]
    num_neigh = tf.shape(feature_set)[2]
    d = feature_set.get_shape()[3].value
    f_reshaped = tf.reshape(feature_set, shape=[-1, num_neigh, d])
    att_activation = tf.layers.dense(f_reshaped, d, activation=None, use_bias=False, name=name + 'fc')
    att_scores = tf.nn.softmax(att_activation, axis=1)
    f_agg = f_reshaped * att_scores
    f_agg = tf.reduce_sum(f_agg, axis=1)
    f_agg = tf.reshape(f_agg, [batch_size, num_points, 1, d])
    f_agg = helper_tf_util.conv2d(f_agg, d_out, [1, 1], name + 'mlp', [1, 1], 'VALID', True, is_training)
    return f_agg
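# For reference, the attentive pooling above reduces the k neighbour features of each point to a
# single vector through a learned softmax over the neighbour axis. The sketch below is a minimal
# NumPy illustration of that reduction, with a fixed random matrix standing in for the trainable
# name + 'fc' layer; the function name, shapes and weights are illustrative only, not part of the
# model code. In the graph code the pooled vector is then mapped to d_out channels by the 1x1 conv2d.
def _att_pooling_numpy_demo():
    import numpy as np

    def softmax(x, axis):
        e = np.exp(x - x.max(axis=axis, keepdims=True))
        return e / e.sum(axis=axis, keepdims=True)

    b, n, k, d = 2, 5, 16, 8                   # batch, points, neighbours, channels
    feats = np.random.randn(b, n, k, d)
    W = np.random.randn(d, d)                  # stand-in for the trainable dense layer
    scores = softmax(feats @ W, axis=2)        # attention over the k neighbours
    pooled = (feats * scores).sum(axis=2)      # (b, n, d): one aggregated vector per point
    return pooled.shape                        # (2, 5, 8)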
def att_pooling(self, feature_set, d_out, name, is_training):
    # NOTE: this variant relies on the fixed self.b / self.n sizes, so it takes self explicitly.
    d = feature_set.get_shape()[3].value
    a, b, c, d = feature_set.get_shape()
    f_reshaped = tf.reshape(feature_set, shape=[-1, self.n, d])
    att_activation = tf.layers.dense(f_reshaped, d, activation=None, use_bias=False, name=name + 'fc')
    att_scores = tf.nn.softmax(att_activation, axis=1)
    f_agg = f_reshaped * att_scores
    f_agg = tf.reduce_sum(f_agg, axis=1)
    f_agg = tf.reshape(f_agg, [self.b, self.n, 1, d])
    f_agg = helper_tf_util.conv2d(f_agg, d_out, [1, 1], name + 'mlp', [1, 1], 'VALID', True, is_training)
    return f_agg
def local_context_learning(self, xyz, feature, neigh_idx, d_out, name, is_training):
    """ (LPR + DDAP) * 2 """
    d_in = feature.get_shape()[-1].value
    # LPR
    local_rep, g_dis, lg_volume_ratio = self.local_polar_representation(xyz, neigh_idx)
    # 1
    local_rep = helper_tf_util.conv2d(local_rep, d_in, [1, 1], name + 'mlp1', [1, 1], 'VALID', True, is_training)
    f_neighbours = self.gather_neighbour(tf.squeeze(feature, axis=2), neigh_idx)
    f_concat = tf.concat([f_neighbours, local_rep], axis=-1)
    f_dis = self.cal_feature_dis(tf.squeeze(feature, axis=2), f_neighbours)
    f_lc = self.dualdis_att_pool(f_concat, f_dis, g_dis, d_out // 2, name + 'dis_att_pooling_1', is_training)
    # 2
    local_rep = helper_tf_util.conv2d(local_rep, d_out // 2, [1, 1], name + 'mlp2', [1, 1], 'VALID', True, is_training)
    f_neighbours = self.gather_neighbour(tf.squeeze(f_lc, axis=2), neigh_idx)
    f_concat = tf.concat([f_neighbours, local_rep], axis=-1)
    f_dis = self.cal_feature_dis(tf.squeeze(f_lc, axis=2), f_neighbours)
    f_lc = self.dualdis_att_pool(f_concat, f_dis, g_dis, d_out, name + 'dis_att_pooling_2', is_training)
    return f_lc, lg_volume_ratio
def gt_pooling(self, feature_set, d_out, name, is_training, neigh_idx, depth):
    # input:
    #   feature_set: b,n,k,g,d
    # output:
    #   f_agg: b,n,g,d
    #   attention_centrality: b,g,n
    _, _, _, _, d = feature_set.get_shape()
    n = self.n // (4 ** depth)
    identity = feature_set
    print("gtp - feature_set bef", feature_set.shape)
    feature_set = tf.reshape(feature_set, [self.b, n, self.k, -1])  # b,n,k,g*10
    print("gtp - feature_set aft", feature_set.shape)
    att_activation = helper_tf_util.grouped_conv2d(name + 'gt_pooling',
                                                   x=feature_set,
                                                   w=None,
                                                   num_filters=d * self.g,
                                                   kernel_size=(1, 1),
                                                   padding='VALID',
                                                   num_groups=self.g,
                                                   # l2_strength=0,
                                                   l2_strength=4e-5,
                                                   bias=0.0,
                                                   activation=True,
                                                   batchnorm_enabled=True,
                                                   is_training=is_training)
    att_activation = tf.reshape(att_activation, [self.b, n, self.k, self.g, d])
    print("gtp - att act", att_activation.shape)
    att_scores = tf.nn.softmax(att_activation, axis=2)   # b,n,k,g,d//g
    print("gtp - att score", att_activation.shape)
    f_agg = identity * att_scores                        # b,n,k,g,d//g
    print("gtp - fagg", f_agg.shape)
    f_agg = tf.reduce_sum(f_agg, axis=2)                 # b,n,g,d//g
    print("gtp - red sum", f_agg.shape)
    f_agg = helper_tf_util.conv2d(f_agg, d_out, [1, 1], name + 'mlp', [1, 1], 'VALID', True, is_training)
    print("gtp - conv", f_agg.shape)

    # attention centrality
    att_scores = tf.reduce_sum(att_scores, axis=-1)      # b,n,k,g
    att_scores = tf.transpose(att_scores, [0, 3, 1, 2])  # b,g,n,k
    attention_centrality = self.attention_centrality(att_scores, neigh_idx, name, depth)
    print("gtp - after ac", f_agg.shape)
    return f_agg, attention_centrality
def gt_pooling(self, feature_set, d_out, name, is_training, neigh_idx):
    # TODO CHECK PLZ
    # input:
    #   feature_set: b,n,k,g,d
    # output:
    #   f_agg: b,n,g,d
    #   attention_centrality: b,g,n
    _, _, _, _, d = feature_set.get_shape()
    identity = feature_set
    feature_set = tf.reshape(feature_set, [self.b, self.n, self.k, -1])  # b,n,k,g*10
    att_activation = helper_tf_util.grouped_conv2d(name + 'gt_pooling',
                                                   x=feature_set,
                                                   w=None,
                                                   num_filters=d * self.g,
                                                   kernel_size=(1, 1),
                                                   padding='VALID',
                                                   num_groups=self.g,
                                                   # l2_strength=0,
                                                   l2_strength=4e-5,
                                                   bias=0.0,
                                                   activation=True,
                                                   batchnorm_enabled=True,
                                                   is_training=is_training)
    att_activation = tf.reshape(att_activation, [self.b, self.n, self.k, self.g, d])
    att_scores = tf.nn.softmax(att_activation, axis=2)   # b,n,k,g,d//g
    f_agg = identity * att_scores                        # b,n,k,g,d//g
    f_agg = tf.reduce_sum(f_agg, axis=2)                 # b,n,g,d//g
    f_agg = helper_tf_util.conv2d(f_agg, d_out, [1, 1], name + 'mlp', [1, 1], 'VALID', True, is_training)

    # attention centrality
    att_scores = tf.reduce_sum(att_scores, axis=-1)      # b,n,g,k
    att_scores = tf.transpose(att_scores, [0, 2, 1, 3])  # b,g,n,k
    attention_centrality = self.attention_centrality(att_scores, neigh_idx)
    return f_agg, attention_centrality
def gt_pooling(self, feature_set, d_out, name, is_training, neigh_idx):
    # Leftover debug prints copied from att_pooling; the names they reference are not defined
    # in this function, so they are kept disabled.
    # print(" check att_pooling")
    # print(" batch_size", batch_size)  # ? = b
    # print(" num_points", num_points)  # ? = n
    # print(" num_neigh", num_neigh)    # ? = k
    # print(" d", d)                    # c
    # print(a, b, c, d)
    # print(" f_reshaped", f_reshaped.get_shape())          # ?, ?, c
    # print(" att_activation", att_activation.get_shape())  # ?, ?, c
    # print(" att_scores", att_scores.get_shape())          # ?, ?, c
    # print(" f_agg1", f_agg.get_shape())                   # ?, ?, c
    # print(" f_agg2", f_agg.get_shape())                   # ?, c
    # print(" f_agg3", f_agg.get_shape())                   # ?, c
    # print(" f_agg4", f_agg.get_shape())                   # ?, c

    b, n, k, g, d = feature_set.get_shape()
    f_reshaped = tf.transpose(feature_set, perm=(0, 1, 3, 2, 4))
    f_reshaped = tf.reshape(f_reshaped, [-1, k, d])  # b*n*g, k, d
    att_activation = tf.layers.dense(f_reshaped, d, activation=None, use_bias=False, name=name + 'fc')  # b*n*g, k, d
    att_scores = tf.nn.softmax(att_activation, axis=1)  # b*n*g, k, d, softmaxed over k
    f_agg = f_reshaped * att_scores                     # b*n*g, k, d
    f_agg = tf.reduce_sum(f_agg, axis=1)                # b*n*g, d
    f_agg = tf.reshape(f_agg, [b, n, g, d])
    f_agg = helper_tf_util.conv2d(f_agg, d_out, [1, 1], name + 'mlp', [1, 1], 'VALID', True, is_training)

    # attention centrality
    att_scores = tf.reshape(att_scores, [b, n, g, k, d])
    att_scores = tf.reduce_sum(att_scores, axis=-1)      # b,n,g,k
    attention_centrality = self.attention_centrality(att_scores, neigh_idx)
    return f_agg, attention_centrality
def att_pooling(feature_set, d_out, name, is_training):
    # f_pc_agg = self.att_pooling(f_concat, d_out // 2, name + 'att_pooling_1', is_training)
    # Debug prints kept disabled: they reference variables that are only defined further down.
    # print(" check att_pooling")
    # print(" batch_size", batch_size)  # ? = b
    # print(" num_points", num_points)  # ? = n
    # print(" num_neigh", num_neigh)    # ? = k
    # print(" d", d)                    # c
    # print(a, b, c, d)
    # print(" f_reshaped", f_reshaped.get_shape())          # ?, ?, c
    # print(" att_activation", att_activation.get_shape())  # ?, ?, c
    # print(" att_scores", att_scores.get_shape())          # ?, ?, c
    # print(" f_agg1", f_agg.get_shape())                   # ?, ?, c
    # print(" f_agg2", f_agg.get_shape())                   # ?, c
    # print(" f_agg3", f_agg.get_shape())                   # ?, c
    # print(" f_agg4", f_agg.get_shape())                   # ?, c

    batch_size = tf.shape(feature_set)[0]
    num_points = tf.shape(feature_set)[1]
    num_neigh = tf.shape(feature_set)[2]
    d = feature_set.get_shape()[3].value
    a, b, c, d = feature_set.get_shape()
    f_reshaped = tf.reshape(feature_set, shape=[-1, num_neigh, d])
    att_activation = tf.layers.dense(f_reshaped, d, activation=None, use_bias=False, name=name + 'fc')
    att_scores = tf.nn.softmax(att_activation, axis=1)
    f_agg = f_reshaped * att_scores
    f_agg = tf.reduce_sum(f_agg, axis=1)
    f_agg = tf.reshape(f_agg, [batch_size, num_points, 1, d])
    f_agg = helper_tf_util.conv2d(f_agg, d_out, [1, 1], name + 'mlp', [1, 1], 'VALID', True, is_training)
    return f_agg
def gtmodule(self, xyz, feature, neigh_idx, d_out, name, is_training, first=None):
    # xyz       : pyt = b, n, 3    & tf = b, n, 3
    # feature   : pyt = b, c, n, 1 & tf = b, n, 1, c
    # neigh_idx : pyt = b, n, k    & tf = b, n, k
    b, n, _, d_in = feature.get_shape()
    g = self.config.groups
    k = self.config.num_neighbors

    neigh_idx = tf.expand_dims(neigh_idx, 1)
    neigh_idx = tf.repeat(neigh_idx, g, axis=1)  # b,g,n,k
    xyz = tf.expand_dims(xyz, 1)
    xyz = tf.repeat(xyz, g, axis=1)              # b,g,n,3

    localvalue = tf.reshape(feature, [b, n, g, -1])           # b,n,g,c//g
    localvalue = tf.transpose(localvalue, perm=(0, 2, 1, 3))
    # pointwisely concat feature and coordinates
    localvalue = tf.concat([localvalue, xyz], axis=-1)        # b,g,n,c//g+3

    # first local operation
    localvalue, f_xyz = self.group_gather_neighbour(localvalue, neigh_idx, None, d_in // g)  # b,g,n,k,c//g & b,g,n,k,3
    f_xyz = self.relative_pos_encoding(xyz, f_xyz)            # b,g,n,k,10
    f_xyz = tf.transpose(f_xyz, perm=(0, 2, 3, 1, 4))         # b,n,k,g,10
    d_int = f_xyz.get_shape()[4].value
    f_xyz = tf.reshape(f_xyz, [b, d_int * g, n, k])           # b,n,k,g*10
    f_xyz = helper_tf_util.conv2d(f_xyz, d_out // 2, [1, 1], name + 'mlp1', [1, 1], 'VALID', True, is_training,
                                  groups=g)                   # b,n,k,d_out//2
    f_xyz = tf.reshape(f_xyz, [b, n, k, g, (d_out // 2) // g])  # b,n,k,g,d_out//2*g
    # f_xyz = tf.transpose(f_xyz, perm=(0,3,4,))
    f1 = tf.concat([localvalue, tf.reshape(f_xyz, [b, g, d_in // g, n, k])], axis=2)  # b,n,k,g,c//g+d_out//2*g
    v1, attention_centrality = self.gt_pooling(f1, d_out // 2, name + 'gt_pooling_1', is_training)

    ### ended up here -- the remainder is unfinished scratch code
    f_concat = tf.concat(features, axis=1)  # b,g,n,k,d
    value = features[:, :, :, :, :cv]
    xyz = features[:, :, :, :, -3:]
    f_concat = tf.concat([f_neighbours, f_xyz], axis=-1)
    pool_features = tf.reshape(pool_features, [batch_size, -1, num_neigh, d])

    # local gathering and concat pe
    # local agg
    f_xyz = self.relative_pos_encoding(xyz, neigh_idx)
    f_xyz = helper_tf_util.conv2d(f_xyz, d_in, [1, 1], name + 'mlp1', [1, 1], 'VALID', True, is_training)
    print(" f_xyz 2", f_xyz.get_shape())              # ?, ?, ?, c
    print(" f_neighbours", f_neighbours.get_shape())  # ?, ?, ?, c
    print(" f_concat", f_concat.get_shape())          # ?, ?, ?, 2c
    print(" f_pc_agg", f_pc_agg.get_shape())          # ?, ?, 1, 8
    f_neighbours = self.gather_neighbour(tf.squeeze(feature, axis=2), neigh_idx)
    f_concat = tf.concat([f_neighbours, f_xyz], axis=-1)
    f_pc_agg = self.att_pooling(f_concat, d_out // 2, name + 'att_pooling_1', is_training)
def inference(self, inputs, is_training):
    d_out = self.config.d_out
    feature = inputs['features']
    feature = tf.layers.dense(feature, 8, activation=None, name='fc0')
    feature = tf.nn.leaky_relu(tf.layers.batch_normalization(feature, -1, 0.99, 1e-6, training=is_training))
    feature = tf.expand_dims(feature, axis=2)

    # ###########################Encoder############################
    f_encoder_list = []
    for i in range(self.config.num_layers):
        f_encoder_i = self.dilated_res_block(feature, inputs['xyz'][i], inputs['neigh_idx'][i], d_out[i],
                                             'Encoder_layer_' + str(i), is_training)
        f_sampled_i = self.random_sample(f_encoder_i, inputs['sub_idx'][i])
        feature = f_sampled_i
        if i == 0:
            f_encoder_list.append(f_encoder_i)
        f_encoder_list.append(f_sampled_i)
    # ###########################Encoder############################

    feature = helper_tf_util.conv2d(f_encoder_list[-1], f_encoder_list[-1].get_shape()[3].value, [1, 1],
                                    'decoder_0', [1, 1], 'VALID', True, is_training)

    # # bboxes head
    # bboxes_layer_fc1 = helper_tf_util.conv2d(f_encoder_list[-1], 64, [1, 1], 'bboxes_fc1', [1, 1], 'VALID', True, is_training)
    # bboxes_layer_fc2 = helper_tf_util.conv2d(bboxes_layer_fc1, 32, [1, 1], 'bboxes_fc2', [1, 1], 'VALID', True, is_training)
    # bboxes_layer_drop = helper_tf_util.dropout(bboxes_layer_fc2, keep_prob=0.5, is_training=is_training, scope='bboxes_dp1')
    # bboxes_layer_fc3 = helper_tf_util.conv2d(bboxes_layer_drop, self.num_target_attributes-1, [1, 1], 'bboxes_fc', [1, 1], 'VALID', False,
    #                                          is_training, activation_fn=None)
    # bboxes_out = tf.squeeze(bboxes_layer_fc3, [2])

    # # fgbg head
    # fgbg_layer_fc1 = helper_tf_util.conv2d(f_encoder_list[-1], 64, [1, 1], 'fgbg_fc1', [1, 1], 'VALID', True, is_training)
    # fgbg_layer_fc2 = helper_tf_util.conv2d(fgbg_layer_fc1, 32, [1, 1], 'fgbg_fc2', [1, 1], 'VALID', True, is_training)
    # fgbg_layer_drop = helper_tf_util.dropout(fgbg_layer_fc2, keep_prob=0.5, is_training=is_training, scope='fgbg_dp1')
    # fgbg_layer_fc3 = helper_tf_util.conv2d(fgbg_layer_drop, 1, [1, 1], 'fgbg_fc', [1, 1], 'VALID', False,
    #                                        is_training, activation_fn=None)
    # fgbg_out = tf.squeeze(fgbg_layer_fc3, [2])

    # # classification head
    # cls_layer_fc1 = helper_tf_util.conv2d(f_encoder_list[-1], 64, [1, 1], 'cls_fc1', [1, 1], 'VALID', True, is_training)
    # cls_layer_fc2 = helper_tf_util.conv2d(cls_layer_fc1, 32, [1, 1], 'cls_fc2', [1, 1], 'VALID', True, is_training)
    # cls_layer_drop = helper_tf_util.dropout(cls_layer_fc2, keep_prob=0.5, is_training=is_training, scope='cls_dp1')
    # cls_layer_fc3 = helper_tf_util.conv2d(cls_layer_drop, self.num_classes, [1, 1], 'cls_fc', [1, 1], 'VALID', False,
    #                                       is_training, activation_fn=None)
    # cls_out = tf.squeeze(cls_layer_fc3, [2])
    # return bboxes_out, fgbg_out, cls_out

    f_layer_fc1 = helper_tf_util.conv2d(f_encoder_list[-1], 64, [1, 1], 'fc1', [1, 1], 'VALID', True, is_training)
    f_layer_fc2 = helper_tf_util.conv2d(f_layer_fc1, 32, [1, 1], 'fc2', [1, 1], 'VALID', True, is_training)
    f_layer_drop = helper_tf_util.dropout(f_layer_fc2, keep_prob=0.5, is_training=is_training, scope='dp1')
    # f_layer_fc3 = helper_tf_util.conv2d(f_layer_drop, self.num_output_attributes, [1, 1], 'fc', [1, 1], 'VALID', False,
    #                                     is_training, activation_fn=None)
    f_layer_fc3 = helper_tf_util.conv2d(f_layer_drop, self.num_fgbg_attributes, [1, 1], 'fc', [1, 1], 'VALID',
                                        False, is_training, activation_fn=None)
    f_out = tf.squeeze(f_layer_fc3, [2])
    return f_out
def gtmodule(self, xyz, feature, neigh_idx, d_out, name, is_training, depth, first=None):
    # xyz       : pyt = b, n, 3    & tf = b, n, 3
    # feature   : pyt = b, c, n, 1 & tf = b, n, 1, c
    # neigh_idx : pyt = b, n, k    & tf = b, n, k
    _, _, _, d_in = feature.get_shape()
    ld_out = int(d_out * 0.75)
    nld_out = int(d_out * 0.25)
    n = self.n // (4 ** depth)
    print("depth : {}".format(depth))
    print("xyz", xyz.shape)
    print("fea", feature.shape)
    print("ldout", ld_out)
    print("nldout", nld_out)

    neigh_idx = tf.tile(tf.expand_dims(neigh_idx, axis=1), [1, self.g, 1, 1])  # b,1,n,k -> b,g,n,k
    xyz = tf.tile(tf.expand_dims(xyz, axis=1), [1, self.g, 1, 1])              # b,1,n,3 -> b,g,n,3

    # Stage 1 : Local Layer
    localvalue = tf.reshape(feature, [-1, n, self.g, d_in // self.g])  # b,n,g,c//g
    print("lv - res", localvalue.shape)
    localvalue = tf.transpose(localvalue, perm=(0, 2, 1, 3))           # b,g,n,c//g
    print("lv - tra", localvalue.shape)
    # Pointwisely concat feature and coordinates
    localvalue = tf.concat([localvalue, xyz], axis=-1)                 # b,g,n,c//g+3
    print("lv - cat", localvalue.shape)
    # First local operation
    localvalue, f_xyz = self.group_gather_neighbour(localvalue, neigh_idx, None, d_in // self.g)  # b,g,n,k,c//g & b,g,n,k,3
    print("lv - gat", localvalue.shape)
    f_xyz = self.relative_pos_encoding(xyz, f_xyz)                     # b,g,n,k,10
    localvalue = tf.transpose(localvalue, perm=(0, 2, 3, 1, 4))        # b,n,k,g,c//g
    print("lv - tra", localvalue.shape)
    f_xyz = tf.transpose(f_xyz, perm=(0, 2, 3, 1, 4))                  # b,n,k,g,10
    f_xyz = tf.reshape(f_xyz, [self.b, n, self.k, -1])                 # b,n,k,g*10
    f_xyz = helper_tf_util.grouped_conv2d(name + 'local_pe',
                                          x=f_xyz,
                                          w=None,
                                          num_filters=d_in,
                                          kernel_size=(1, 1),
                                          padding='VALID',
                                          num_groups=self.g,
                                          # l2_strength=0,
                                          l2_strength=4e-5,
                                          bias=0.0,
                                          activation=True,
                                          batchnorm_enabled=True,
                                          is_training=is_training)
    f_xyz = tf.reshape(f_xyz, [self.b, n, self.k, self.g, d_in // self.g])  # b,n,k,g,d_in//g
    localvalue = tf.concat([localvalue, f_xyz], axis=-1)               # b,n,k,g,2*d_in//g
    print("lv - cat", localvalue.shape)
    localvalue, attention_centrality = self.gt_pooling(localvalue, ld_out // self.g, name + 'gt_pooling_1',
                                                       is_training, neigh_idx, depth)
    print("lv - gtp", localvalue.shape)

    # Stage 2 : Global Layer
    # Global Indexing
    _, ac_idx = tf.math.top_k(attention_centrality, k=self.knl)     # sorted?  b,g,k
    ac_idx = tf.tile(tf.expand_dims(ac_idx, axis=2), [1, 1, n, 1])  # bgk -> bgnk
    # Pointwisely concat feature and coordinates
    globalvalue = tf.concat([tf.transpose(localvalue, [0, 2, 1, 3]), xyz], axis=-1)  # b,g,n,c//g+3
    print("gv - cat", localvalue.shape)
    # First local operation
    globalvalue, g_xyz = self.group_gather_neighbour(globalvalue, ac_idx, None, ld_out // self.g)  # b,g,n,k,c//g & b,g,n,k,3
    print("gv - gat", localvalue.shape)
    g_xyz = self.relative_pos_encoding(xyz, g_xyz)                     # b,g,n,k,d
    globalvalue = tf.transpose(globalvalue, perm=(0, 2, 3, 1, 4))      # b,n,k,g,d
    print("gv - tra", localvalue.shape)
    g_xyz = tf.transpose(g_xyz, perm=(0, 2, 3, 1, 4))                  # b,n,k,g,d
    g_xyz = tf.reshape(g_xyz, [self.b, n, self.k, -1])                 # b,n,k,g*d
    g_xyz = helper_tf_util.grouped_conv2d(name + 'global_pe',
                                          x=g_xyz,
                                          w=None,
                                          num_filters=ld_out,
                                          kernel_size=(1, 1),
                                          padding='VALID',
                                          num_groups=self.g,
                                          # l2_strength=0,
                                          l2_strength=4e-5,
                                          bias=0.0,
                                          activation=True,
                                          batchnorm_enabled=True,
                                          is_training=is_training)
    g_xyz = tf.reshape(g_xyz, [self.b, n, self.k, self.g, ld_out // self.g])  # b,n,k,g,d_out//2*g
    globalvalue = tf.concat([globalvalue, g_xyz], axis=-1)             # b,n,k,g,c//g+d_out//2*g
    print("gv - cat", localvalue.shape)
    # Gating
    globalvalue, attention_centrality = self.gt_pooling(globalvalue, nld_out // self.g, name + 'gt_pooling_2',
                                                        is_training, neigh_idx, depth)
    print("gv - gtp", globalvalue.shape)
    globalvalue = globalvalue * tf.math.tanh(tf.expand_dims(tf.transpose(attention_centrality, [0, 2, 1]), axis=-1))  # bngc, bng1
    print("gv - tan", globalvalue.shape)

    # Cat Two
    localvalue = tf.reshape(localvalue, [self.b, n, 1, -1])
    globalvalue = tf.reshape(globalvalue, [self.b, n, 1, -1])
    print("lv - res", localvalue.shape)
    print("gv - res", globalvalue.shape)
    conferred_value = tf.concat([localvalue, globalvalue], axis=-1)
    print("cv", conferred_value.shape)
    conferred_value = tf.layers.batch_normalization(conferred_value, -1, 0.99, 1e-6, training=is_training)
    stored_value = helper_tf_util.conv2d(localvalue, d_out, [1, 1], name + 'conv_for_store', [1, 1], 'VALID',
                                         True, is_training)
    print("sv", stored_value.shape)
    return stored_value, conferred_value
def dilated_res_block2(self, feature, xyz, K_points, K_padding, neigh_idx, d_out, name, is_training):
    d_in = feature.get_shape()[-1]
    num_kpoints = K_points.get_shape()[0]
    batch_size = tf.shape(input=xyz)[0]
    num_points = tf.shape(input=xyz)[1]

    ####### 1 #######
    f_xyz = tf.expand_dims(xyz, 2)
    # f_out_1 = helper_tf_util.conv2d(tf.concat([feature, f_xyz], axis=-1), d_out // 2, [1, 1],
    #                                 name + 'mlp1', [1, 1], 'VALID', True, is_training)

    ####### 2,3,4 #######
    neighbor_xyz = self.gather_neighbour(xyz, neigh_idx)
    xyz_tile = tf.tile(f_xyz, [1, 1, tf.shape(input=neigh_idx)[-1], 1])
    relative_xyz = neighbor_xyz - xyz_tile
    relative_dis = tf.sqrt(tf.reduce_sum(input_tensor=tf.square(relative_xyz), axis=-1, keepdims=True))
    f_xyz = tf.concat([relative_dis, relative_xyz, xyz_tile, neighbor_xyz], axis=-1)
    f_xyz = helper_tf_util.conv2d(f_xyz, d_in, [1, 1], name + 'mlp1', [1, 1], 'VALID', True, is_training)
    f_neighbours = self.gather_neighbour(tf.squeeze(feature, axis=2), neigh_idx)
    f_concat = tf.concat([f_neighbours, f_xyz], axis=-1)

    ####### 2 #######
    #### cos ####
    all_weights = tf.tensordot(relative_xyz, K_points, [[3], [1]])
    all_weights = tf.maximum(all_weights, 0) + K_padding
    all_weights = all_weights / (tf.reduce_sum(all_weights, axis=2, keepdims=True) + 1e-6)
    all_weights = tf.square(all_weights)
    all_weights = all_weights / (tf.reduce_sum(all_weights, axis=2, keepdims=True) + 1e-6)
    all_weights = tf.where(all_weights < 0.1, 0., all_weights)
    f_pc_agg = tf.einsum("uvki,uvkj->uvij", f_concat, all_weights)  # f_xyz = tf.matmul(f_xyz, all_weights)
    f_pc_agg = tf.reshape(f_pc_agg, shape=[-1, num_points, num_kpoints * d_in * 2])
    f_pc_agg = helper_tf_util.conv1d(f_pc_agg, d_out // 2, 1, name + 'att_pooling_1', 1, 'VALID', True, is_training)
    f_out_2 = tf.reshape(f_pc_agg, [-1, num_points, 1, d_out // 2])

    ####### 3 #######
    f_out_3 = self.att_pooling(f_concat, d_out // 2, name + 'att_pooling_2', is_training)

    ####### 4 #######
    f_out_4 = helper_tf_util.conv2d(f_concat, d_out // 2, [1, 1], name + 'mlp3', [1, 1], 'VALID', True, is_training)
    f_out_4 = tf.reduce_max(f_out_4, axis=2, keepdims=True)

    f_out = tf.concat([f_out_2, f_out_3, f_out_4], axis=-1)
    # NOTE: mlp4 currently consumes f_out_3 only, so the concat above is not used.
    f_out = helper_tf_util.conv2d(f_out_3, d_out * 2, [1, 1], name + 'mlp4', [1, 1], 'VALID', True, is_training)
    shortcut = helper_tf_util.conv2d(feature, d_out * 2, [1, 1], name + 'shortcut', [1, 1], 'VALID',
                                     activation_fn=None, bn=True, is_training=is_training)
    return tf.nn.leaky_relu(f_out + shortcut)
def bilateral_context_block(self, feature, xyz, neigh_idx, d_out, name, is_training):
    """
    Inputs:
        feature: [B, N, 1, c] input features
        xyz: [B, N, 3] input coordinates
        neigh_idx: [B, N, k] indices of k neighbors
    Output:
        output_feat: [B, N, 1, 2*d_out] encoded (output) features
        shifted_neigh_xyz: [B, N, k, 3] shifted neighbor coordinates, for augmentation loss
    """
    batch_size = tf.shape(xyz)[0]
    num_points = tf.shape(xyz)[1]

    # Input Encoding
    feature = helper_tf_util.conv2d(feature, d_out // 2, [1, 1], name + 'mlp1', [1, 1], 'VALID', True, is_training)

    # Bilateral Augmentation
    neigh_feat = self.gather_neighbour(tf.squeeze(feature, axis=2), neigh_idx)           # B, N, k, d_out/2
    neigh_xyz = self.gather_neighbour(xyz, neigh_idx)                                    # B, N, k, 3
    tile_feat = tf.tile(feature, [1, 1, self.config.k_n, 1])                             # B, N, k, d_out/2
    tile_xyz = tf.tile(tf.expand_dims(xyz, axis=2), [1, 1, self.config.k_n, 1])          # B, N, k, 3

    feat_info = tf.concat([neigh_feat - tile_feat, tile_feat], axis=-1)                  # B, N, k, d_out
    neigh_xyz_offsets = helper_tf_util.conv2d(feat_info, xyz.get_shape()[-1].value, [1, 1], name + 'mlp5',
                                              [1, 1], 'VALID', True, is_training)        # B, N, k, 3
    shifted_neigh_xyz = neigh_xyz + neigh_xyz_offsets                                    # B, N, k, 3

    xyz_info = tf.concat([neigh_xyz - tile_xyz, shifted_neigh_xyz, tile_xyz], axis=-1)   # B, N, k, 9
    neigh_feat_offsets = helper_tf_util.conv2d(xyz_info, feature.get_shape()[-1].value, [1, 1], name + 'mlp6',
                                               [1, 1], 'VALID', True, is_training)       # B, N, k, d_out/2
    shifted_neigh_feat = neigh_feat + neigh_feat_offsets                                 # B, N, k, d_out/2

    xyz_encoding = helper_tf_util.conv2d(xyz_info, d_out // 2, [1, 1], name + 'mlp7', [1, 1], 'VALID',
                                         True, is_training)                              # B, N, k, d_out/2
    feat_info = tf.concat([shifted_neigh_feat, feat_info], axis=-1)                      # B, N, k, 3/2*d_out
    feat_encoding = helper_tf_util.conv2d(feat_info, d_out // 2, [1, 1], name + 'mlp8', [1, 1], 'VALID',
                                          True, is_training)                             # B, N, k, d_out/2

    # Mixed Local Aggregation
    overall_info = tf.concat([xyz_encoding, feat_encoding], axis=-1)                     # B, N, k, d_out
    k_weights = helper_tf_util.conv2d(overall_info, overall_info.get_shape()[-1].value, [1, 1], name + 'mlp9',
                                      [1, 1], 'VALID', bn=False, activation_fn=None)     # B, N, k, d_out
    k_weights = tf.nn.softmax(k_weights, axis=2)                                         # B, N, k, d_out
    overall_info_weighted_sum = tf.reduce_sum(overall_info * k_weights, axis=2, keepdims=True)  # B, N, 1, d_out
    overall_info_max = tf.reduce_max(overall_info, axis=2, keepdims=True)                # B, N, 1, d_out
    overall_encoding = tf.concat([overall_info_max, overall_info_weighted_sum], axis=-1)  # B, N, 1, 2*d_out

    # Output Encoding
    overall_encoding = helper_tf_util.conv2d(overall_encoding, d_out, [1, 1], name + 'mlp10', [1, 1], 'VALID',
                                             True, is_training)                          # B, N, 1, d_out
    output_feat = helper_tf_util.conv2d(overall_encoding, d_out * 2, [1, 1], name + 'mlp11', [1, 1], 'VALID',
                                        True, is_training, activation_fn=tf.nn.leaky_relu)  # B, N, 1, 2*d_out
    return output_feat, shifted_neigh_xyz
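# As a side note, the 'Mixed Local Aggregation' step above combines two reductions over the same
# k neighbours: a learned softmax-weighted sum (using the mlp9 scores) and a plain max, which are
# concatenated channel-wise. The toy NumPy demo below reproduces that combination with random
# scores standing in for the mlp9 output; names and shapes are illustrative only, not model code.
def _mixed_local_aggregation_numpy_demo():
    import numpy as np

    b, n, k, d = 2, 100, 16, 32
    overall_info = np.random.randn(b, n, k, d)

    raw_scores = np.random.randn(b, n, k, d)                  # stand-in for the mlp9 output
    e = np.exp(raw_scores - raw_scores.max(axis=2, keepdims=True))
    k_weights = e / e.sum(axis=2, keepdims=True)              # softmax over the k neighbours

    weighted_sum = (overall_info * k_weights).sum(axis=2, keepdims=True)   # B, N, 1, d
    hard_max = overall_info.max(axis=2, keepdims=True)                     # B, N, 1, d
    overall_encoding = np.concatenate([hard_max, weighted_sum], axis=-1)   # B, N, 1, 2d
    return overall_encoding.shape                                          # (2, 100, 1, 64)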
def inference(self, inputs, is_training):
    d_out = self.config.d_out
    ratio = self.config.sub_sampling_ratio
    k_n = self.config.k_n
    feature = inputs['features']
    og_xyz = feature[:, :, :3]
    feature = tf.layers.dense(feature, 8, activation=None, name='fc0')
    feature = tf.nn.leaky_relu(tf.layers.batch_normalization(feature, -1, 0.99, 1e-6, training=is_training))
    feature = tf.expand_dims(feature, axis=2)

    # ###########################Encoder############################
    f_encoder_list = []
    input_xyz = og_xyz
    input_up_samples = []
    new_xyz_list = []
    xyz_list = []
    n_pts = self.config.num_points
    for i in range(self.config.num_layers):
        # Farthest Point Sampling:
        input_neigh_idx = tf.py_func(DP.knn_search, [input_xyz, input_xyz, k_n], tf.int32)
        n_pts = n_pts // ratio[i]
        sub_xyz, inputs_sub_idx = tf.cond(
            tf.equal(is_training, tf.constant(True)),
            lambda: sampling(self.config.batch_size, n_pts, input_xyz, input_neigh_idx),
            lambda: sampling(self.config.val_batch_size, n_pts, input_xyz, input_neigh_idx))
        inputs_interp_idx = tf.py_func(DP.knn_search, [sub_xyz, input_xyz, 1], tf.int32)
        input_up_samples.append(inputs_interp_idx)

        # Bilateral Context Encoding
        f_encoder_i, new_xyz = self.bilateral_context_block(feature, input_xyz, input_neigh_idx, d_out[i],
                                                            'Encoder_layer_' + str(i), is_training)
        f_sampled_i = self.random_sample(f_encoder_i, inputs_sub_idx)
        feature = f_sampled_i
        if i == 0:
            f_encoder_list.append(f_encoder_i)
        f_encoder_list.append(f_sampled_i)
        xyz_list.append(input_xyz)
        new_xyz_list.append(new_xyz)
        input_xyz = sub_xyz
    # ###########################Encoder############################

    # ###########################Decoder############################
    # Adaptive Fusion Module
    f_multi_decoder = []     # full-sized feature maps
    f_weights_decoders = []  # point-wise adaptive fusion weights
    for n in range(self.config.num_layers):
        feature = f_encoder_list[-1 - n]
        feature = helper_tf_util.conv2d(feature, feature.get_shape()[3].value, [1, 1], 'decoder_0' + str(n),
                                        [1, 1], 'VALID', True, is_training)
        f_decoder_list = []
        for j in range(self.config.num_layers - n):
            f_interp_i = self.nearest_interpolation(feature, input_up_samples[-j - 1 - n])
            f_decoder_i = helper_tf_util.conv2d_transpose(
                tf.concat([f_encoder_list[-j - 2 - n], f_interp_i], axis=3),
                f_encoder_list[-j - 2 - n].get_shape()[-1].value, [1, 1],
                'Decoder_layer_' + str(n) + '_' + str(j), [1, 1], 'VALID', bn=True, is_training=is_training)
            feature = f_decoder_i
            f_decoder_list.append(f_decoder_i)
        # collect full-sized feature maps which are upsampled from multiple resolutions
        f_multi_decoder.append(f_decoder_list[-1])
        # summarize point-level information
        curr_weight = helper_tf_util.conv2d(f_decoder_list[-1], 1, [1, 1], 'Decoder_weight_' + str(n), [1, 1],
                                            'VALID', bn=False, activation_fn=None)
        f_weights_decoders.append(curr_weight)

    # regress the fusion parameters
    f_weights = tf.concat(f_weights_decoders, axis=-1)
    f_weights = tf.nn.softmax(f_weights, axis=-1)
    # adaptively fuse them by calculating a weighted sum
    f_decoder_final = tf.zeros_like(f_multi_decoder[-1])
    for i in range(len(f_multi_decoder)):
        f_decoder_final = f_decoder_final + tf.tile(
            tf.expand_dims(f_weights[:, :, :, i], axis=-1),
            [1, 1, 1, f_multi_decoder[i].get_shape()[-1].value]) * f_multi_decoder[i]
    # ###########################Decoder############################

    f_layer_fc1 = helper_tf_util.conv2d(f_decoder_final, 64, [1, 1], 'fc1', [1, 1], 'VALID', True, is_training)
    f_layer_fc2 = helper_tf_util.conv2d(f_layer_fc1, 32, [1, 1], 'fc2', [1, 1], 'VALID', True, is_training)
    f_layer_drop = helper_tf_util.dropout(f_layer_fc2, keep_prob=0.5, is_training=is_training, scope='dp1')
    f_layer_fc3 = helper_tf_util.conv2d(f_layer_drop, self.config.num_classes, [1, 1], 'fc', [1, 1], 'VALID',
                                        False, is_training, activation_fn=None)
    f_out = tf.squeeze(f_layer_fc3, [2])
    return f_out, new_xyz_list, xyz_list
def inference(self, inputs, is_training):
    """Similar to a PyTorch forward() function: the RandLA-Net architecture is implemented as an
    encoder-decoder structure. -yc
    In the encoder, the LocSE block and random sampling are used, where LocSE consists of
    gather_neighbour(), relative_pos_encoding() and att_pooling().
    In the decoder, nearest interpolation is used with short-cut connections.

    Args:
        inputs ([type]): a dict containing all kinds of required inputs
        is_training (bool): training or not

    Returns:
        tensor: logits for segmentation scores
    """
    d_out = self.config.d_out
    feature = inputs['features']  # (B,N,6)
    feature = tf.layers.dense(feature, 8, activation=None, name='fc0')  # (B,N,8)
    feature = tf.nn.leaky_relu(tf.layers.batch_normalization(feature, -1, 0.99, 1e-6, training=is_training))
    feature = tf.expand_dims(feature, axis=2)  # expand 1 more dim to use Conv2D ops, (B,N,1,8)

    # ###########################Encoder############################
    f_encoder_list = []  # in the end, collects num_layers + 1 items: a group of hierarchical point feature embeddings
    for i in range(self.config.num_layers):
        f_encoder_i = self.dilated_res_block(feature, inputs['xyz'][i], inputs['neigh_idx'][i], d_out[i],
                                             'Encoder_layer_' + str(i), is_training)  # similar to LAO for local feature learning
        f_sampled_i = self.random_sample(f_encoder_i, inputs['sub_idx'][i])  # down-sample the input using the idx
        feature = f_sampled_i
        if i == 0:
            f_encoder_list.append(f_encoder_i)
        f_encoder_list.append(f_sampled_i)  # (B,N,1,32), (B,N/4,1,32), (B,N/16,1,128), (B,N/64,1,256), (B,N/256,1,512), (B,N/512,1,1024)
    # ###########################Encoder############################

    # transition using an MLP / pointwise Conv2D, e.g., (N/512,1024) -> (N/512,1024)
    feature = helper_tf_util.conv2d(f_encoder_list[-1], f_encoder_list[-1].get_shape()[3].value, [1, 1],
                                    'decoder_0', [1, 1], 'VALID', True, is_training)

    # ###########################Decoder############################
    f_decoder_list = []
    for j in range(self.config.num_layers):
        f_interp_i = self.nearest_interpolation(feature, inputs['interp_idx'][-j - 1])  # interpolate w. the idx, (B,N/512,1,1024) -> (B,N/256,1,1024)
        f_decoder_i = helper_tf_util.conv2d_transpose(tf.concat([f_encoder_list[-j - 2], f_interp_i], axis=3),
                                                      f_encoder_list[-j - 2].get_shape()[-1].value, [1, 1],
                                                      'Decoder_layer_' + str(j), [1, 1], 'VALID', bn=True,
                                                      is_training=is_training)  # shortcut connection
        feature = f_decoder_i
        f_decoder_list.append(f_decoder_i)  # upsampled point embeddings -yc
    # ###########################Decoder############################

    # obtain classification scores using FCs (64, 32 (w. dropout), num_classes)
    f_layer_fc1 = helper_tf_util.conv2d(f_decoder_list[-1], 64, [1, 1], 'fc1', [1, 1], 'VALID', True, is_training)
    f_layer_fc2 = helper_tf_util.conv2d(f_layer_fc1, 32, [1, 1], 'fc2', [1, 1], 'VALID', True, is_training)
    f_layer_drop = helper_tf_util.dropout(f_layer_fc2, keep_prob=0.5, is_training=is_training, scope='dp1')
    f_layer_fc3 = helper_tf_util.conv2d(f_layer_drop, self.config.num_classes, [1, 1], 'fc', [1, 1], 'VALID',
                                        False, is_training, activation_fn=None)  # (B,N,1,num_classes)
    f_out = tf.squeeze(f_layer_fc3, [2])  # (B,N,num_classes)
    return f_out
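# The decoders above all rely on nearest_interpolation (defined elsewhere in the model). Going by
# the shape comments, it is an index gather: every point of the denser level copies the feature of
# its nearest point in the coarser level through interp_idx. The demo below is a rough NumPy
# equivalent, assuming interp_idx[b, i] holds that nearest coarse index; the exact index layout in
# this codebase may differ, so treat this as an illustrative sketch only.
def _nearest_interpolation_numpy_demo():
    import numpy as np

    b, n_coarse, n_dense, c = 2, 64, 256, 32
    coarse_feats = np.random.randn(b, n_coarse, c)                   # e.g. (B, N/512, 1024) squeezed
    interp_idx = np.random.randint(0, n_coarse, size=(b, n_dense))   # hypothetical nearest-coarse-point indices

    # Gather along the point axis: each dense point copies its nearest coarse feature.
    upsampled = np.take_along_axis(coarse_feats, interp_idx[..., None], axis=1)  # (b, n_dense, c)

    # In the decoder above, this upsampled map is concatenated with the matching encoder feature
    # map (skip connection) and passed through a 1x1 conv2d_transpose.
    return upsampled.shape                                           # (2, 256, 32)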