def get_emd_loss(pcd1, pcd2, radius):
    """Return the approximate earth mover's distance loss between two clouds.

    The approximate matching between ``pcd1`` and ``pcd2`` is computed with
    ``tf_approxmatch``; its cost is divided by ``radius`` and by the size of
    axis 1, then averaged with ``tf.reduce_mean``.

    Args:
        pcd1: first point-cloud tensor.
        pcd2: second point-cloud tensor; its axis-1 size must equal that of
            ``pcd1``.
        radius: divisor applied to the matching cost.

    Returns:
        A scalar tensor holding the averaged, normalised matching cost.
    """
    # presumably both inputs are (batch, num_points, 3) -- TODO confirm
    size_first, size_second = pcd1.shape[1], pcd2.shape[1]
    assert size_first == size_second

    point_count = tf.cast(size_first, tf.float32)
    assignment = tf_approxmatch.approx_match(pcd1, pcd2)
    scaled_cost = tf_approxmatch.match_cost(pcd1, pcd2, assignment) / radius
    return tf.reduce_mean(scaled_cost / point_count)
def __init__(self, seq_length, num_points=128):
    """Build the graph that scores a predicted sequence against ground truth.

    Two float32 placeholders of shape ``[1, seq_length, num_points, 3]`` are
    created (``self.ground_truth`` and ``self.prediction``). For every frame
    the approximate-match (EMD) cost and the nearest-neighbour (chamfer)
    distance are reduced to scalars and stacked into ``self.emds`` and
    ``self.cds``, one entry per frame.

    Args:
        seq_length: number of frames along axis 1.
        num_points: number of points per frame.
    """
    clip_shape = [1, seq_length, num_points, 3]
    self.ground_truth = tf.placeholder(tf.float32, clip_shape)
    self.prediction = tf.placeholder(tf.float32, clip_shape)

    # Split along the time axis, then drop the singleton time dimension.
    truth_frames = [
        tf.squeeze(input=piece, axis=[1])
        for piece in tf.split(value=self.ground_truth,
                              num_or_size_splits=seq_length, axis=1)
    ]
    predicted_frames = [
        tf.squeeze(input=piece, axis=[1])
        for piece in tf.split(value=self.prediction,
                              num_or_size_splits=seq_length, axis=1)
    ]

    chamfer_per_frame = []
    emd_per_frame = []
    for truth, predicted in zip(truth_frames, predicted_frames):
        assignment = tf_approxmatch.approx_match(truth, predicted)
        matching_cost = tf_approxmatch.match_cost(truth, predicted, assignment)
        emd_per_frame.append(tf.reduce_mean(matching_cost))

        forward, _, backward, _ = tf_nndistance.nn_distance(predicted, truth)
        chamfer_per_frame.append(tf.reduce_mean(forward + backward))

    self.cds = tf.stack(chamfer_per_frame, 0)
    self.emds = tf.stack(emd_per_frame, 0)
def earth_mover(pcd1, pcd2, radius=1.0):
    """Return the mean approximate-matching cost between two point clouds.

    The cost from ``tf_approxmatch.match_cost`` is divided first by
    ``radius`` and then by the size of axis 1 before being averaged.

    Args:
        pcd1: first point-cloud tensor.
        pcd2: second point-cloud tensor; must agree with ``pcd1`` on the
            size of axis 1.
        radius: divisor applied to the matching cost (defaults to 1.0).

    Returns:
        A scalar tensor.
    """
    # presumably axis 1 indexes the points of each cloud -- TODO confirm
    points_a = pcd1.shape[1]
    points_b = pcd2.shape[1]
    assert points_a == points_b

    normaliser = tf.cast(points_a, tf.float32)
    matching = tf_approxmatch.approx_match(pcd1, pcd2)
    raw_cost = tf_approxmatch.match_cost(pcd1, pcd2, matching)
    radius_scaled = raw_cost / radius
    per_point = radius_scaled / normaliser
    return tf.reduce_mean(per_point)
def get_loss(pred, label, end_points):
    """Compute the matching-cost loss and record the chamfer loss.

    Args:
        pred: BxNx3 predicted points.
        label: BxNx3 target points.
        end_points: dict that receives the chamfer-style loss under the
            key ``'pcloss'``.

    Returns:
        Tuple ``(loss, end_points)`` where ``loss`` is the mean
        approximate-matching cost between ``label`` and ``pred``; it is
        also written to a scalar summary named ``'loss'``.
    """
    # Nearest-neighbour distances in both directions, averaged together.
    forward, _, backward, _ = tf_nndistance.nn_distance(pred, label)
    symmetric = forward + backward
    end_points['pcloss'] = tf.reduce_mean(symmetric)

    # Approximate matching between target and prediction.
    assignment = tf_approxmatch.approx_match(label, pred)
    matching_cost = tf_approxmatch.match_cost(label, pred, assignment)
    emd_loss = tf.reduce_mean(matching_cost)

    tf.summary.scalar('loss', emd_loss)
    return emd_loss, end_points
def create_loss(output, truth, loss_type='emd'):
    """Build the reconstruction loss between ``output`` and ``truth``.

    Args:
        output: predicted point-cloud tensor.
        truth: reference point-cloud tensor.
        loss_type: ``'emd'`` selects the mean approximate-matching cost;
            any other value selects the sum of the two mean
            nearest-neighbour distances.

    Returns:
        A scalar loss tensor.
    """
    if loss_type != 'emd':
        forward, _, backward, _ = tf_nndistance.nn_distance(output, truth)
        return tf.reduce_mean(forward) + tf.reduce_mean(backward)

    assignment = tf_approxmatch.approx_match(output, truth)
    matching_cost = tf_approxmatch.match_cost(output, truth, assignment)
    return tf.reduce_mean(matching_cost)
def _reconstruction_loss(self, recon, input):
    """Return the reconstruction loss selected by ``self.loss``.

    Supported values of ``self.loss``:

    * ``'chamfer'`` -- sum of the means of the two distance tensors
      returned by ``nn_distance(recon, input)``.
    * ``'emd'`` -- mean of ``match_cost`` over the approximate matching,
      divided by ``self.point_cloud_shape[0]``.

    Args:
        recon: reconstructed point-cloud tensor.
        input: reference point-cloud tensor.

    Returns:
        A scalar loss tensor.

    Raises:
        ValueError: if ``self.loss`` is not one of the supported names.
            Previously this case fell through and failed with an
            ``UnboundLocalError`` on the return statement.
    """
    if self.loss == 'chamfer':
        cost_p1_p2, _, cost_p2_p1, _ = nn_distance(recon, input)
        return tf.reduce_mean(cost_p1_p2) + tf.reduce_mean(cost_p2_p1)

    if self.loss == 'emd':
        match = approx_match(recon, input)
        loss = tf.reduce_mean(match_cost(recon, input, match))
        # return point-wise loss
        # presumably point_cloud_shape[0] is the number of points -- TODO confirm
        return tf.div(loss, self.point_cloud_shape[0])

    raise ValueError(
        "Unsupported reconstruction loss %r; expected 'chamfer' or 'emd'"
        % (self.loss,))
def _reconstruction_loss(self, recon, input):
    """Return the reconstruction loss selected by ``self.loss``.

    Supported values of ``self.loss``:

    * ``'chamfer'`` -- sum of the means of the two distance tensors
      returned by ``nn_distance(recon, input)``.
    * ``'emd'`` -- mean of ``match_cost`` over the approximate matching,
      divided by ``self.point_cloud_shape[0]``.
    * ``'hausdorff'`` -- mean of ``directed_hausdorff(input, recon)``.

    Args:
        recon: reconstructed point-cloud tensor.
        input: reference point-cloud tensor.

    Returns:
        A scalar loss tensor.

    Raises:
        ValueError: if ``self.loss`` is not one of the supported names.
            Previously this case fell through and failed with an
            ``UnboundLocalError`` on the return statement.
    """
    if self.loss == 'chamfer':
        cost_p1_p2, _, cost_p2_p1, _ = nn_distance(recon, input)
        return tf.reduce_mean(cost_p1_p2) + tf.reduce_mean(cost_p2_p1)

    if self.loss == 'emd':
        match = approx_match(recon, input)
        loss = tf.reduce_mean(match_cost(recon, input, match))
        # return point-wise loss
        # presumably point_cloud_shape[0] is the number of points -- TODO confirm
        return tf.div(loss, self.point_cloud_shape[0])

    if self.loss == 'hausdorff':
        # Direction is input -> recon (partial-noisy -> fake_clean).
        distances = directed_hausdorff(input, recon)
        return tf.reduce_mean(distances)

    raise ValueError(
        "Unsupported reconstruction loss %r; expected 'chamfer', 'emd' "
        "or 'hausdorff'" % (self.loss,))
def get_loss(caps, label, end_points):
    """Build the classification loss and the reconstruction loss.

    The classification loss is either a spread loss (when ``FLAGS.spread``
    is truthy) or a margin loss computed from the length of each capsule
    vector. The reconstruction loss is 0.0001 times the mean
    approximate-matching cost between ``end_points['reconstruct']`` and
    ``end_points['l0_xyz']``.

    Args:
        caps: capsule output tensor; its first static dimension is read as
            the batch size. The margin branch asserts that the per-class
            length tensor has shape [batch_size, NUM_CLASSES, 1, 1].
        label: class indices, one-hot encoded here to depth NUM_CLASSES
            (presumably shape B -- per the original docstring).
        end_points: dict providing the 'reconstruct' and 'l0_xyz' tensors.

    Returns:
        Tuple ``(loss, reconstruct_loss)``. Both tensors are also added to
        the 'losses' collection, and ``loss`` is written to a scalar
        summary named 'classify loss'.
    """
    batch_size = caps.get_shape()[0].value
    one_hot_label = tf.one_hot(label, depth=NUM_CLASSES, axis=1, dtype=tf.float32)
    # NOTE(review): masked_v is computed but never used below.
    masked_v = tf.matmul(tf.squeeze(caps), tf.reshape(one_hot_label, (-1, NUM_CLASSES, 1)), transpose_a=True)
    # Length of each capsule vector; epsilon is added inside the sqrt.
    v_length = tf.sqrt(tf.reduce_sum(tf.square(caps), axis=2, keep_dims=True) + epsilon)
    # First entry of the 'batch' collection; used as the step counter in the
    # schedules below (presumably the global step -- TODO confirm).
    batch = tf.get_collection('batch')[0]
    if FLAGS.spread:
        # spread loss
        v_length = tf.reshape(v_length, shape=[-1, 1, NUM_CLASSES])
        one_hot_label = tf.expand_dims(one_hot_label, axis=2)
        # Activation of the target class, selected via the one-hot label.
        at = tf.matmul(v_length, one_hot_label)
        """Paper eq(5)."""
        # Margin grows linearly with `batch` and is capped at 0.9.
        m = tf.minimum(0.9, 0.4+batch*(0.9-0.2)/25000)
        loss = tf.square(tf.maximum(0., m - (at - v_length)))
        # Multiplying by (1 - one_hot) drops the target-class term.
        loss = tf.matmul(loss, 1. - one_hot_label)
        loss = tf.reduce_mean(loss)
    else:
        # 1. margin_loss
        # NOTE(review): M_PLUS_ and M_MINUS_ are computed but never used; the
        # lines below read M_PLUS / M_MINUS, which are not defined in this
        # function (presumably module-level constants -- confirm intent).
        M_PLUS_ = tf.minimum(1., 0.8+0.2*batch/10000)
        M_MINUS_ = tf.maximum(0., 0.2-0.2*batch/10000)
        max_l = tf.square(tf.maximum(0., M_PLUS - v_length))
        max_r = tf.square(tf.maximum(0., v_length - M_MINUS))
        assert max_r.get_shape() == [batch_size, NUM_CLASSES, 1, 1]
        # reshape: [batch_size, NUM_CLASSES, 1, 1] => [batch_size, NUM_CLASSES]
        max_l = tf.reshape(max_l, shape=(batch_size, -1))
        max_r = tf.reshape(max_r, shape=(batch_size, -1))
        T_c = one_hot_label
        # element-wise multiply, [batch_size, NUM_CLASSES]
        L_c = T_c * max_l + LAMBDA_VAL * (1 - T_c) * max_r
        loss = tf.reduce_mean(tf.reduce_mean(L_c, axis=1))
    tf.summary.scalar('classify loss', loss)
    tf.add_to_collection('losses', loss)
    # Reconstruction term: approximate-matching cost, scaled by 0.0001.
    match = tf_approxmatch.approx_match(end_points['reconstruct'], end_points['l0_xyz'])
    reconstruct_loss = 0.0001 * tf.reduce_mean(tf_approxmatch.match_cost(end_points['reconstruct'], end_points['l0_xyz'], match))
    tf.add_to_collection('losses', reconstruct_loss)
    return loss, reconstruct_loss
def __init__(self, batch_size, seq_length, num_points=1024, num_samples=8, knn=False, alpha=1.0, beta=1.0, learning_rate=0.001, max_gradient_norm=5.0, is_training=False):
    """Build the point-cloud sequence prediction graph.

    The first ``int(seq_length / 2)`` frames of ``self.inputs`` are fed
    through three stacked ``PointLSTMCell`` layers to build up state. The
    remaining steps are then predicted one at a time: each step predicts a
    per-point motion that is added to the current frame, and the result
    becomes the input of the next step.

    Args:
        batch_size: leading dimension of the input placeholder.
        seq_length: total number of frames (context plus predicted).
        num_points: points per frame; each level samples half as many
            points as the previous one.
        num_samples: neighbourhood size passed to ``sample_and_group``; the
            cells use 3x, 2x and 1x this value.
        knn: forwarded to the ``PointLSTMCell`` constructors only; the
            ``sample_and_group`` calls always pass ``knn=False``.
        alpha: weight of the chamfer term in the training loss.
        beta: weight of the EMD term in the training loss.
        learning_rate: learning rate of the Adam optimizer.
        max_gradient_norm: threshold for global-norm gradient clipping.
        is_training: when true, also builds ``self.loss``, ``self.emd``,
            ``self.cd`` and ``self.train_op``.
    """
    self.global_step = tf.Variable(0, trainable=False)
    self.inputs = tf.placeholder(tf.float32, [batch_size, seq_length, num_points, 3])
    # One tensor per frame, with the singleton time axis removed.
    frames = tf.split(value=self.inputs, num_or_size_splits=seq_length, axis=1)
    frames = [tf.squeeze(input=frame, axis=[1]) for frame in frames]
    # Three recurrent cells with growing radius and channel count.
    cell1 = PointLSTMCell(radius=1.0 + 1e-6, nsample=3 * num_samples, out_channels=128, knn=knn, pooling='max')
    cell2 = PointLSTMCell(radius=2.0 + 1e-6, nsample=2 * num_samples, out_channels=256, knn=knn, pooling='max')
    cell3 = PointLSTMCell(radius=4.0 + 1e-6, nsample=1 * num_samples, out_channels=512, knn=knn, pooling='max')
    # context
    states1 = None
    states2 = None
    states3 = None
    for i in range(int(seq_length / 2)):
        # 512 (num_points / 2 with the default num_points=1024)
        xyz1, _, _, _ = sample_and_group(int(num_points / 2), radius=0.5 + 1e-6, nsample=num_samples, xyz=frames[i], points=None, knn=False, use_xyz=False)
        with tf.variable_scope('encoder_1', reuse=tf.AUTO_REUSE) as scope:
            states1 = cell1((xyz1, None), states1)
        s_xyz1, h_feat1, _ = states1
        # 256 (num_points / 4 with the default)
        xyz2, feat2, _, _ = sample_and_group(int(num_points / 2 / 2), radius=1.0 + 1e-6, nsample=num_samples, xyz=s_xyz1, points=h_feat1, knn=False, use_xyz=False)
        # Max-pool the grouped features over axis 2.
        feat2 = tf.reduce_max(feat2, axis=[2], keepdims=False, name='maxpool')
        with tf.variable_scope('encoder_2', reuse=tf.AUTO_REUSE) as scope:
            states2 = cell2((xyz2, feat2), states2)
        s_xyz2, h_feat2, _ = states2
        # 128 (num_points / 8 with the default)
        xyz3, feat3, _, _ = sample_and_group(int(num_points / 2 / 2 / 2), radius=2.0 + 1e-6, nsample=num_samples, xyz=s_xyz2, points=h_feat2, knn=False, use_xyz=False)
        feat3 = tf.reduce_max(feat3, axis=[2], keepdims=False, name='maxpool')
        with tf.variable_scope('encoder_3', reuse=tf.AUTO_REUSE) as scope:
            states3 = cell3((xyz3, feat3), states3)
    # prediction
    predicted_motions = []
    predicted_frames = []
    # Decoding starts from the last context frame and continues from the
    # states left by the context loop.
    input_frame = frames[int(seq_length / 2) - 1]
    for i in range(int(seq_length / 2), seq_length):
        # 512
        xyz1, _, _, _ = sample_and_group(int(num_points / 2), radius=0.5 + 1e-6, nsample=num_samples, xyz=input_frame, points=None, knn=False, use_xyz=False)
        with tf.variable_scope('decoder_1', reuse=tf.AUTO_REUSE) as scope:
            states1 = cell1((xyz1, None), states1)
        s_xyz1, h_feat1, _ = states1
        # 256
        xyz2, feat2, _, _ = sample_and_group(int(num_points / 2 / 2), radius=1.0 + 1e-6, nsample=num_samples, xyz=s_xyz1, points=h_feat1, knn=False, use_xyz=False)
        feat2 = tf.reduce_max(feat2, axis=[2], keepdims=False, name='maxpool')
        with tf.variable_scope('decoder_2', reuse=tf.AUTO_REUSE) as scope:
            states2 = cell2((xyz2, feat2), states2)
        s_xyz2, h_feat2, _ = states2
        # 128
        xyz3, feat3, _, _ = sample_and_group(int(num_points / 2 / 2 / 2), radius=2.0 + 1e-6, nsample=num_samples, xyz=s_xyz2, points=h_feat2, knn=False, use_xyz=False)
        feat3 = tf.reduce_max(feat3, axis=[2], keepdims=False, name='maxpool')
        with tf.variable_scope('decoder_3', reuse=tf.AUTO_REUSE) as scope:
            states3 = cell3((xyz3, feat3), states3)
        s_xyz3, h_feat3, _ = states3
        # Propagate features from the coarsest level back to every point of
        # the current input frame.
        with tf.variable_scope('fp', reuse=tf.AUTO_REUSE) as scope:
            l2_feat = pointnet_fp_module(xyz2, xyz3, h_feat2, h_feat3, mlp=[256], last_mlp_activation=True, scope='fp2')
            l1_feat = pointnet_fp_module(xyz1, xyz2, h_feat1, l2_feat, mlp=[256], last_mlp_activation=True, scope='fp1')
            l0_feat = pointnet_fp_module(input_frame, xyz1, None, l1_feat, mlp=[256], last_mlp_activation=True, scope='fp0')
        # Two 1x1 convolutions map the features to a 3-channel motion.
        with tf.variable_scope('fc', reuse=tf.AUTO_REUSE) as scope:
            predicted_motion = tf.layers.conv1d(
                inputs=l0_feat, filters=128, kernel_size=1, strides=1, padding='valid', data_format='channels_last', activation=tf.nn.relu, name='fc1')
            predicted_motion = tf.layers.conv1d(
                inputs=predicted_motion, filters=3, kernel_size=1, strides=1, padding='valid', data_format='channels_last', activation=None, name='fc2')
        predicted_motions.append(predicted_motion)
        # The predicted frame becomes the input of the next step.
        input_frame += predicted_motion
        predicted_frames.append(input_frame)
    # loss
    if is_training:
        self.loss = self.emd = self.cd = 0
        for i in range(int(seq_length / 2)):
            # predicted_frames[i] is compared with ground-truth frame
            # i + int(seq_length / 2).
            match = tf_approxmatch.approx_match(
                frames[i + int(seq_length / 2)], predicted_frames[i])
            emd_distance = tf.reduce_mean(
                tf_approxmatch.match_cost(frames[i + int(seq_length / 2)], predicted_frames[i], match))
            loss_emd = emd_distance
            self.emd += loss_emd
            dists_forward, _, dists_backward, _ = tf_nndistance.nn_distance(
                predicted_frames[i], frames[i + int(seq_length / 2)])
            loss_cd = tf.reduce_mean(dists_forward + dists_backward)
            self.cd += loss_cd
            self.loss += (alpha * loss_cd + beta * loss_emd)
        self.cd /= int(seq_length / 2)
        # The reported EMD is also divided by num_points; the EMD term
        # accumulated into self.loss above is not.
        self.emd /= (int(seq_length / 2) * num_points)
        self.loss /= int(seq_length / 2)
        params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, params)
        clipped_gradients, norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.train_op = tf.train.AdamOptimizer(
            learning_rate).apply_gradients(zip(clipped_gradients, params), global_step=self.global_step)
    # Stack the per-step outputs along a new axis 1.
    self.predicted_motions = tf.stack(values=predicted_motions, axis=1)
    self.predicted_frames = tf.stack(values=predicted_frames, axis=1)
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
# Load data: start the background fetcher before building the graph.
data = DataFetcher(FLAGS.data_list)
data.setDaemon(True)
data.start()
train_number = data.number

# Metric placeholders.
# xyz1: dataset_points * 3, xyz2: query_points * 3
xyz1 = tf.placeholder(tf.float32, shape=(None, 3))
xyz2 = tf.placeholder(tf.float32, shape=(None, 3))
# chamfer distance
dist1, idx1, dist2, idx2 = nn_distance(xyz1, xyz2)
# earth mover distance; note that emd_dist returns the sum of all distances
match = approx_match(xyz1, xyz2)
emd_dist = match_cost(xyz1, xyz2, match)

# Initialize session
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())
model.load(sess)

# Construct feed dictionary.
# The file is opened in a `with` block so the handle is closed as soon as
# the pickle has been read (it was previously left open).
with open('utils/ellipsoid/info_ellipsoid.dat', 'rb') as info_file:
    pkl = pickle.load(info_file)
feed_dict = construct_feed_dict(pkl, placeholders)

# Category id -> readable class name, plus a per-category counter
# initialised to zero.
class_name = {'02828884':'bench','03001627':'chair','03636649':'lamp','03691459':'speaker','04090263':'firearm','04379243':'table','04530566':'watercraft','02691156':'plane','02933112':'cabinet','02958343':'car','03211117':'monitor','04256520':'couch','04401088':'cellphone'}
model_number = {i: 0 for i in class_name}