def compute_box_and_sem_cls_loss(end_points, supervised_inds, config):
    """ Compute 3D bounding box and semantic classification loss.

    Args:
        end_points: dict (read-only)

    Returns:
        center_loss
        heading_cls_loss
        heading_reg_loss
        size_cls_loss
        size_reg_loss
        sem_cls_loss
    """
    num_heading_bin = config.num_heading_bin
    num_size_cluster = config.num_size_cluster
    mean_size_arr = config.mean_size_arr

    object_assignment = end_points['object_assignment']
    batch_size = object_assignment.shape[0]

    # Compute center loss
    dist1, ind1, dist2, _ = nn_distance(
        end_points['center'][supervised_inds, ...],
        end_points['center_label'][:, :, 0:3])  # dist1: BxK, dist2: BxK2
    box_label_mask = end_points['box_label_mask']
    objectness_label = end_points['objectness_label'].float()
    centroid_reg_loss1 = \
        torch.sum(dist1 * objectness_label) / (torch.sum(objectness_label) + 1e-6)
    centroid_reg_loss2 = \
        torch.sum(dist2 * box_label_mask) / (torch.sum(box_label_mask) + 1e-6)
    center_loss = centroid_reg_loss1 + centroid_reg_loss2

    # Compute heading loss
    heading_class_label = torch.gather(
        end_points['heading_class_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    criterion_heading_class = nn.CrossEntropyLoss(reduction='none')
    heading_class_loss = criterion_heading_class(
        end_points['heading_scores'][supervised_inds, ...].transpose(2, 1),
        heading_class_label)  # (B,K)
    heading_class_loss = torch.sum(heading_class_loss * objectness_label) / (
        torch.sum(objectness_label) + 1e-6)

    heading_residual_label = torch.gather(
        end_points['heading_residual_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    heading_residual_normalized_label = heading_residual_label / (
        np.pi / num_heading_bin)

    # Ref: https://discuss.pytorch.org/t/convert-int-into-one-hot-format/507/3
    heading_label_one_hot = torch.cuda.FloatTensor(
        batch_size, heading_class_label.shape[1], num_heading_bin).zero_()
    heading_label_one_hot.scatter_(
        2, heading_class_label.unsqueeze(-1),
        1)  # src==1 so it's *one-hot* (B,K,num_heading_bin)
    heading_residual_normalized_loss = huber_loss(torch.sum(
        end_points['heading_residuals_normalized'][supervised_inds, ...] *
        heading_label_one_hot, -1) - heading_residual_normalized_label,
        delta=1.0)  # (B,K)
    heading_residual_normalized_loss = torch.sum(
        heading_residual_normalized_loss *
        objectness_label) / (torch.sum(objectness_label) + 1e-6)

    # Compute size loss
    size_class_label = torch.gather(
        end_points['size_class_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    criterion_size_class = nn.CrossEntropyLoss(reduction='none')
    size_class_loss = criterion_size_class(
        end_points['size_scores'][supervised_inds, ...].transpose(2, 1),
        size_class_label)  # (B,K)
    size_class_loss = torch.sum(size_class_loss * objectness_label) / (
        torch.sum(objectness_label) + 1e-6)

    size_residual_label = torch.gather(
        end_points['size_residual_label'], 1,
        object_assignment.unsqueeze(-1).repeat(
            1, 1, 3))  # select (B,K,3) from (B,K2,3)
    size_label_one_hot = torch.cuda.FloatTensor(batch_size,
                                                size_class_label.shape[1],
                                                num_size_cluster).zero_()
    size_label_one_hot.scatter_(
        2, size_class_label.unsqueeze(-1),
        1)  # src==1 so it's *one-hot* (B,K,num_size_cluster)
    size_label_one_hot_tiled = size_label_one_hot.unsqueeze(-1).repeat(
        1, 1, 1, 3)  # (B,K,num_size_cluster,3)
    predicted_size_residual_normalized = torch.sum(
        end_points['size_residuals_normalized'][supervised_inds, ...] *
        size_label_one_hot_tiled, 2)  # (B,K,3)

    mean_size_arr_expanded = torch.from_numpy(mean_size_arr.astype(
        np.float32)).cuda().unsqueeze(0).unsqueeze(
            0)  # (1,1,num_size_cluster,3)
    mean_size_label = torch.sum(size_label_one_hot_tiled *
                                mean_size_arr_expanded, 2)  # (B,K,3)
    size_residual_label_normalized = size_residual_label / mean_size_label  # (B,K,3)
    size_residual_normalized_loss = torch.mean(
        huber_loss(predicted_size_residual_normalized -
                   size_residual_label_normalized, delta=1.0),
        -1)  # (B,K,3) -> (B,K)
    size_residual_normalized_loss = torch.sum(
        size_residual_normalized_loss *
        objectness_label) / (torch.sum(objectness_label) + 1e-6)

    # 3.4 Semantic cls loss
    sem_cls_label = torch.gather(end_points['sem_cls_label'], 1,
                                 object_assignment)  # select (B,K) from (B,K2)
    criterion_sem_cls = nn.CrossEntropyLoss(reduction='none')
    sem_cls_loss = criterion_sem_cls(
        end_points['sem_cls_scores'][supervised_inds, ...].transpose(2, 1),
        sem_cls_label)  # (B,K)
    sem_cls_loss = torch.sum(
        sem_cls_loss * objectness_label) / (torch.sum(objectness_label) + 1e-6)

    return center_loss, heading_class_loss, heading_residual_normalized_loss, size_class_loss, size_residual_normalized_loss, sem_cls_loss
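
# ---------------------------------------------------------------------------
# Usage sketch (assumption, not part of the original file): how the six terms
# returned above might be combined into a single scalar box loss. The 0.1
# weights on the classification terms follow the common VoteNet convention
# and are a placeholder, not the weighting used by this repository.
# ---------------------------------------------------------------------------
def _example_box_loss(end_points, supervised_inds, config):
    """Hypothetical helper combining the box-and-semantics loss terms."""
    center_loss, heading_cls_loss, heading_reg_loss, size_cls_loss, \
        size_reg_loss, sem_cls_loss = compute_box_and_sem_cls_loss(
            end_points, supervised_inds, config)
    # Weighted sum of the individual terms (weights are assumed).
    return center_loss + 0.1 * heading_cls_loss + heading_reg_loss + \
        0.1 * size_cls_loss + size_reg_loss + 0.1 * sem_cls_loss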
def compute_grasp_and_sem_cls_loss(end_points, config):
    """ Compute grasp and semantic classification loss.

    Args:
        end_points: dict (read-only)

    Returns:
        center_loss
        width_loss
        quality_loss
        angle_cls_loss
        angle_reg_loss
        viewpoint_cls_loss
        sem_cls_loss
    """
    num_angle_bin = config.num_angle_bin
    num_viewpoint = config.num_viewpoint
    num_class = config.num_class

    object_assignment = end_points['object_assignment']
    batch_size = object_assignment.shape[0]

    # Compute center loss
    pred_center = end_points['center']
    gt_center = end_points['center_label'][:, :, 0:3]
    dist1, ind1, dist2, _ = nn_distance(pred_center,
                                        gt_center)  # dist1: BxK, dist2: BxK2
    grasp_label_mask = end_points['grasp_label_mask']
    objectness_label = end_points['objectness_label'].float()
    centroid_reg_loss1 = \
        torch.sum(dist1*objectness_label)/(torch.sum(objectness_label)+1e-6)
    centroid_reg_loss2 = \
        torch.sum(dist2*grasp_label_mask)/(torch.sum(grasp_label_mask)+1e-6)
    center_loss = centroid_reg_loss1 + centroid_reg_loss2

    # Compute angle loss (in-plane rotation)
    angle_class_label = torch.gather(
        end_points['angle_class_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    criterion_angle_class = nn.CrossEntropyLoss(reduction='none')
    angle_class_loss = criterion_angle_class(
        end_points['angle_scores'].transpose(2, 1), angle_class_label)  # (B,K)
    angle_class_loss = torch.sum(angle_class_loss * objectness_label) / (
        torch.sum(objectness_label) + 1e-6)

    angle_residual_label = torch.gather(
        end_points['angle_residual_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    angle_residual_normalized_label = angle_residual_label / (np.pi /
                                                              num_angle_bin)

    # Ref: https://discuss.pytorch.org/t/convert-int-into-one-hot-format/507/3
    angle_label_one_hot = torch.cuda.FloatTensor(batch_size,
                                                 angle_class_label.shape[1],
                                                 num_angle_bin).zero_()
    angle_label_one_hot.scatter_(
        2, angle_class_label.unsqueeze(-1),
        1)  # src==1 so it's *one-hot* (B,K,num_angle_bin)
    angle_residual_normalized_loss = huber_loss(torch.sum(
        end_points['angle_residuals_normalized'] * angle_label_one_hot, -1) -
        angle_residual_normalized_label, delta=1.0)  # (B,K)
    angle_residual_normalized_loss = torch.sum(
        angle_residual_normalized_loss *
        objectness_label) / (torch.sum(objectness_label) + 1e-6)

    # Compute width loss
    gt_width = torch.gather(end_points['width_label'], 1,
                            object_assignment)  # select (B,K) from (B,K2)
    width_loss = huber_loss(torch.sum(end_points['width'], -1) - gt_width,
                            delta=1.0)
    width_loss = torch.sum(
        width_loss * objectness_label) / (torch.sum(objectness_label) + 1e-6)

    # Compute quality loss
    gt_quality = torch.gather(end_points['quality_label'], 1,
                              object_assignment)  # select (B,K) from (B,K2)
    quality_loss = huber_loss(
        torch.sum(end_points['quality'], -1) - gt_quality, delta=1.0)
    quality_loss = torch.sum(
        quality_loss * objectness_label) / (torch.sum(objectness_label) + 1e-6)

    # Compute viewpoint loss
    viewpoint_class_label = torch.gather(
        end_points['viewpoint_class_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    criterion_viewpoint_class = nn.CrossEntropyLoss(reduction='none')
    viewpoint_class_loss = criterion_viewpoint_class(
        end_points['viewpoint_scores'].transpose(2, 1),
        viewpoint_class_label)  # (B,K)
    viewpoint_class_loss = torch.sum(
        viewpoint_class_loss *
        objectness_label) / (torch.sum(objectness_label) + 1e-6)

    # 3.4 Semantic cls loss
    sem_cls_label = torch.gather(end_points['sem_cls_label'], 1,
                                 object_assignment)  # select (B,K) from (B,K2)
    criterion_sem_cls = nn.CrossEntropyLoss(reduction='none')
    sem_cls_loss = criterion_sem_cls(end_points['sem_cls_scores'].transpose(
        2, 1), sem_cls_label)  # (B,K)
    sem_cls_loss = torch.sum(
        sem_cls_loss * objectness_label) / (torch.sum(objectness_label) + 1e-6)

    return center_loss, width_loss, quality_loss, angle_class_loss, angle_residual_normalized_loss, viewpoint_class_loss, sem_cls_loss
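
# ---------------------------------------------------------------------------
# NOTE (assumption): `huber_loss` is used throughout this file but defined
# elsewhere in the repository. The sketch below shows a minimal element-wise
# Huber (smooth-L1) loss in the usual VoteNet style; the repository's own
# helper may differ in details.
# ---------------------------------------------------------------------------
def huber_loss_sketch(error, delta=1.0):
    """Element-wise Huber loss: quadratic within +/-delta, linear outside."""
    abs_error = torch.abs(error)
    quadratic = torch.clamp(abs_error, max=delta)  # min(|e|, delta)
    linear = abs_error - quadratic                 # max(|e| - delta, 0)
    return 0.5 * quadratic**2 + delta * linear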
def compute_matching_potential_loss(end_points, config, mode=''):
    """ Compute potential function loss with computed object proposals """
    num_heading_bin = config.num_heading_bin
    num_size_cluster = config.num_size_cluster
    num_class = config.num_class
    mean_size_arr = config.mean_size_arr

    object_assignment = end_points['object_assignment' + mode]
    batch_size = object_assignment.shape[0]

    # Compute center loss
    pred_center = end_points['center' + mode]
    obj_center = end_points['center' + mode]

    ### Get the heading here
    pred_heading_class = torch.argmax(
        end_points['heading_scores' + 'center'].detach(),
        -1)  # B,num_proposal
    if config.dataset == 'scannet':
        pred_heading = torch.zeros_like(pred_heading_class).float()
    elif config.dataset == 'sunrgbd':
        '''
        here pred_heading_opt = end_points['heading_residuals'+mode]*(np.pi/float(config.num_heading_bin))  # (8, 256, 12)
        '''
        pred_heading_residual = torch.gather(
            end_points['heading_residuals' + mode].detach(), 2,
            pred_heading_class.unsqueeze(-1))  # B,num_proposal,1
        pred_heading_residual.squeeze_(2)
        pred_heading = pred_heading_class.float() * (
            2 * np.pi / float(config.num_heading_bin)) + pred_heading_residual
    else:
        raise AssertionError('Dataset Config Error!')

    size_residual = end_points['size_residuals' + mode]
    size_residual_normalized = end_points['size_residuals_normalized' + mode]
    pred_size_class = torch.argmax(
        end_points['size_scores' + 'center'].contiguous(), -1).detach()
    pred_size_residual = torch.gather(
        size_residual, 2,
        pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(1, 1, 1, 3))
    mean_size_class_batched = torch.ones_like(
        size_residual) * torch.from_numpy(
            config.mean_size_arr.astype(
                np.float32)).cuda().unsqueeze(0).unsqueeze(0)
    pred_size_avg = torch.gather(
        mean_size_class_batched, 2,
        pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(1, 1, 1,
                                                           3)).detach()
    obj_size = pred_size_avg.squeeze(2) + pred_size_residual.squeeze(
        2)  # + size_residual_opt

    ### Get the object surface center here
    pred_obj_surface_center, pred_obj_line_center = get_surface_line_points_batch_pytorch(
        obj_size, pred_heading, obj_center)
    source_point = torch.cat((pred_obj_surface_center, pred_obj_line_center),
                             1)

    surface_target = end_points["surface_sel"]
    line_target = end_points["line_sel"]
    target_point = torch.cat((surface_target, line_target), 1)

    objectness_match_label = end_points[
        'objectness_match_label_plusscore'].float()
    objectness_match_label_sem = end_points[
        'objectness_match_label_plusscore_sem'].float()

    gt_center = end_points['center_label'][:, :, 0:3]
    dist1, ind1, dist2, _ = nn_distance(pred_center,
                                        gt_center)  # dist1: BxK, dist2: BxK2
    box_label_mask = end_points['box_label_mask']
    objectness_label = end_points['objectness_label' + mode].float()
    centroid_reg_loss1 = torch.sum(
        dist1 * objectness_label) / (torch.sum(objectness_label) + 1e-6)
    centroid_reg_loss2 = torch.sum(
        dist2 * box_label_mask) / (torch.sum(box_label_mask) + 1e-6)

    dist_match = torch.sqrt(
        torch.sum((source_point - target_point)**2, dim=-1) + 1e-6)
    centroid_reg_loss3 = torch.sum(dist_match * objectness_match_label) / (
        torch.sum(objectness_match_label) + 1e-6)
    center_loss = centroid_reg_loss1 + centroid_reg_loss2 + centroid_reg_loss3

    # Compute heading loss
    heading_class_label = torch.gather(
        end_points['heading_class_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    criterion_heading_class = nn.CrossEntropyLoss(reduction='none')
    heading_class_loss = criterion_heading_class(
        end_points['heading_scores' + mode].transpose(2, 1),
        heading_class_label)  # (B,K)
    heading_class_loss = torch.sum(heading_class_loss * objectness_label) / (
        torch.sum(objectness_label) + 1e-6)
    # heading_class_loss = torch.tensor(0)

    heading_residual_label = torch.gather(
        end_points['heading_residual_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    heading_residual_normalized_label = heading_residual_label / (
        np.pi / num_heading_bin)

    # Ref: https://discuss.pytorch.org/t/convert-int-into-one-hot-format/507/3
    heading_label_one_hot = torch.cuda.FloatTensor(
        batch_size, heading_class_label.shape[1], num_heading_bin).zero_()
    heading_label_one_hot.scatter_(
        2, heading_class_label.unsqueeze(-1),
        1)  # src==1 so it's *one-hot* (B,K,num_heading_bin)
    if False:  # mode == 'opt':
        heading_residual_normalized_loss = huber_loss(torch.sum(
            end_points['heading_residuals_normalized' + 'center'] *
            heading_label_one_hot, -1) - heading_residual_normalized_label,
            delta=1.0)  # (B,K)
        heading_residual_normalized_loss = torch.sum(
            heading_residual_normalized_loss *
            objectness_label) / (torch.sum(objectness_label) + 1e-6)
    else:
        heading_residual_normalized_loss = huber_loss(torch.sum(
            end_points['heading_residuals_normalized' + mode] *
            heading_label_one_hot, -1) - heading_residual_normalized_label,
            delta=1.0)  # (B,K)
        heading_residual_normalized_loss = torch.sum(
            heading_residual_normalized_loss *
            objectness_label) / (torch.sum(objectness_label) + 1e-6)

    ### Compute the original size loss
    # Compute size loss
    size_class_label = torch.gather(
        end_points['size_class_label'], 1,
        object_assignment)  # select (B,K) from (B,K2)
    size_residual_label = torch.gather(
        end_points['size_residual_label'], 1,
        object_assignment.unsqueeze(-1).repeat(
            1, 1, 3))  # select (B,K,3) from (B,K2,3)
    size_label_one_hot = torch.cuda.FloatTensor(batch_size,
                                                size_class_label.shape[1],
                                                num_size_cluster).zero_()
    size_label_one_hot.scatter_(
        2, size_class_label.unsqueeze(-1),
        1)  # src==1 so it's *one-hot* (B,K,num_size_cluster)
    size_label_one_hot_tiled = size_label_one_hot.unsqueeze(-1).repeat(
        1, 1, 1, 3)  # (B,K,num_size_cluster,3)
    predicted_size_residual_normalized = torch.sum(
        size_residual_normalized * size_label_one_hot_tiled, 2)  # (B,K,3)

    mean_size_arr_expanded = torch.from_numpy(mean_size_arr.astype(
        np.float32)).cuda().unsqueeze(0).unsqueeze(
            0)  # (1,1,num_size_cluster,3)
    mean_size_label = torch.sum(size_label_one_hot_tiled *
                                mean_size_arr_expanded, 2)  # (B,K,3)
    size_residual_label_normalized = size_residual_label / mean_size_label  # (B,K,3)
    size_residual_normalized_loss = torch.mean(
        huber_loss(predicted_size_residual_normalized -
                   size_residual_label_normalized, delta=1.0),
        -1)  # (B,K,3) -> (B,K)
    size_residual_normalized_loss = torch.sum(
        size_residual_normalized_loss *
        objectness_label) / (torch.sum(objectness_label) + 1e-6)

    # 3.4 Semantic cls loss
    # Optional for semantic optimization
    '''
    sem_cls_label = torch.gather(end_points['sem_cls_label'], 1, object_assignment)  # select (B,K) from (B,K2)
    criterion_sem_cls = nn.CrossEntropyLoss(reduction='none')
    sem_cls_loss = criterion_sem_cls(end_points['sem_cls_scores'+mode].transpose(2,1), sem_cls_label)  # (B,K)
    sem_cls_loss1 = torch.sum(sem_cls_loss * objectness_label)/(torch.sum(objectness_label)+1e-6)
    sem_cls_loss2 = torch.sum(sem_cls_loss * objectness_match_label_sem)/(torch.sum(objectness_match_label_sem)+1e-6)
    '''

    if config.dataset == 'scannet':
        return centroid_reg_loss1 + centroid_reg_loss2 + centroid_reg_loss3 + size_residual_normalized_loss
    elif config.dataset == 'sunrgbd':
        return centroid_reg_loss1 + centroid_reg_loss2 + centroid_reg_loss3 + size_residual_normalized_loss + heading_residual_normalized_loss
    else:
        raise AssertionError('Config Error!')
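
# ---------------------------------------------------------------------------
# NOTE (assumption): `nn_distance` is also defined elsewhere in the
# repository. The sketch below shows the usual VoteNet-style bidirectional
# nearest-neighbour squared distance used for the center losses above; the
# real helper may differ in details.
# ---------------------------------------------------------------------------
def nn_distance_sketch(pc1, pc2):
    """For each point in pc1 (B,N,3), squared distance to its nearest point
    in pc2 (B,M,3), and vice versa, plus the argmin indices."""
    n = pc1.shape[1]
    m = pc2.shape[1]
    diff = pc1.unsqueeze(2).repeat(1, 1, m, 1) - \
        pc2.unsqueeze(1).repeat(1, n, 1, 1)      # (B,N,M,3)
    dist = torch.sum(diff**2, dim=-1)            # (B,N,M)
    dist1, idx1 = torch.min(dist, dim=2)         # (B,N): pc1 -> pc2
    dist2, idx2 = torch.min(dist, dim=1)         # (B,M): pc2 -> pc1
    return dist1, idx1, dist2, idx2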
def compute_box_and_sem_cls_loss(labels, end_points, config):
    """ Compute 3D bounding box and semantic classification loss.

    Args:
        end_points: dict (read-only)

    Returns:
        center_loss
        heading_cls_loss
        heading_reg_loss
        size_cls_loss
        size_reg_loss
        sem_cls_loss
    """
    num_heading_bin = config.num_heading_bin
    num_size_cluster = config.num_size_cluster
    num_class = config.num_class
    mean_size_arr = config.mean_size_arr

    object_assignment = end_points['object_assignment']
    batch_size = object_assignment.shape[0]

    # Compute center loss
    pred_center = end_points['center']
    gt_center = labels['center_label'][:, :, 0:3]
    dist1, ind1, dist2, _ = nn_distance(pred_center,
                                        gt_center)  # dist1: BxK, dist2: BxK2
    box_label_mask = labels['box_label_mask']
    objectness_label = tf.to_float(end_points['objectness_label'])
    centroid_reg_loss1 = \
        tf.reduce_sum(dist1*objectness_label)/(tf.reduce_sum(objectness_label)+1e-6)
    centroid_reg_loss2 = \
        tf.reduce_sum(dist2*box_label_mask)/(tf.reduce_sum(box_label_mask)+1e-6)
    center_loss = centroid_reg_loss1 + centroid_reg_loss2

    # Compute heading loss
    heading_class_label = tf.gather(labels['heading_class_label'],
                                    object_assignment,
                                    batch_dims=1)  # select (B,K) from (B,K2)
    # criterion_heading_class = nn.CrossEntropyLoss(reduction='none')
    # heading_class_loss = criterion_heading_class(end_points['heading_scores'].transpose(2,1), heading_class_label)  # (B,K)
    heading_class_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=heading_class_label, logits=end_points['heading_scores'])
    heading_class_loss = tf.reduce_sum(
        heading_class_loss *
        objectness_label) / (tf.reduce_sum(objectness_label) + 1e-6)

    heading_residual_label = tf.gather(
        labels['heading_residual_label'], object_assignment,
        batch_dims=1)  # select (B,K) from (B,K2)
    heading_residual_normalized_label = heading_residual_label / (
        np.pi / num_heading_bin)

    # Ref: https://discuss.pytorch.org/t/convert-int-into-one-hot-format/507/3
    heading_label_one_hot = tf.one_hot(
        heading_class_label,
        num_heading_bin)  # one-hot (B,K,num_heading_bin)
    heading_residual_normalized_loss = huber_loss(tf.reduce_sum(
        end_points['heading_residuals_normalized'] * heading_label_one_hot,
        -1) - heading_residual_normalized_label, delta=1.0)  # (B,K)
    heading_residual_normalized_loss = tf.reduce_sum(
        heading_residual_normalized_loss *
        objectness_label) / (tf.reduce_sum(objectness_label) + 1e-6)

    # Compute size loss
    size_class_label = tf.gather(labels['size_class_label'],
                                 object_assignment,
                                 batch_dims=1)  # select (B,K) from (B,K2)
    size_class_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=size_class_label, logits=end_points['size_scores'])  # (B,K)
    size_class_loss = tf.reduce_sum(size_class_loss * objectness_label) / (
        tf.reduce_sum(objectness_label) + 1e-6)

    size_residual_label = tf.gather(
        labels['size_residual_label'], object_assignment,
        batch_dims=1)  # select (B,K,3) from (B,K2,3)
    size_label_one_hot = tf.one_hot(
        size_class_label,
        num_size_cluster)  # one-hot (B,K,num_size_cluster)
    size_label_one_hot_tiled = tf.tile(
        tf.expand_dims(size_label_one_hot, -1),
        [1, 1, 1, 3])  # (B,K,num_size_cluster,3)
    predicted_size_residual_normalized = tf.reduce_sum(
        end_points['size_residuals_normalized'] * size_label_one_hot_tiled,
        2)  # (B,K,3)

    mean_size_arr_expanded = tf.expand_dims(
        tf.expand_dims(tf.convert_to_tensor(mean_size_arr.astype(np.float32)),
                       0), 0)  # (1,1,num_size_cluster,3)
    mean_size_label = tf.reduce_sum(size_label_one_hot_tiled *
                                    mean_size_arr_expanded, 2)  # (B,K,3)
    size_residual_label_normalized = size_residual_label / mean_size_label  # (B,K,3)
    size_residual_normalized_loss = tf.reduce_mean(
        huber_loss(predicted_size_residual_normalized -
                   size_residual_label_normalized, delta=1.0),
        -1)  # (B,K,3) -> (B,K)
    size_residual_normalized_loss = tf.reduce_sum(
        size_residual_normalized_loss *
        objectness_label) / (tf.reduce_sum(objectness_label) + 1e-6)

    # 3.4 Semantic cls loss
    sem_cls_label = tf.gather(labels['sem_cls_label'],
                              object_assignment,
                              batch_dims=1)  # select (B,K) from (B,K2)
    sem_cls_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=sem_cls_label, logits=end_points['sem_cls_scores'])  # (B,K)
    sem_cls_loss = tf.reduce_sum(sem_cls_loss * objectness_label) / (
        tf.reduce_sum(objectness_label) + 1e-6)

    return center_loss, heading_class_loss, heading_residual_normalized_loss, size_class_loss, size_residual_normalized_loss, sem_cls_loss
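
# ---------------------------------------------------------------------------
# NOTE (assumption): the TensorFlow variant above also relies on a
# shape-preserving `huber_loss` defined elsewhere in the repository. A minimal
# element-wise TensorFlow sketch, mirroring the PyTorch one earlier in this
# file, is given below; the actual helper used may differ.
# ---------------------------------------------------------------------------
def tf_huber_loss_sketch(error, delta=1.0):
    """Element-wise Huber loss: quadratic within +/-delta, linear outside."""
    abs_error = tf.abs(error)
    quadratic = tf.minimum(abs_error, delta)  # min(|e|, delta)
    linear = abs_error - quadratic            # max(|e| - delta, 0)
    return 0.5 * quadratic**2 + delta * linear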