def test_masked_avg(self):
  tf.reset_default_graph()

  data = tf.placeholder(tf.float32, shape=[None, None])
  mask = tf.placeholder(tf.float32, shape=[None, None])
  masked_avgs = utils.masked_avg(data, mask)

  with self.test_session() as sess:
    result = sess.run(masked_avgs,
                      feed_dict={
                          data: [[1, 2, 3], [4, 5, 6]],
                          mask: [[1, 0, 1], [0, 1, 0]]
                      })
    self.assertAllClose(result, [[2], [5]])

    result = sess.run(masked_avgs,
                      feed_dict={
                          data: [[1, 2, 3], [4, 5, 6]],
                          mask: [[0, 1, 0], [1, 0, 1]]
                      })
    self.assertAllClose(result, [[2], [5]])

    result = sess.run(masked_avgs,
                      feed_dict={
                          data: [[1, 2, 3], [4, 5, 6]],
                          mask: [[0, 0, 0], [0, 0, 0]]
                      })
    self.assertAllClose(result, [[0], [0]])
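For reference, a minimal sketch of the behavior the test above expects from `utils.masked_avg`: a per-row average over the positions where `mask` is 1, with the reduced dimension kept, and 0 (rather than NaN) when the mask is all zeros. The `dim` and `epsilon` arguments are assumptions based on how the helper is called elsewhere in this section; the actual implementation may differ.

def masked_avg_sketch(data, mask, dim=1, epsilon=1e-12):
  """Hypothetical masked average consistent with the test above."""
  # Sum only the entries selected by the mask, keeping the reduced axis.
  masked_sum = tf.reduce_sum(data * mask, axis=dim, keepdims=True)
  # Count the selected entries; clamping by `epsilon` makes an all-zero
  # mask yield 0 instead of a division-by-zero NaN.
  mask_count = tf.reduce_sum(mask, axis=dim, keepdims=True)
  return masked_sum / tf.maximum(mask_count, epsilon)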
def build_loss(self, predictions, **kwargs):
  """Build tf graph to compute loss.

  Args:
    predictions: dict of prediction results keyed by name.

  Returns:
    loss_dict: dict of loss tensors keyed by name.
  """
  options = self._model_proto

  (image_id, image_ids_gathered,
   similarity) = (predictions[_FIELD_IMAGE_ID],
                  predictions[_FIELD_IMAGE_IDS_GATHERED],
                  predictions[_FIELD_SIMILARITY])

  distance = 1.0 - similarity

  pos_mask = tf.cast(
      tf.equal(
          tf.expand_dims(image_id, axis=1),
          tf.expand_dims(image_ids_gathered, axis=0)), tf.float32)
  neg_mask = 1.0 - pos_mask

  if options.triplet_ap_use_avg:
    distance_ap = utils.masked_avg(distance, pos_mask)
  else:
    distance_ap = utils.masked_maximum(distance, pos_mask)

  # negatives_outside: smallest D_an where D_an > D_ap.
  mask = tf.cast(tf.greater(distance, distance_ap), tf.float32)
  mask = mask * neg_mask
  negatives_outside = utils.masked_minimum(distance, mask)

  # negatives_inside: largest D_an.
  negatives_inside = utils.masked_maximum(distance, neg_mask)

  # distance_an: the semihard negatives.
  mask_condition = tf.greater(
      tf.reduce_sum(mask, axis=1, keepdims=True), 0.0)
  distance_an = tf.where(mask_condition, negatives_outside, negatives_inside)

  # Triplet loss.
  losses = tf.maximum(distance_ap - distance_an + options.triplet_margin, 0)

  return {
      'triplet_loss': tf.reduce_mean(losses),
  }
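The semihard mining above leans on `utils.masked_maximum` and `utils.masked_minimum`. A plausible sketch, following the shift-by-extremum trick used in the standard semihard triplet-loss implementation, is shown below; whether the project's helpers are written exactly this way is an assumption.

def masked_maximum_sketch(data, mask, dim=1):
  """Maximum of `data` along `dim`, over positions where `mask` == 1."""
  # Shift by the per-row minimum so masked-out entries (multiplied by 0)
  # can never exceed a valid entry, then shift back.
  axis_minimums = tf.reduce_min(data, axis=dim, keepdims=True)
  return tf.reduce_max(
      (data - axis_minimums) * mask, axis=dim, keepdims=True) + axis_minimums


def masked_minimum_sketch(data, mask, dim=1):
  """Minimum of `data` along `dim`, over positions where `mask` == 1."""
  # Symmetric trick: shift by the per-row maximum before masking.
  axis_maximums = tf.reduce_max(data, axis=dim, keepdims=True)
  return tf.reduce_min(
      (data - axis_maximums) * mask, axis=dim, keepdims=True) + axis_maximums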
def _create_lv0_edge_scores(self, proposal_repr, slogan_repr, label_repr,
                            dbpedia_repr, proposal_mask, slogan_mask,
                            label_mask, dbpedia_mask):
  """Creates adjacency matrix. Each elem denotes an edge weight.

  Args:
    proposal_repr: A [batch, max_proposal_num, dims] float tensor.
    slogan_repr: A [batch, max_slogan_num, dims] float tensor.

  Returns:
    proposal_scores: A [batch, max_proposal_num] float tensor denoting
      weights of different proposals.
    slogan_scores: A [batch, max_slogan_num] float tensor denoting weights
      of different slogans.
    label_to_proposal_scores: A [batch, max_proposal_num, max_label_num]
      tensor.
    dbpedia_to_slogan_scores: A [batch, max_slogan_num, max_dbpedia_num]
      tensor.
  """
  options = self._options
  is_training = self._is_training

  (batch_i, max_proposal_num, max_slogan_num, max_label_num,
   max_dbpedia_num) = (proposal_repr.get_shape()[0].value,
                       utils.get_tensor_shape(proposal_repr)[1],
                       utils.get_tensor_shape(slogan_repr)[1],
                       utils.get_tensor_shape(label_repr)[1],
                       utils.get_tensor_shape(dbpedia_repr)[1])

  with tf.name_scope('create_lv0_attention_weights'):
    # Predictions for updating slogan.
    #   slogan_dbpedia_to_proposal_scores shape =
    #     [batch, max_proposal_num, max_slogan_num + max_dbpedia_num].
    #   slogan_dbpedia_to_slogan_scores shape =
    #     [batch, max_slogan_num, max_slogan_num + max_dbpedia_num].
    slogan_dbpedia_repr = tf.concat([slogan_repr, dbpedia_repr], axis=1)
    slogan_dbpedia_to_proposal_scores = self._create_edge_weights_helper(
        proposal_repr,
        slogan_dbpedia_repr,
        scope='slogan_dbpedia_to_proposal_scores')
    slogan_dbpedia_to_slogan_scores = self._create_edge_weights_helper(
        slogan_repr,
        slogan_dbpedia_repr,
        scope='slogan_dbpedia_to_slogan_scores')

    # Predictions for updating proposal.
    #   proposal_label_to_proposal_scores shape =
    #     [batch, max_proposal_num, max_proposal_num + max_label_num].
    #   proposal_label_to_slogan_scores shape =
    #     [batch, max_slogan_num, max_proposal_num + max_label_num].
    proposal_label_repr = tf.concat([proposal_repr, label_repr], axis=1)
    proposal_label_to_proposal_scores = self._create_edge_weights_helper(
        proposal_repr,
        proposal_label_repr,
        scope='proposal_label_to_proposal_scores')
    proposal_label_to_slogan_scores = self._create_edge_weights_helper(
        slogan_repr,
        proposal_label_repr,
        scope='proposal_label_to_slogan_scores')

    # Compute slogan_dbpedia_to_slogan_scores.
    #   slogan_dbpedia_context_scores shape =
    #     [batch, 1, max_slogan_num + max_dbpedia_num].
    #   slogan_dbpedia_to_slogan_scores shape =
    #     [batch, max_slogan_num, max_slogan_num + max_dbpedia_num].
    slogan_dbpedia_context_scores = utils.masked_avg(
        slogan_dbpedia_to_proposal_scores,
        mask=tf.expand_dims(proposal_mask, 2),
        dim=1)
    slogan_dbpedia_to_slogan_scores = tf.add(slogan_dbpedia_context_scores,
                                             slogan_dbpedia_to_slogan_scores)
    slogan_scores = tf.linalg.diag_part(
        tf.slice(
            slogan_dbpedia_to_slogan_scores,
            begin=[0, 0, 0],
            size=[batch_i, max_slogan_num, max_slogan_num]))
    dbpedia_to_slogan_scores = tf.slice(
        slogan_dbpedia_to_slogan_scores,
        begin=[0, 0, max_slogan_num],
        size=[batch_i, max_slogan_num, max_dbpedia_num])

    # Compute proposal_label_to_proposal_scores.
    #   proposal_label_context_scores shape =
    #     [batch, 1, max_proposal_num + max_label_num].
    #   proposal_label_to_proposal_scores shape =
    #     [batch, max_proposal_num, max_proposal_num + max_label_num].
    proposal_label_context_scores = utils.masked_avg(
        proposal_label_to_slogan_scores,
        mask=tf.expand_dims(slogan_mask, 2),
        dim=1)
    proposal_label_to_proposal_scores = tf.add(
        proposal_label_context_scores, proposal_label_to_proposal_scores)
    proposal_scores = tf.linalg.diag_part(
        tf.slice(
            proposal_label_to_proposal_scores,
            begin=[0, 0, 0],
            size=[batch_i, max_proposal_num, max_proposal_num]))
    label_to_proposal_scores = tf.slice(
        proposal_label_to_proposal_scores,
        begin=[0, 0, max_proposal_num],
        size=[batch_i, max_proposal_num, max_label_num])

  return (proposal_scores, slogan_scores, label_to_proposal_scores,
          dbpedia_to_slogan_scores)
def _create_lv1_edge_scores(self, proposal_repr, slogan_repr, proposal_mask,
                            slogan_mask):
  """Creates adjacency matrix. Each elem denotes an edge weight.

  Args:
    proposal_repr: A [batch, max_proposal_num, dims] float tensor.
    slogan_repr: A [batch, max_slogan_num, dims] float tensor.

  Returns:
    proposal_scores: A [batch, max_proposal_num] float tensor denoting
      weights of different proposals.
    slogan_scores: A [batch, max_slogan_num] float tensor denoting weights
      of different slogans.
  """
  options = self._options
  is_training = self._is_training

  with tf.name_scope('create_attention_weights'):
    if options.attention_type == graph_creator_pb2.ConvGraphCreator.CO_ATTENTION:
      # Use co-attention to determine edge importance.
      #   slogan_to_proposal_scores shape =
      #     [batch, max_proposal_num, max_slogan_num].
      #   proposal_scores shape = [batch, max_proposal_num].
      #   slogan_scores shape = [batch, max_slogan_num].
      slogan_to_proposal_scores = self._create_edge_weights_helper(
          proposal_repr, slogan_repr, scope='slogan_to_proposal_scores')
      proposal_scores = utils.masked_avg(
          slogan_to_proposal_scores,
          mask=tf.expand_dims(slogan_mask, 1),
          dim=2)
      proposal_scores = tf.squeeze(proposal_scores, axis=-1)
      slogan_scores = utils.masked_avg(
          slogan_to_proposal_scores,
          mask=tf.expand_dims(proposal_mask, 2),
          dim=1)
      slogan_scores = tf.squeeze(slogan_scores, axis=1)

    elif options.attention_type == graph_creator_pb2.ConvGraphCreator.SELF_ATTENTION:
      # Use self-attention to determine edge importance.
      #   similarity_proposal_proposal shape =
      #     [batch, max_proposal_num, max_proposal_num].
      #   similarity_slogan_slogan shape =
      #     [batch, max_slogan_num, max_slogan_num].
      #   proposal_scores shape = [batch, 1, max_proposal_num].
      #   slogan_scores shape = [batch, 1, max_slogan_num].
      similarity_proposal_proposal = self._create_edge_weights_helper(
          proposal_repr, proposal_repr, scope='similarity_proposal_proposal')
      similarity_slogan_slogan = self._create_edge_weights_helper(
          slogan_repr, slogan_repr, scope='similarity_slogan_slogan')
      proposal_scores = utils.masked_avg(
          similarity_proposal_proposal,
          tf.expand_dims(proposal_mask, 2),
          dim=1)
      proposal_scores = tf.squeeze(proposal_scores, axis=1)
      slogan_scores = utils.masked_avg(
          similarity_slogan_slogan, tf.expand_dims(slogan_mask, 2), dim=1)
      slogan_scores = tf.squeeze(slogan_scores, axis=1)

    else:
      raise ValueError('Invalid attention type %s' % options.attention_type)

  return proposal_scores, slogan_scores
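Both edge-score routines defer to `_create_edge_weights_helper`, which is not reproduced here. A hypothetical minimal version, assuming a projected dot-product (bilinear) score between the two node sets, is sketched below; the projection width and the use of `tf.layers.dense` are illustrative assumptions only, not the project's actual helper.

def _create_edge_weights_helper_sketch(repr_a, repr_b, scope, num_units=None):
  """Returns hypothetical [batch, num_a, num_b] edge scores.

  `repr_a` is [batch, num_a, dims] and `repr_b` is [batch, num_b, dims].
  """
  with tf.variable_scope(scope):
    if num_units is None:
      num_units = repr_a.get_shape()[-1].value
    # Project both node sets into a shared space, then score every pair
    # of nodes with a dot product.
    proj_a = tf.layers.dense(repr_a, num_units, use_bias=False, name='proj_a')
    proj_b = tf.layers.dense(repr_b, num_units, use_bias=False, name='proj_b')
    return tf.matmul(proj_a, proj_b, transpose_b=True)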
def _calc_oicr_loss(self,
                    labels,
                    num_proposals,
                    proposals,
                    scores_0,
                    scores_1,
                    scope,
                    iou_threshold=0.5):
  """Calculates the OICR loss at refinement stage `i`.

  Args:
    labels: A [batch, num_classes] float tensor.
    num_proposals: A [batch] int tensor.
    proposals: A [batch, max_num_proposals, 4] float tensor.
    scores_0: A [batch, max_num_proposal, 1 + num_classes] float tensor,
      representing the proposal score at `k-th` refinement.
    scores_1: A [batch, max_num_proposal, 1 + num_classes] float tensor,
      representing the proposal score at `(k+1)-th` refinement.

  Returns:
    oicr_cross_entropy_loss: a scalar float tensor.
  """
  with tf.name_scope(scope):
    (batch, max_num_proposals,
     num_classes_plus_one) = utils.get_tensor_shape(scores_0)
    num_classes = num_classes_plus_one - 1

    # For each class, look for the most confident proposal.
    #   proposal_ind shape = [batch, num_classes].
    proposal_mask = tf.sequence_mask(
        num_proposals, maxlen=max_num_proposals, dtype=tf.float32)
    proposal_ind = utils.masked_argmax(
        tf.nn.softmax(scores_0, axis=-1)[:, :, 1:],
        tf.expand_dims(proposal_mask, axis=-1),
        dim=1)

    # Deal with the most confident proposal per each class.
    # Unstack the `proposal_ind`, `labels`.
    #   proposal_labels shape = [batch, max_num_proposals, num_classes].
    proposal_labels = []
    indices_0 = tf.range(batch, dtype=tf.int64)
    for indices_1, label_per_class in zip(
        tf.unstack(proposal_ind, axis=-1), tf.unstack(labels, axis=-1)):

      # Gather the most confident proposal for the class.
      #   confident_proposal shape = [batch, 4].
      indices = tf.stack([indices_0, indices_1], axis=-1)
      confident_proposal = tf.gather_nd(proposals, indices)

      # Get the IoU from all the proposals to the most confident proposal.
      #   iou shape = [batch, max_num_proposals].
      confident_proposal_tiled = tf.tile(
          tf.expand_dims(confident_proposal, axis=1),
          [1, max_num_proposals, 1])
      iou = box_utils.iou(
          tf.reshape(proposals, [-1, 4]),
          tf.reshape(confident_proposal_tiled, [-1, 4]))
      iou = tf.reshape(iou, [batch, max_num_proposals])

      # Filter out irrelevant predictions using image-level label.
      target = tf.to_float(tf.greater_equal(iou, iou_threshold))
      target = tf.where(
          label_per_class > 0, x=target, y=tf.zeros_like(target))
      proposal_labels.append(target)

    proposal_labels = tf.stack(proposal_labels, axis=-1)

    # Add background targets, and normalize the sum value to 1.0.
    #   proposal_labels shape = [batch, max_num_proposals, 1 + num_classes].
    bkg = tf.logical_not(tf.reduce_sum(proposal_labels, axis=-1) > 0)
    proposal_labels = tf.concat(
        [tf.expand_dims(tf.to_float(bkg), axis=-1), proposal_labels], axis=-1)
    proposal_labels = tf.div(
        proposal_labels,
        tf.reduce_sum(proposal_labels, axis=-1, keepdims=True))

    assert_op = tf.Assert(
        tf.reduce_all(
            tf.abs(tf.reduce_sum(proposal_labels, axis=-1) - 1) < 1e-6),
        ["Probabilities not sum to ONE", proposal_labels])

    # Compute the loss.
    with tf.control_dependencies([assert_op]):
      losses = tf.nn.softmax_cross_entropy_with_logits(
          labels=tf.stop_gradient(proposal_labels), logits=scores_1)
      oicr_cross_entropy_loss = tf.reduce_mean(
          utils.masked_avg(data=losses, mask=proposal_mask, dim=1))

  return oicr_cross_entropy_loss
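`_calc_oicr_loss` relies on `box_utils.iou` to score every proposal against the most confident one. As used above it takes two aligned `[N, 4]` box tensors and returns the element-wise IoU between corresponding rows. A minimal sketch, assuming boxes are `[ymin, xmin, ymax, xmax]` (the coordinate convention is an assumption, and the actual `box_utils.iou` may differ):

def iou_sketch(boxes1, boxes2, epsilon=1e-8):
  """Element-wise IoU between corresponding rows of two [N, 4] box tensors."""
  ymin1, xmin1, ymax1, xmax1 = tf.unstack(boxes1, axis=-1)
  ymin2, xmin2, ymax2, xmax2 = tf.unstack(boxes2, axis=-1)

  # Intersection rectangle; clamp at zero for non-overlapping boxes.
  inter_h = tf.maximum(0.0, tf.minimum(ymax1, ymax2) - tf.maximum(ymin1, ymin2))
  inter_w = tf.maximum(0.0, tf.minimum(xmax1, xmax2) - tf.maximum(xmin1, xmin2))
  inter = inter_h * inter_w

  area1 = (ymax1 - ymin1) * (xmax1 - xmin1)
  area2 = (ymax2 - ymin2) * (xmax2 - xmin2)
  union = area1 + area2 - inter
  return inter / tf.maximum(union, epsilon)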
def build_loss(self, predictions, examples, **kwargs):
  """Build tf graph to compute loss.

  Args:
    predictions: dict of prediction results keyed by name.
    examples: dict of inputs keyed by name.

  Returns:
    loss_dict: dict of loss tensors keyed by name.
  """
  options = self._model_proto

  loss_dict = {}

  with tf.name_scope('losses'):

    # Extract image-level labels.
    if not options.caption_as_label:
      labels = self._extract_class_label(
          class_texts=examples[InputDataFields.object_texts],
          vocabulary_list=self._vocabulary_list)
    else:
      labels = self._extract_class_label(
          class_texts=slim.flatten(examples[InputDataFields.caption_strings]),
          vocabulary_list=self._vocabulary_list)

    # A prediction model from caption to class.

    # Loss of the multi-instance detection network.
    midn_class_logits = predictions[NOD2Predictions.midn_class_logits]
    losses = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=labels, logits=midn_class_logits)

    # Hard-negative mining.
    if options.midn_loss_negative_mining == nod2_model_pb2.NOD2Model.NONE:
      if options.classification_loss_use_sum:
        assert False
        loss_dict['midn_cross_entropy_loss'] = tf.multiply(
            tf.reduce_mean(tf.reduce_sum(losses, axis=-1)),
            options.midn_loss_weight)
      else:
        if options.caption_as_label:
          loss_masks = tf.to_float(tf.reduce_any(labels > 0, axis=-1))
          loss_dict['midn_cross_entropy_loss'] = tf.multiply(
              tf.squeeze(
                  utils.masked_avg(
                      tf.reduce_mean(losses, axis=-1),
                      mask=loss_masks,
                      dim=0)), options.midn_loss_weight)
        else:
          loss_dict['midn_cross_entropy_loss'] = tf.multiply(
              tf.reduce_mean(losses), options.midn_loss_weight)
    elif options.midn_loss_negative_mining == nod2_model_pb2.NOD2Model.HARDEST:
      assert False
      loss_masks = self._midn_loss_mine_hardest_negative(labels, losses)
      loss_dict['midn_cross_entropy_loss'] = tf.reduce_mean(
          utils.masked_avg(data=losses, mask=loss_masks, dim=1))
    else:
      raise ValueError('Invalid negative mining method.')

    # Losses of the online instance classifier refinement network.
    (num_proposals,
     proposals) = (predictions[DetectionResultFields.num_proposals],
                   predictions[DetectionResultFields.proposal_boxes])
    batch, max_num_proposals, _ = utils.get_tensor_shape(proposals)

    proposal_scores_0 = predictions[NOD2Predictions.oicr_proposal_scores +
                                    '_at_0']
    if options.oicr_use_proba_r_given_c:
      proposal_scores_0 = predictions[NOD2Predictions.midn_proba_r_given_c]
      proposal_scores_0 = tf.concat(
          [tf.fill([batch, max_num_proposals, 1], 0.0), proposal_scores_0],
          axis=-1)

    global_step = tf.train.get_or_create_global_step()
    oicr_loss_mask = tf.cast(global_step > options.oicr_start_step, tf.float32)

    for i in range(options.oicr_iterations):
      proposal_scores_1 = predictions[NOD2Predictions.oicr_proposal_scores +
                                      '_at_{}'.format(i + 1)]
      oicr_cross_entropy_loss_at_i = model_utils.calc_oicr_loss(
          labels,
          num_proposals,
          proposals,
          tf.stop_gradient(proposal_scores_0),
          proposal_scores_1,
          scope='oicr_{}'.format(i + 1),
          iou_threshold=options.oicr_iou_threshold)
      loss_dict['oicr_cross_entropy_loss_at_{}'.format(i + 1)] = tf.multiply(
          oicr_loss_mask * oicr_cross_entropy_loss_at_i,
          options.oicr_loss_weight)
      proposal_scores_0 = tf.nn.softmax(proposal_scores_1, axis=-1)

    # Min-entropy loss.
    mask = tf.sequence_mask(
        num_proposals, maxlen=max_num_proposals, dtype=tf.float32)
    proba_r_given_c = predictions[NOD2Predictions.midn_proba_r_given_c]
    losses = tf.log(proba_r_given_c + _EPSILON)
    losses = tf.squeeze(
        utils.masked_sum_nd(data=losses, mask=mask, dim=1), axis=1)
    min_entropy_loss = tf.reduce_mean(tf.reduce_sum(losses * labels, axis=1))
    min_entropy_loss = tf.multiply(min_entropy_loss,
                                   options.min_entropy_loss_weight)

    max_proba = tf.reduce_mean(
        utils.masked_maximum(
            data=proba_r_given_c, mask=tf.expand_dims(mask, -1), dim=1))
    tf.losses.add_loss(min_entropy_loss)

    tf.summary.scalar('loss/min_entropy_loss', min_entropy_loss)
    tf.summary.scalar('loss/max_proba', max_proba)

  return loss_dict
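The min-entropy term uses `utils.masked_sum_nd`, which, as called above, sums a `[batch, N, dims]` tensor over the proposal axis while zeroing out padded proposals. A minimal sketch matching that usage (the general signature of the real helper is an assumption):

def masked_sum_nd_sketch(data, mask, dim=1):
  """Sums [batch, N, dims] `data` over `dim`, keeping only rows with mask == 1."""
  # Broadcast the [batch, N] mask over the trailing dimension and keep the
  # reduced axis, so the caller can squeeze it afterwards.
  return tf.reduce_sum(
      data * tf.expand_dims(mask, axis=-1), axis=dim, keepdims=True)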
def build_loss(self, predictions, examples, **kwargs):
  """Build tf graph to compute loss.

  Args:
    predictions: dict of prediction results keyed by name.
    examples: dict of inputs keyed by name.

  Returns:
    loss_dict: dict of loss tensors keyed by name.
  """
  options = self._model_proto

  loss_dict = {}

  with tf.name_scope('losses'):

    # Extract image-level labels.
    labels = self._extract_class_label(
        class_texts=slim.flatten(
            predictions[NOD3Predictions.training_only_caption_strings]),
        vocabulary_list=self._vocabulary_list)

    # A prediction model from caption to class.

    # Loss of the multi-instance detection network.
    midn_class_logits = predictions[NOD3Predictions.midn_class_logits]
    losses = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=labels, logits=midn_class_logits)

    # Hard-negative mining.
    if options.midn_loss_negative_mining == nod3_model_pb2.NOD3Model.NONE:
      if options.classification_loss_use_sum:
        assert False
        loss_dict['midn_cross_entropy_loss'] = tf.multiply(
            tf.reduce_mean(tf.reduce_sum(losses, axis=-1)),
            options.midn_loss_weight)
      else:
        if options.caption_as_label:
          loss_masks = tf.to_float(tf.reduce_any(labels > 0, axis=-1))
          loss_dict['midn_cross_entropy_loss'] = tf.multiply(
              tf.squeeze(
                  utils.masked_avg(
                      tf.reduce_mean(losses, axis=-1),
                      mask=loss_masks,
                      dim=0)), options.midn_loss_weight)
        else:
          loss_dict['midn_cross_entropy_loss'] = tf.multiply(
              tf.reduce_mean(losses), options.midn_loss_weight)
    elif options.midn_loss_negative_mining == nod3_model_pb2.NOD3Model.HARDEST:
      assert False
      loss_masks = self._midn_loss_mine_hardest_negative(labels, losses)
      loss_dict['midn_cross_entropy_loss'] = tf.reduce_mean(
          utils.masked_avg(data=losses, mask=loss_masks, dim=1))
    else:
      raise ValueError('Invalid negative mining method.')

    # Triplet loss.
    if options.triplet_loss_weight > 0:
      (image_id, image_ids_gathered,
       similarity) = (predictions[NOD3Predictions.image_id],
                      predictions[NOD3Predictions.image_id],
                      predictions[NOD3Predictions.similarity])
      distance = 1.0 - similarity

      pos_mask = tf.cast(
          tf.equal(
              tf.expand_dims(image_id, axis=1),
              tf.expand_dims(image_ids_gathered, axis=0)), tf.float32)
      neg_mask = 1.0 - pos_mask
      distance_ap = utils.masked_maximum(distance, pos_mask)

      if options.triplet_loss_use_semihard:
        # Use the semihard.

        # negatives_outside: smallest D_an where D_an > D_ap.
        mask = tf.cast(tf.greater(distance, distance_ap), tf.float32)
        mask = mask * neg_mask
        negatives_outside = utils.masked_minimum(distance, mask)

        # negatives_inside: largest D_an.
        negatives_inside = utils.masked_maximum(distance, neg_mask)

        # distance_an: the semihard negatives.
        mask_condition = tf.greater(
            tf.reduce_sum(mask, axis=1, keepdims=True), 0.0)
        distance_an = tf.where(mask_condition, negatives_outside,
                               negatives_inside)
      else:
        # Use the hardest.
        distance_an = utils.masked_minimum(distance, neg_mask)

      losses = tf.maximum(
          distance_ap - distance_an + options.triplet_loss_margin, 0)
      num_loss_examples = tf.count_nonzero(losses, dtype=tf.float32)
      triplet_loss = tf.reduce_mean(losses)
      loss_dict['triplet_loss'] = tf.multiply(triplet_loss,
                                              options.triplet_loss_weight)

    # Losses of the online instance classifier refinement network.
    (num_proposals,
     proposals) = (predictions[DetectionResultFields.num_proposals],
                   predictions[DetectionResultFields.proposal_boxes])
    batch, max_num_proposals, _ = utils.get_tensor_shape(proposals)

    proposal_scores_0 = predictions[NOD3Predictions.oicr_proposal_scores +
                                    '_at_0']
    if options.oicr_use_proba_r_given_c:
      proposal_scores_0 = predictions[NOD3Predictions.midn_proba_r_given_c]
      proposal_scores_0 = tf.concat(
          [tf.fill([batch, max_num_proposals, 1], 0.0), proposal_scores_0],
          axis=-1)

    global_step = tf.train.get_or_create_global_step()
    oicr_loss_mask = tf.cast(global_step > options.oicr_start_step, tf.float32)

    for i in range(options.oicr_iterations):
      proposal_scores_1 = predictions[NOD3Predictions.oicr_proposal_scores +
                                      '_at_{}'.format(i + 1)]
      oicr_cross_entropy_loss_at_i = model_utils.calc_oicr_loss(
          labels,
          num_proposals,
          proposals,
          tf.stop_gradient(proposal_scores_0),
          proposal_scores_1,
          scope='oicr_{}'.format(i + 1),
          iou_threshold=options.oicr_iou_threshold)
      loss_dict['oicr_cross_entropy_loss_at_{}'.format(i + 1)] = tf.multiply(
          oicr_loss_mask * oicr_cross_entropy_loss_at_i,
          options.oicr_loss_weight)
      proposal_scores_0 = tf.nn.softmax(proposal_scores_1, axis=-1)

    # Min-entropy loss.
    mask = tf.sequence_mask(
        num_proposals, maxlen=max_num_proposals, dtype=tf.float32)
    proba_r_given_c = predictions[NOD3Predictions.midn_proba_r_given_c]
    losses = tf.log(proba_r_given_c + _EPSILON)
    losses = tf.squeeze(
        utils.masked_sum_nd(data=losses, mask=mask, dim=1), axis=1)
    min_entropy_loss = tf.reduce_mean(tf.reduce_sum(losses * labels, axis=1))
    min_entropy_loss = tf.multiply(min_entropy_loss,
                                   options.min_entropy_loss_weight)

    max_proba = tf.reduce_mean(
        utils.masked_maximum(
            data=proba_r_given_c, mask=tf.expand_dims(mask, -1), dim=1))
    tf.losses.add_loss(min_entropy_loss)

    if options.triplet_loss_weight > 0:
      tf.summary.scalar('loss/num_loss_examples', num_loss_examples)
    tf.summary.scalar('loss/min_entropy_loss', min_entropy_loss)
    tf.summary.scalar('loss/max_proba', max_proba)

  return loss_dict
def build_loss(self, predictions, examples, **kwargs):
  """Build tf graph to compute loss.

  Args:
    predictions: dict of prediction results keyed by name.
    examples: dict of inputs keyed by name.

  Returns:
    loss_dict: dict of loss tensors keyed by name.
  """
  options = self._model_proto

  loss_dict = {}

  with tf.name_scope('losses'):

    # Extract image-level labels.
    assert options.caption_as_label

    vocabulary_list = self._vocabulary_list
    mapping = {
        'traffic light': 'stoplight',
        'fire hydrant': 'hydrant',
        'stop sign': 'sign',
        'parking meter': 'meter',
        'sports ball': 'ball',
        'baseball bat': 'bat',
        'baseball glove': 'glove',
        'tennis racket': 'racket',
        'wine glass': 'wineglass',
        'hot dog': 'hotdog',
        'potted plant': 'plant',
        'dining table': 'table',
        'cell phone': 'cellphone',
        'teddy bear': 'teddy',
        'hair drier': 'hairdryer',
    }
    vocabulary_list = [mapping.get(cls, cls) for cls in vocabulary_list]

    labels_gt = self._extract_class_label(
        class_texts=slim.flatten(examples[InputDataFields.caption_strings]),
        vocabulary_list=vocabulary_list)
    examples[NOD4Predictions.debug_groundtruth_labels] = labels_gt

    if options.label_strategem == nod4_model_pb2.NOD4Model.EXACTLY_MATCH:
      labels = labels_gt
    elif options.label_strategem == nod4_model_pb2.NOD4Model.W2V_SYNONYM_MATCH:
      labels_ps = self._extract_pseudo_label(
          texts=slim.flatten(examples[InputDataFields.caption_strings]),
          vocabulary_list=vocabulary_list,
          open_vocabulary_list=self._open_vocabulary_list,
          embedding_dims=options.embedding_dims)
      select_op = tf.reduce_any(labels_gt > 0, axis=-1)
      labels = tf.where(select_op, labels_gt, labels_ps)
      labels_ps = tf.where(select_op, tf.zeros_like(labels_ps), labels_ps)
      examples[NOD4Predictions.debug_pseudo_labels] = labels_ps
    else:
      raise ValueError('Invalid label strategy')

    # Loss of the multi-instance detection network.
    midn_class_logits = predictions[NOD4Predictions.midn_class_logits]
    losses = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=labels, logits=midn_class_logits)

    # Hard-negative mining.
    if options.midn_loss_negative_mining == nod4_model_pb2.NOD4Model.NONE:
      if options.classification_loss_use_sum:
        assert False
        loss_dict['midn_cross_entropy_loss'] = tf.multiply(
            tf.reduce_mean(tf.reduce_sum(losses, axis=-1)),
            options.midn_loss_weight)
      else:
        if options.caption_as_label:
          loss_masks = tf.to_float(tf.reduce_any(labels > 0, axis=-1))
          loss_dict['midn_cross_entropy_loss'] = tf.multiply(
              tf.squeeze(
                  utils.masked_avg(
                      tf.reduce_mean(losses, axis=-1),
                      mask=loss_masks,
                      dim=0)), options.midn_loss_weight)
        else:
          loss_dict['midn_cross_entropy_loss'] = tf.multiply(
              tf.reduce_mean(losses), options.midn_loss_weight)
    elif options.midn_loss_negative_mining == nod4_model_pb2.NOD4Model.HARDEST:
      assert False
      loss_masks = self._midn_loss_mine_hardest_negative(labels, losses)
      loss_dict['midn_cross_entropy_loss'] = tf.reduce_mean(
          utils.masked_avg(data=losses, mask=loss_masks, dim=1))
    else:
      raise ValueError('Invalid negative mining method.')

    # Losses of the online instance classifier refinement network.
    (num_proposals,
     proposals) = (predictions[DetectionResultFields.num_proposals],
                   predictions[DetectionResultFields.proposal_boxes])
    batch, max_num_proposals, _ = utils.get_tensor_shape(proposals)

    proposal_scores_0 = predictions[NOD4Predictions.oicr_proposal_scores +
                                    '_at_0']
    if options.oicr_use_proba_r_given_c:
      proposal_scores_0 = predictions[NOD4Predictions.midn_proba_r_given_c]
      proposal_scores_0 = tf.concat(
          [tf.fill([batch, max_num_proposals, 1], 0.0), proposal_scores_0],
          axis=-1)

    global_step = tf.train.get_or_create_global_step()
    oicr_loss_mask = tf.cast(global_step > options.oicr_start_step, tf.float32)

    for i in range(options.oicr_iterations):
      proposal_scores_1 = predictions[NOD4Predictions.oicr_proposal_scores +
                                      '_at_{}'.format(i + 1)]
      oicr_cross_entropy_loss_at_i = model_utils.calc_oicr_loss(
          labels,
          num_proposals,
          proposals,
          tf.stop_gradient(proposal_scores_0),
          proposal_scores_1,
          scope='oicr_{}'.format(i + 1),
          iou_threshold=options.oicr_iou_threshold)
      loss_dict['oicr_cross_entropy_loss_at_{}'.format(i + 1)] = tf.multiply(
          oicr_loss_mask * oicr_cross_entropy_loss_at_i,
          options.oicr_loss_weight)
      proposal_scores_0 = tf.nn.softmax(proposal_scores_1, axis=-1)

    # Min-entropy loss.
    mask = tf.sequence_mask(
        num_proposals, maxlen=max_num_proposals, dtype=tf.float32)
    proba_r_given_c = predictions[NOD4Predictions.midn_proba_r_given_c]
    losses = tf.log(proba_r_given_c + _EPSILON)
    losses = tf.squeeze(
        utils.masked_sum_nd(data=losses, mask=mask, dim=1), axis=1)
    min_entropy_loss = tf.reduce_mean(tf.reduce_sum(losses * labels, axis=1))
    min_entropy_loss = tf.multiply(min_entropy_loss,
                                   options.min_entropy_loss_weight)

    max_proba = tf.reduce_mean(
        utils.masked_maximum(
            data=proba_r_given_c, mask=tf.expand_dims(mask, -1), dim=1))
    tf.losses.add_loss(min_entropy_loss)

    tf.summary.scalar('loss/min_entropy_loss', min_entropy_loss)
    tf.summary.scalar('loss/max_proba', max_proba)

  return loss_dict