def boolean_mask(boxlist, indicator, fields=None, scope=None,
                 use_static_shapes=False, indicator_sum=None):
    """Return a new BoxList holding only the boxes flagged by `indicator`.

    Boxes (and, in the dynamic-shape path, the requested extra fields) are
    kept wherever `indicator` is True; selection runs along the first
    dimension.

    Args:
      boxlist: BoxList holding N boxes.
      indicator: a rank-1 boolean tensor.
      fields: (optional) list of field names to carry over. If None (default)
        every extra field of `boxlist` is carried over; pass an empty list to
        keep only the box coordinates.
      scope: name scope.
      use_static_shapes: whether to use the implementation with static shape
        guarantees.
      indicator_sum: Python int equal to the number of True entries in
        `indicator`. Only required when `use_static_shapes` is True.

    Returns:
      subboxlist: a BoxList with the subset of the input BoxList selected by
        `indicator`.

    Raises:
      ValueError: if `indicator` is not a rank-1 boolean tensor, if
        `use_static_shapes` is True and `indicator_sum` is falsy or not an
        int, or if a requested field is missing from `boxlist`.
    """
    with tf.name_scope(scope, 'BooleanMask'):
        if indicator.shape.ndims != 1:
            raise ValueError('indicator should have rank 1')
        if indicator.dtype != tf.bool:
            raise ValueError('indicator should be a boolean tensor')

        if not use_static_shapes:
            # Dynamic path: mask the coordinates, then each requested field.
            kept_boxes = tf.boolean_mask(boxlist.get(), indicator)
            result = box_list.BoxList(kept_boxes)
            if fields is None:
                fields = boxlist.get_extra_fields()
            for field_name in fields:
                if not boxlist.has_field(field_name):
                    raise ValueError(
                        'boxlist must contain all specified fields')
                kept_field = tf.boolean_mask(
                    boxlist.get_field(field_name), indicator)
                result.add_field(field_name, kept_field)
            return result

        # Static path: the output length must be a Python int known up front.
        if not (indicator_sum and isinstance(indicator_sum, int)):
            raise ValueError('`indicator_sum` must be a of type int')

        as_float = tf.to_float(indicator)
        # 1-based rank of every selected position, 0 for unselected ones.
        rank_if_selected = tf.cast(
            tf.multiply(tf.cumsum(as_float), as_float), dtype=tf.int32)
        # Unselected positions become index -1, which one_hot encodes as an
        # all-zero row, so they contribute nothing below.
        selector = tf.one_hot(
            rank_if_selected - 1, indicator_sum, dtype=tf.float32)
        positions = tf.to_float(tf.range(tf.shape(indicator)[0]))
        # Column k of `selector` picks the position of the k-th selected box.
        sampled_indices = tf.cast(
            tf.tensordot(positions, selector, axes=[0, 0]), dtype=tf.int32)
        # NOTE(review): `fields` is not forwarded to `gather` on this path;
        # confirm that gathering every field is intended here.
        return gather(boxlist, sampled_indices, use_static_shapes=True)
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=.6):
    """Keep only the YOLO boxes whose best class score reaches `threshold`.

    Arguments:
    box_confidence -- tensor of shape (19, 19, 5, 1)
    boxes -- tensor of shape (19, 19, 5, 4)
    box_class_probs -- tensor of shape (19, 19, 5, 80)
    threshold -- real value; a box whose highest class score is below it is
                 dropped

    Returns:
    scores -- tensor of shape (None,), best class score of each kept box
    boxes -- tensor of shape (None, 4), (b_x, b_y, b_h, b_w) of each kept box
    classes -- tensor of shape (None,), class index of each kept box

    Note: "None" appears because the number of kept boxes depends on the
    threshold and is only known at run time.
    """
    # Per-class score = objectness * class probability (broadcast on last axis).
    per_class_scores = box_confidence * box_class_probs

    # Best class and its score for every box.
    best_class = K.argmax(per_class_scores, axis=-1)
    best_score = K.max(per_class_scores, axis=-1, keepdims=False)

    # True where the box survives the threshold.
    keep = best_score >= threshold

    kept_scores = tf.boolean_mask(best_score, keep)
    kept_boxes = tf.boolean_mask(boxes, keep)
    kept_classes = tf.boolean_mask(best_class, keep)
    return kept_scores, kept_boxes, kept_classes
def remap_keys(sparse_tensor):
    """Rewrite a SparseTensor's row indices using the key stored in each row.

    The last entry of every batch row is treated as a "key" entry: its column
    index becomes the new row index of all other entries in that batch row,
    and the key entry itself is dropped. (The original comments call the key
    the "user" and the remaining entries "items".)

    Assumes `indices` is ordered by batch row and batch indices are
    consecutive, so the row index steps by exactly 1 between rows -- TODO
    confirm against the caller.

    Args:
        sparse_tensor: rank-2 `tf.SparseTensor` with int64 indices.

    Returns:
        A `tf.SparseTensor` with the same `dense_shape`, the key entries
        removed and the first index column replaced by the key.
    """
    indices = sparse_tensor.indices  # (num_entries, 2)
    values = sparse_tensor.values    # (num_entries,)

    # Step in batch index between consecutive entries: 1 on the last entry of
    # a row, 0 elsewhere. The final entry is always a row end, hence the
    # appended 1. Result is rank 1, one flag per entry.
    batch_column = indices[:, 0]
    row_end_flag = tf.concat(
        values=[batch_column[1:] - batch_column[:-1],
                tf.constant(value=[1], dtype=tf.int64)],
        axis=0)
    is_data = tf.equal(x=row_end_flag, y=0)
    is_key = tf.equal(x=row_end_flag, y=1)

    # Split entries into data rows and key rows.
    data_values = tf.boolean_mask(tensor=values, mask=is_data)
    data_indices = tf.boolean_mask(tensor=indices, mask=is_data)  # (n_data, 2)
    keys = tf.boolean_mask(tensor=indices, mask=is_key)[:, 1]     # one per row

    # Look up the key of the batch row each data entry belongs to.
    key_per_entry = tf.gather(params=keys, indices=data_indices[:, 0])

    # Pair (key, original column) -> (n_data, 2) index matrix.
    new_indices = tf.stack([key_per_entry, data_indices[:, 1]], axis=1)

    return tf.SparseTensor(
        indices=new_indices,
        values=data_values,
        dense_shape=sparse_tensor.dense_shape)
def roc_auc_score(y_pred, y_true):
    """ ROC AUC Score.

    Differentiable surrogate for the area under the ROC curve, based on the
    Wilcoxon-Mann-Whitney U statistic.
    Yan, L., Dodier, R., Mozer, M. C., & Wolniewicz, R. (2003).
    Optimizing Classifier Performance via an Approximation to the
    Wilcoxon-Mann-Whitney Statistic.

    The returned value sums a penalty over every (positive, negative) pair
    whose score margin is below `gamma`, so smaller is better.

    Arguments:
        y_pred: `Tensor`. Predicted values.
        y_true: `Tensor`. Targets (labels); cast to bool to split positives
            from negatives.

    Returns:
        Scalar `Tensor` with the summed pairwise penalty.
    """
    with tf.name_scope("RocAucScore"):
        is_positive = tf.cast(y_true, tf.bool)
        pos_scores = tf.boolean_mask(y_pred, is_positive)
        neg_scores = tf.boolean_mask(y_pred, tf.logical_not(is_positive))

        # Row vector of positives against column vector of negatives so that
        # broadcasting enumerates every pair.
        pos_row = tf.expand_dims(pos_scores, 0)
        neg_col = tf.expand_dims(neg_scores, 1)

        # original paper suggests performance is robust to exact parameter choice
        gamma = 0.2
        p = 3

        pair_margin = tf.zeros_like(pos_row * neg_col) + pos_row - neg_col - gamma
        violations = tf.boolean_mask(pair_margin, pair_margin < 0.0)
        return tf.reduce_sum(tf.pow(-violations, p))
def loss(self, logits, labels, regularization):
    """Adds to the inference model the layers required to generate loss.

    The loss is the sum of three terms: the variance of the logits within the
    label==1 group plus the variance within the other group, a hinge on the
    gap between the two group means (weighted by `self.lamda`, margin
    `self.mu`), and `regularization` times the sum of `self.regularizers`.

    Args:
        logits: tensor of model outputs; masked with `labels`.
        labels: tensor compared element-wise against 1 after a float cast.
        regularization: scalar weight applied to the summed regularizers.

    Returns:
        (loss, loss_average): the total loss and its exponential moving
        average (decay 0.9); evaluating `loss_average` also runs the
        moving-average update.
    """
    with tf.name_scope('loss'):
        with tf.name_scope('var_loss'):
            labels = tf.cast(labels, tf.float32)
            ones = tf.ones(labels.get_shape())
            same_class = tf.boolean_mask(logits, tf.equal(labels, ones))
            diff_class = tf.boolean_mask(logits, tf.not_equal(labels, ones))
            same_mean, same_var = tf.nn.moments(same_class, [0])
            diff_mean, diff_var = tf.nn.moments(diff_class, [0])
            var_loss = same_var + diff_var
        with tf.name_scope('mean_loss'):
            # Hinge: penalise only while the mean gap is smaller than mu.
            shortfall = self.mu - (same_mean - diff_mean)
            mean_loss = self.lamda * tf.where(
                tf.greater(shortfall, 0), shortfall, 0)
        with tf.name_scope('regularization'):
            regularization *= tf.add_n(self.regularizers)
        total_loss = var_loss + mean_loss + regularization

        # Summaries for TensorBoard.
        tf.summary.scalar('loss/total', total_loss)
        with tf.name_scope('averages'):
            averages = tf.train.ExponentialMovingAverage(0.9)
            op_averages = averages.apply(
                [var_loss, mean_loss, regularization, total_loss])
            for tag, term in (
                    ('loss/avg/var_loss', var_loss),
                    ('loss/avg/mean_loss', mean_loss),
                    ('loss/avg/regularization', regularization),
                    ('loss/avg/total', total_loss)):
                tf.summary.scalar(tag, averages.average(term))
            # Tie the EMA update to reading the averaged loss.
            with tf.control_dependencies([op_averages]):
                loss_average = tf.identity(
                    averages.average(total_loss), name='control')
        return total_loss, loss_average
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=.6):
    """Filters YOLO boxes by thresholding on object and class confidence.

    Arguments:
    box_confidence -- tensor of shape (19, 19, 5, 1)
    boxes -- tensor of shape (19, 19, 5, 4)
    box_class_probs -- tensor of shape (19, 19, 5, 80)
    threshold -- real value; boxes whose highest class score is below it are
                 discarded

    Returns:
    scores -- tensor of shape (None,), class score of each selected box
    boxes -- tensor of shape (None, 4), (b_x, b_y, b_h, b_w) of selected boxes
    classes -- tensor of shape (None,), class index of each selected box
    """
    # Step 1: score every (box, class) pair.
    scores_per_class = box_confidence * box_class_probs

    # Step 2: winning class per box and the score it won with.
    winner = K.argmax(scores_per_class, axis=-1)
    winner_score = K.max(scores_per_class, axis=-1)

    # Step 3: boolean mask, True for boxes to keep (score >= threshold).
    keep_mask = winner_score >= threshold

    # Step 4: apply the same mask to scores, boxes and classes.
    return (
        tf.boolean_mask(winner_score, keep_mask),
        tf.boolean_mask(boxes, keep_mask),
        tf.boolean_mask(winner, keep_mask),
    )
def build_detector(self):
    """Build the inference graph: placeholders, trunk and the optional heads.

    Creates `self.image_ph` (H x W x 3 float image) and `self.seg_ph`
    (H x W int32 segmentation labels), feeds the resized image through the
    network trunk, and then, depending on the module-level `args` flags:

    * `args.detect`: adds the multibox head and runs `self.nms` on its
      softmaxed confidences and predicted locations.
    * `args.segment`: adds the segmentation head, resizes its output back to
      the input image size and stores the per-pixel argmax in
      `self.segmentation`. Unless `self.no_gt` is set, also builds the
      mean-IoU metric ops; otherwise `self.mean_iou` / `self.iou_update` are
      constant zeros.
    """
    img_size = self.config['image_size']
    self.image_ph = tf.placeholder(shape=[None, None, 3], dtype=tf.float32, name='img_ph')
    self.seg_ph = tf.placeholder(shape=[None, None], dtype=tf.int32, name='seg_ph')
    # Add a batch dimension and resize to the square input size of the net.
    img = tf.image.resize_bilinear(tf.expand_dims(self.image_ph, 0), (img_size, img_size))
    self.net.create_trunk(img)
    if args.detect:
        self.net.create_multibox_head(self.loader.num_classes)
        # squeeze() drops every size-1 dimension of the head outputs.
        confidence = tf.nn.softmax(tf.squeeze(self.net.outputs['confidence']))
        location = tf.squeeze(self.net.outputs['location'])
        self.nms(location, confidence, self.bboxer.tiling)
    if args.segment:
        self.net.create_segmentation_head(self.loader.num_classes)
        self.segmentation = self.net.outputs['segmentation']
        # Height and width of the original (un-resized) input image.
        seg_shape = tf.shape(self.image_ph)[:2]
        self.segmentation = tf.image.resize_bilinear(self.segmentation, seg_shape)
        # Per-pixel class = argmax over the last (class) axis.
        self.segmentation = tf.cast(tf.argmax(tf.squeeze(self.segmentation), axis=-1), tf.int32)
        self.segmentation = tf.reshape(self.segmentation, seg_shape)
        self.segmentation.set_shape([None, None])
        if not self.no_gt:
            # Only pixels whose label is <= num_classes take part in the
            # metric; presumably larger labels mark ignored pixels -- confirm.
            easy_mask = self.seg_ph <= self.loader.num_classes
            predictions = tf.boolean_mask(self.segmentation, easy_mask)
            labels = tf.boolean_mask(self.seg_ph, easy_mask)
            self.mean_iou, self.iou_update = mean_iou(predictions, labels, self.loader.num_classes)
        else:
            self.mean_iou = tf.constant(0)
            self.iou_update = tf.constant(0)
def nms(self, localization, confidence, tiling):
    """Build per-class non-max-suppression ops for the detection head.

    Decodes the predicted boxes, discards boxes whose best non-background
    (column >= 1) confidence is below `args.conf_thresh`, then for every
    class index from 1 to `self.loader.num_classes - 1` keeps at most
    `args.top_k_nms` highest-scoring boxes and runs NMS on them.

    Results are stored, one entry per class, in `self.detection_list`
    (boxes) and `self.score_list` (scores).
    """
    candidate_boxes = decode_bboxes(localization, tiling)

    # Cheap global pre-filter on the best foreground confidence.
    best_foreground = tf.reduce_max(confidence[:, 1:], axis=-1)
    confident_enough = best_foreground >= args.conf_thresh
    candidate_boxes = tf.boolean_mask(candidate_boxes, confident_enough)
    confidence = tf.boolean_mask(confidence, confident_enough)

    self.detection_list = []
    self.score_list = []
    for class_id in range(1, self.loader.num_classes):
        class_confidence = confidence[:, class_id]
        passes = tf.greater(class_confidence, args.conf_thresh)
        class_scores = tf.boolean_mask(class_confidence, passes)
        class_boxes = tf.boolean_mask(candidate_boxes, passes)

        # top_k cannot ask for more elements than exist.
        num_kept = tf.minimum(tf.size(class_scores), args.top_k_nms)
        _, best_first = tf.nn.top_k(class_scores, num_kept)
        top_scores = tf.gather(class_scores, best_first)
        top_boxes = tf.gather(class_boxes, best_first)

        survivors = tf.image.non_max_suppression(
            top_boxes, top_scores,
            max_output_size=args.top_k_after_nms,
            iou_threshold=args.nms_thresh)

        self.detection_list.append(tf.gather(top_boxes, survivors))
        self.score_list.append(tf.gather(top_scores, survivors))
def __init__(self, prev_actions_logp, actions_logp, action_kl,
             actions_entropy, values, valid_mask, advantages, value_targets,
             vf_loss_coeff=0.5, entropy_coeff=-0.01, clip_param=0.3):
    """Assemble the clipped-surrogate (PPO-style) loss tensors.

    Args:
        prev_actions_logp: log-probabilities of the actions under the policy
            that generated them.
        actions_logp: log-probabilities of the same actions under the
            current policy.
        action_kl: KL tensor; only its mean is stored.
        actions_entropy: entropy tensor, summed over valid positions.
        values: value-function predictions.
        valid_mask: boolean mask selecting the positions that count toward
            the value and entropy terms.
        advantages: advantage estimates.
        value_targets: regression targets for `values`.
        vf_loss_coeff: weight of the value loss in the total.
        entropy_coeff: weight of the entropy term in the total.
        clip_param: the probability ratio is clipped to
            [1 - clip_param, 1 + clip_param].

    Sets `mean_kl`, `pi_loss`, `value_targets`, `vf_loss`, `entropy` and
    `total_loss` on the instance.
    """
    # Importance ratio pi_new(a) / pi_old(a), and its clipped counterpart.
    ratio = tf.exp(actions_logp - prev_actions_logp)
    clipped_ratio = tf.clip_by_value(ratio, 1 - clip_param, 1 + clip_param)
    # Pessimistic (element-wise minimum) surrogate objective.
    surrogate = tf.minimum(advantages * ratio, advantages * clipped_ratio)

    # NOTE(review): unlike the value and entropy terms below, the KL mean and
    # the policy loss are computed over all positions, not just those
    # selected by `valid_mask` -- confirm this is intended.
    self.mean_kl = tf.reduce_mean(action_kl)
    self.pi_loss = -tf.reduce_sum(surrogate)

    # The baseline loss
    self.value_targets = value_targets
    value_error = tf.boolean_mask(values - value_targets, valid_mask)
    self.vf_loss = 0.5 * tf.reduce_sum(tf.square(value_error))

    # The entropy loss
    valid_entropy = tf.boolean_mask(actions_entropy, valid_mask)
    self.entropy = tf.reduce_sum(valid_entropy)

    # The summed weighted loss
    weighted_vf = self.vf_loss * vf_loss_coeff
    weighted_entropy = self.entropy * entropy_coeff
    self.total_loss = self.pi_loss + weighted_vf + weighted_entropy
def add_loss_op(self, preds):
    """Adds Ops for the loss function to the computational graph.

    Computes the mean sparse softmax cross-entropy over the positions
    selected by `self.mask_placeholder`; masked-out tokens do not contribute.

    Args:
        preds: A tensor of shape (batch_size, max_length, n_classes)
            containing the output of the neural network before the softmax
            layer.
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE (~2-4 lines)
    keep = self.mask_placeholder
    kept_logits = tf.boolean_mask(preds, keep)
    kept_labels = tf.boolean_mask(self.labels_placeholder, keep)
    per_token_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=kept_logits, labels=kept_labels)
    mean_loss = tf.reduce_mean(per_token_loss)
    ### END YOUR CODE
    return mean_loss
def lamb_func(logit, logic, lamb):
    """Return an exp-weighted mean of the negative logits and the positive mean.

    Args:
        logit: tensor of logits.
        logic: boolean mask over `logit`; True marks the positive entries.
        lamb: scale applied to the negative logits inside the exponential.

    Returns:
        (left, right): `left` is mean(neg * exp(lamb * neg)) divided by
        mean(exp(lamb * neg)); `right` is the plain mean of the positives.
    """
    positives = tf.boolean_mask(logit, logic)
    negatives = tf.boolean_mask(logit, tf.logical_not(logic))

    neg_weights = tf.exp(negatives * lamb)
    normalizer = tf.reduce_mean(neg_weights)
    weighted_neg_mean = tf.truediv(
        tf.reduce_mean(negatives * neg_weights), normalizer)

    pos_mean = tf.reduce_mean(positives)
    return weighted_neg_mean, pos_mean
def shortlist_insert():
    """Insert the candidates flagged by `larger_scores` into the shortlist.

    Reads `ids`, `scores`, `larger_scores` and `self` from the enclosing
    scope. Returns a single op that updates both `self.sl_ids` and
    `self.sl_scores`.
    """
    candidate_ids = tf.boolean_mask(tf.to_int64(ids), larger_scores)
    candidate_scores = tf.boolean_mask(scores, larger_scores)

    # The custom op decides which shortlist slots get overwritten, and with
    # which ids/scores.
    slots, inserted_ids, inserted_scores = self.ops.top_n_insert(
        self.sl_ids, self.sl_scores, candidate_ids, candidate_scores)

    update_ids = tf.scatter_update(self.sl_ids, slots, inserted_ids)
    update_scores = tf.scatter_update(self.sl_scores, slots, inserted_scores)
    return tf.group(update_ids, update_scores)
def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
    """Compute the RPN objectness and box-regression losses.

    Anchors labelled -1 are ignored, anchors labelled 1 are positives. The
    label loss is the mean sigmoid cross-entropy over non-ignored anchors;
    the box loss is a Huber loss (delta = 1/9, divided by delta) summed over
    positive anchors and normalised by the number of non-ignored anchors.
    Precision/recall at several thresholds are registered as moving
    summaries.

    Args:
        anchor_labels: fHxfWxNA
        anchor_boxes: fHxfWxNAx4, encoded
        label_logits: fHxfWxNA
        box_logits: fHxfWxNAx4

    Returns:
        label_loss, box_loss
    """
    with tf.device('/cpu:0'):
        valid_mask = tf.stop_gradient(tf.not_equal(anchor_labels, -1))
        pos_mask = tf.stop_gradient(tf.equal(anchor_labels, 1))
        nr_valid = tf.stop_gradient(
            tf.count_nonzero(valid_mask, dtype=tf.int32),
            name='num_valid_anchor')
        nr_pos = tf.count_nonzero(
            pos_mask, dtype=tf.int32, name='num_pos_anchor')

        valid_anchor_labels = tf.boolean_mask(anchor_labels, valid_mask)
    valid_label_logits = tf.boolean_mask(label_logits, valid_mask)

    with tf.name_scope('label_metrics'):
        valid_label_prob = tf.nn.sigmoid(valid_label_logits)
        summaries = []
        with tf.device('/cpu:0'):
            for th in [0.5, 0.2, 0.1]:
                valid_prediction = tf.cast(valid_label_prob > th, tf.int32)
                nr_pos_prediction = tf.reduce_sum(
                    valid_prediction, name='num_pos_prediction')
                # Predicted positive AND label agrees with the prediction.
                pos_prediction_corr = tf.count_nonzero(
                    tf.logical_and(
                        valid_label_prob > th,
                        tf.equal(valid_prediction, valid_anchor_labels)),
                    dtype=tf.int32)

                # Guard against 0/0 when there are no positive anchors,
                # mirroring the precision guard below.
                recall = tf.to_float(
                    tf.truediv(pos_prediction_corr, nr_pos))
                recall = tf.where(
                    tf.equal(nr_pos, 0), 0.0, recall,
                    name='recall_th{}'.format(th))
                summaries.append(recall)

                precision = tf.to_float(
                    tf.truediv(pos_prediction_corr, nr_pos_prediction))
                precision = tf.where(
                    tf.equal(nr_pos_prediction, 0), 0.0, precision,
                    name='precision_th{}'.format(th))
                summaries.append(precision)
        add_moving_summary(*summaries)

    # NOTE(review): with zero valid anchors the mean below is taken over an
    # empty tensor and box_loss divides by zero; not changed here because it
    # would alter training behaviour.
    label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits)
    label_loss = tf.reduce_mean(label_loss, name='label_loss')

    pos_anchor_boxes = tf.boolean_mask(anchor_boxes, pos_mask)
    pos_box_logits = tf.boolean_mask(box_logits, pos_mask)
    delta = 1.0 / 9
    box_loss = tf.losses.huber_loss(
        pos_anchor_boxes, pos_box_logits, delta=delta,
        reduction=tf.losses.Reduction.SUM) / delta
    box_loss = tf.div(
        box_loss,
        tf.cast(nr_valid, tf.float32), name='box_loss')

    add_moving_summary(label_loss, box_loss, nr_valid, nr_pos)
    return label_loss, box_loss
def remap_keys(sparse_tensor):
    """Rewrite a SparseTensor's row indices using the key stored in each row.

    Every batch row is expected to hold its data entries followed by one
    extra "key" entry (the original comments say it is appended in
    `decode_example`). The key entry's column index becomes the new row index
    of that row's data entries, and the key entry itself is removed.

    Args:
        sparse_tensor: rank-2 `tf.SparseTensor` with int64 indices, ordered
            by batch row.

    Returns:
        A `tf.SparseTensor` with the same `dense_shape`, containing only the
        data entries, with the first index column replaced by the row's key.
    """
    # Current indices and values of our SparseTensor that we need to fix.
    bad_indices = sparse_tensor.indices
    bad_values = sparse_tensor.values

    # Number of data entries per batch row (entry count minus the key entry).
    size = tf.segment_sum(
        data=tf.ones_like(bad_indices[:, 0], dtype=tf.int64),
        segment_ids=bad_indices[:, 0]) - 1
    # Number of batch rows (batch_size unless the batch is partially full).
    length = tf.shape(size, out_type=tf.int64)[0]
    # Running total of data entries, used for indexing below.
    cum = tf.cumsum(size)
    # One extra offset per preceding row, for the key entries interleaved
    # with the data.
    length_range = tf.range(start=0, limit=length, delta=1, dtype=tf.int64)
    # Position, within `bad_indices`, of each row's key entry.
    cum_range = cum + length_range

    # The key of every batch row. The column slice is already rank 1; it must
    # not be squeezed, otherwise a one-row batch collapses to a scalar and
    # the `gathered_indices[0]` lookup below fails.
    gathered_indices = tf.gather(bad_indices, cum_range)[:, 1]

    # Enumerated entry positions of `bad_indices`.
    sparse_indices_range = tf.range(
        tf.shape(bad_indices, out_type=tf.int64)[0], dtype=tf.int64)

    # Find the entry positions that are real data, i.e. NOT one of the key
    # positions in `cum_range`.
    x = sparse_indices_range
    s = cum_range
    # Tile x once per key position: multiples are [1, len(s)].
    tile_multiples = tf.concat(
        [tf.ones(tf.shape(tf.shape(x)), dtype=tf.int64),
         tf.shape(s, out_type=tf.int64)],
        axis=0)
    x_tile = tf.tile(tf.expand_dims(x, -1), tile_multiples)
    # Vectorised "x in s" (logical OR over the key axis), then negated.
    x_not_in_s = ~tf.reduce_any(tf.equal(x_tile, s), -1)

    # Keep only the data entries of indices and values.
    selected_indices = tf.boolean_mask(
        tensor=bad_indices, mask=x_not_in_s, axis=0)
    selected_values = tf.boolean_mask(
        tensor=bad_values, mask=x_not_in_s, axis=0)

    # Repeat each row's key once per data entry of that row. Rows are jagged,
    # so tf.map_fn cannot stack the results; grow the tensor in a while loop
    # instead, seeded with row 0.
    tiling = tf.tile(
        input=tf.expand_dims(gathered_indices[0], -1),
        multiples=tf.expand_dims(size[0], -1))

    def loop_body(i, tensor_grow):
        repeated_key = tf.tile(
            input=tf.expand_dims(gathered_indices[i], -1),
            multiples=tf.expand_dims(size[i], -1))
        return i + 1, tf.concat(values=[tensor_grow, repeated_key], axis=0)

    _, result = tf.while_loop(
        lambda i, tensor_grow: i < length,
        loop_body,
        [tf.constant(1, dtype=tf.int64), tiling])

    # Pair the repeated keys with the original column indices.
    selected_indices_fixed = tf.concat(
        [tf.expand_dims(result, -1),
         tf.expand_dims(selected_indices[:, 1], -1)],
        axis=1)

    # Combine everything back into a SparseTensor.
    remapped_sparse_tensor = tf.SparseTensor(
        indices=selected_indices_fixed,
        values=selected_values,
        dense_shape=sparse_tensor.dense_shape)
    return remapped_sparse_tensor
def _build_detector(self):
    """Interpret the net output and get the predicted boxes.

    Strategy: decide each box's class first, threshold on its score, then run
    class-agnostic NMS. Results are stored in `self.scores`, `self.boxes`
    (x, y, w, h) and `self.box_classes`.
    """
    # The width and height of the original image, fed at run time.
    self.width = tf.placeholder(tf.float32, name="img_w")
    self.height = tf.placeholder(tf.float32, name="img_h")

    # The flat prediction vector is laid out as
    # [class probs | confidences | boxes]; compute the split points.
    cells = self.S * self.S
    class_end = cells * self.C
    conf_end = class_end + cells * self.B

    class_probs = tf.reshape(
        self.predicts[0, :class_end], [self.S, self.S, self.C])
    confs = tf.reshape(
        self.predicts[0, class_end:conf_end], [self.S, self.S, self.B])
    # boxes -> (x, y, w, h)
    raw_boxes = tf.reshape(
        self.predicts[0, conf_end:], [self.S, self.S, self.B, 4])

    # Decode: add the cell offset to x/y, divide by S to get a [0, 1]
    # fraction, then scale by the image size. The w/h predictions are square
    # roots, so square them before scaling.
    x_offset = tf.constant(self.x_offset, dtype=tf.float32)
    y_offset = tf.constant(self.y_offset, dtype=tf.float32)
    cell_x, cell_y, sqrt_w, sqrt_h = tf.unstack(raw_boxes, axis=3)
    boxes = tf.stack(
        [(cell_x + x_offset) / self.S * self.width,
         (cell_y + y_offset) / self.S * self.height,
         tf.square(sqrt_w) * self.width,
         tf.square(sqrt_h) * self.height],
        axis=3)

    # Class-specific confidence scores [S, S, B, C] via broadcasting:
    # [S, S, B, 1] * [S, S, 1, C].
    scores = tf.expand_dims(confs, -1) * tf.expand_dims(class_probs, 2)
    scores = tf.reshape(scores, [-1, self.C])  # [S*S*B, C]
    boxes = tf.reshape(boxes, [-1, 4])  # [S*S*B, 4]

    # For every box keep only its best class and that class's score.
    best_class = tf.argmax(scores, axis=1)
    best_score = tf.reduce_max(scores, axis=1)

    # Filter the boxes by the score threshold.
    keep = best_score >= self.threshold
    kept_scores = tf.boolean_mask(best_score, keep)
    kept_boxes = tf.boolean_mask(boxes, keep)
    kept_classes = tf.boolean_mask(best_class, keep)

    # Non max suppression (does not distinguish different classes).
    # ref: https://tensorflow.google.cn/api_docs/python/tf/image/non_max_suppression
    # box (x, y, w, h) -> box (x1, y1, x2, y2)
    centers = kept_boxes[:, :2]
    half_sizes = 0.5 * kept_boxes[:, 2:]
    corners = tf.concat([centers - half_sizes, centers + half_sizes], axis=1)
    nms_indices = tf.image.non_max_suppression(
        corners, kept_scores, self.max_output_size, self.iou_threshold)

    self.scores = tf.gather(kept_scores, nms_indices)
    self.boxes = tf.gather(kept_boxes, nms_indices)
    self.box_classes = tf.gather(kept_classes, nms_indices)
def get_detailed_assigned_priors_summary(assigned_priors, priors_info, name):
    """
    Split a flat vector of assigned priors by SSD head and by prior type.

    Priors are assumed to be laid out head after head; within a head, the
    `num_priors_per_pixel` prior types of one location are stored next to
    each other (stride `num_priors_per_pixel` between locations).

    Args:
      assigned_priors: Assigned priors, 1-D `tf.Tensor` or array-like of
        shape (num_priors).
      priors_info: Information about priors, list of pairs for every ssd
        head: tensor_dimensions, num_priors_per_pixel.
      name: Output name, used inside the dictionary keys.

    Returns:
      detailed_assigned_priors: Dictionary with an entry for the full vector
        ('priors/<name>'), one per head
        ('priors_by_head/<name>/head_<i>') and one per head and prior type
        ('priors_by_head_and_type/<name>/head_<i>/prior_<j>').

    Raises:
      IndexError: if `priors_info` describes more priors than
        `assigned_priors` contains.
    """
    assert len(assigned_priors.shape) == 1

    detailed_assigned_priors = dict()
    detailed_assigned_priors['priors/{0}'.format(name)] = assigned_priors

    is_tensor = isinstance(assigned_priors, tf.Tensor)
    total_priors_number = int(assigned_priors.shape[0])

    def select(prior_map, count):
        # Tensors need boolean_mask plus a reshape to restore the static
        # length; anything else is indexed with the boolean mask directly.
        if is_tensor:
            return tf.reshape(tf.boolean_mask(assigned_priors, prior_map), [count])
        return assigned_priors[prior_map]

    start = 0
    for head_id, (tensor_dimensions, num_priors_per_pixel) in enumerate(priors_info):
        priors_per_type = int(np.prod(tensor_dimensions))
        priors_count = priors_per_type * num_priors_per_pixel
        stop = start + priors_count
        # Slice assignment silently truncates, so keep the out-of-range
        # failure explicit.
        if stop > total_priors_number:
            raise IndexError(
                'priors_info describes more priors than assigned_priors contains')

        # All priors of this head occupy the contiguous range [start, stop).
        # The builtin `bool` replaces the `np.bool` alias removed from NumPy.
        prior_map = np.zeros(shape=total_priors_number, dtype=bool)
        prior_map[start:stop] = True
        head_key = 'priors_by_head/{0}/head_{1}'.format(name, head_id)
        detailed_assigned_priors[head_key] = select(prior_map, priors_count)

        # Priors of one type are every `num_priors_per_pixel`-th element of
        # the head's range, starting at that type's offset.
        for offset in range(num_priors_per_pixel):
            prior_map = np.zeros(shape=total_priors_number, dtype=bool)
            prior_map[start + offset:stop:num_priors_per_pixel] = True
            assigned_priors_head_type_name = 'priors_by_head_and_type/{0}/head_{1}/prior_{2}'.format(name, head_id, offset)
            detailed_assigned_priors[assigned_priors_head_type_name] = select(
                prior_map, priors_per_type)

        start = stop

    return detailed_assigned_priors
def map_box_encodings(i):
    """Produces box K-hot and score encodings for each class index.

    Selects the entries of `classes` / `confidences` whose `unique_indices`
    value equals `i` and scatters them into dense vectors of length
    `num_classes`. `unique_indices`, `num_boxes`, `classes`, `confidences`
    and `num_classes` come from the enclosing scope.

    Returns:
        (box_class_encodings, box_confidence_encodings): a vector with 1 at
        every selected class index, and a vector with the matching
        confidence at those indices.
    """
    target = i * tf.ones(num_boxes, dtype=tf.int32)
    belongs_to_i = tf.reshape(tf.equal(unique_indices, target), [-1])

    member_classes = tf.boolean_mask(classes, belongs_to_i)
    member_confidences = tf.boolean_mask(confidences, belongs_to_i)

    k_hot = tf.sparse_to_dense(
        member_classes, [num_classes], 1, validate_indices=False)
    confidence_encoding = tf.sparse_to_dense(
        member_classes, [num_classes], member_confidences,
        validate_indices=False)
    return k_hot, confidence_encoding
def yolo_filter_boxes(boxes, box_confidence, box_class_probs, threshold=.6):
    """Filter YOLO boxes based on object and class confidence.

    A box is kept when its best per-class score (objectness times class
    probability) is at least `threshold`.

    Returns:
        (boxes, scores, classes) of the kept boxes, in that order.
    """
    class_scores = box_confidence * box_class_probs
    top_class = K.argmax(class_scores, axis=-1)
    top_score = K.max(class_scores, axis=-1)
    keep = top_score >= threshold

    # TODO: Expose tf.boolean_mask to Keras backend?
    kept_boxes = tf.boolean_mask(boxes, keep)
    kept_scores = tf.boolean_mask(top_score, keep)
    kept_classes = tf.boolean_mask(top_class, keep)
    return kept_boxes, kept_scores, kept_classes
def spread_loss(labels, activations, margin):
    """Sum of squared hinge penalties between true and other class activations.

    For every example, each non-target activation is penalised by
    relu(margin - (a_target - a_other)) ** 2; the penalties are summed over
    the whole batch.

    Args:
        labels: tensor of the same shape as `activations`; 1 marks the target
            class and 0 the others (assumes exactly one 1 per row, as the
            reshapes below require).
        activations: rank-2 tensor with a statically known shape
            [batch, num_classes].
        margin: scalar margin.

    Returns:
        Scalar loss tensor.
    """
    static_shape = activations.get_shape().as_list()
    batch = static_shape[0]
    num_classes = static_shape[1]

    is_target = tf.equal(labels, 1)
    is_other = tf.equal(labels, 0)

    # One target activation per row, and the remaining num_classes - 1.
    target_act = tf.reshape(
        tf.boolean_mask(activations, is_target), [batch, 1])
    other_act = tf.reshape(
        tf.boolean_mask(activations, is_other), [batch, num_classes - 1])

    hinge = tf.nn.relu(margin - (target_act - other_act))
    return tf.reduce_sum(tf.square(hinge))
def spread_loss(labels, activations, iterations_per_epoch, global_step, name):
    """Spread loss with a per-epoch margin schedule.

    The margin starts at 0.2 and grows by 0.1 at every epoch boundary until
    it reaches 0.9. The loss is also registered via `tf.losses.add_loss`.

    :param labels: one-hot labels, same shape as `activations`
        (the original notes give [24, 10])
    :param activations: [batch, num_classes] activation for each class; the
        class dimension must be statically known
    :param iterations_per_epoch: number of steps per epoch, used to place the
        schedule boundaries
    :param global_step: training step counter driving the schedule
    :param name: variable scope name
    :return: spread loss
    """
    # Margin schedule: 0.2, 0.3, ..., 0.9, switching after epochs 1..7.
    epoch_boundaries = [iterations_per_epoch * epoch for epoch in range(1, 8)]
    margin_values = [tenths / 10.0 for tenths in range(2, 10)]
    margin = tf.train.piecewise_constant(
        tf.cast(global_step, dtype=tf.int32),
        boundaries=epoch_boundaries,
        values=margin_values)

    num_classes = activations.get_shape().as_list()[1]

    with tf.variable_scope(name):
        batch = tf.shape(activations)[0]

        # Masks for the true label and for every other label, shape (?, C).
        true_mask = tf.equal(labels, 1)
        other_mask = tf.equal(labels, 0)

        # Activation of the true class, shape (?, 1).
        true_act = tf.reshape(
            tf.boolean_mask(activations, true_mask), shape=(batch, 1))
        # Activations of the other classes, shape (?, C - 1).
        other_act = tf.reshape(
            tf.boolean_mask(activations, other_mask), [batch, num_classes - 1])

        hinge = tf.maximum(0.0, margin - (true_act - other_act))
        total = tf.reduce_sum(tf.square(hinge))
        tf.losses.add_loss(total)
        return total
def flatten_binary_scores(scores, labels, ignore=None):
    """
    Flatten predictions and labels of a batch to rank 1 (binary case).

    When `ignore` is given, positions whose label equals `ignore` are removed
    from both outputs.
    """
    flat_scores = tf.reshape(scores, (-1,))
    flat_labels = tf.reshape(labels, (-1,))
    if ignore is not None:
        keep = tf.not_equal(flat_labels, ignore)
        kept_scores = tf.boolean_mask(flat_scores, keep, name='valid_scores')
        kept_labels = tf.boolean_mask(flat_labels, keep, name='valid_labels')
        return kept_scores, kept_labels
    return flat_scores, flat_labels
def _build_detector(self):
    """Interpret the net output and get the predicted boxes.

    Stores the NMS-filtered results in `self.scores`, `self.boxes`
    (x, y, w, h) and `self.box_classes`.
    """
    # The width and height of the original image.
    self.width = tf.placeholder(tf.float32, name="img_w")
    self.height = tf.placeholder(tf.float32, name="img_h")

    # Split points inside the flat prediction vector.
    n_class_values = self.S * self.S * self.C  # all class predictions
    n_conf_values = self.S * self.S * self.B   # one confidence per box
    boxes_start = n_class_values + n_conf_values
    flat = self.predicts

    # Class probabilities per grid cell.
    class_probs = tf.reshape(
        flat[0, :n_class_values], [self.S, self.S, self.C])
    # Confidence per box.
    confs = tf.reshape(
        flat[0, n_class_values:boxes_start], [self.S, self.S, self.B])
    # Raw boxes -> (x, y, w, h).
    raw = tf.reshape(flat[0, boxes_start:], [self.S, self.S, self.B, 4])

    # Convert x, y to coordinates relative to the top-left corner of the
    # image; w and h are predicted as square roots, so square them, then
    # scale everything by the image size to get real centres and sizes.
    center_x = (raw[..., 0] + tf.constant(self.x_offset, dtype=tf.float32)) / self.S * self.width
    center_y = (raw[..., 1] + tf.constant(self.y_offset, dtype=tf.float32)) / self.S * self.height
    box_w = tf.square(raw[..., 2]) * self.width
    box_h = tf.square(raw[..., 3]) * self.height
    boxes = tf.stack([center_x, center_y, box_w, box_h], axis=3)

    # Final score = confidence * class probability -> [S, S, B, C].
    scores = tf.expand_dims(confs, -1) * tf.expand_dims(class_probs, 2)
    scores = tf.reshape(scores, [-1, self.C])  # [S*S*B, C], C scores per box
    boxes = tf.reshape(boxes, [-1, 4])  # [S*S*B, 4], centre and size per box

    # Best class of each box and the score it obtained.
    box_classes = tf.argmax(scores, axis=1)
    box_class_scores = tf.reduce_max(scores, axis=1)

    # Keep the boxes whose score reaches the threshold.
    filter_mask = box_class_scores >= self.threshold
    scores = tf.boolean_mask(box_class_scores, filter_mask)
    boxes = tf.boolean_mask(boxes, filter_mask)
    box_classes = tf.boolean_mask(box_classes, filter_mask)

    # Non max suppression (does not distinguish different classes).
    # ref: https://tensorflow.google.cn/api_docs/python/tf/image/non_max_suppression
    # box (x, y, w, h) -> box (x1, y1, x2, y2)
    half_w = 0.5 * boxes[:, 2]
    half_h = 0.5 * boxes[:, 3]
    x1 = boxes[:, 0] - half_w
    y1 = boxes[:, 1] - half_h
    x2 = boxes[:, 0] + half_w
    y2 = boxes[:, 1] + half_h
    _boxes = tf.stack([x1, y1, x2, y2], axis=1)

    # Drop boxes that overlap a higher-scoring box too much.
    nms_indices = tf.image.non_max_suppression(
        _boxes, scores, self.max_output_size, self.iou_threshold)
    self.scores = tf.gather(scores, nms_indices)
    self.boxes = tf.gather(boxes, nms_indices)
    self.box_classes = tf.gather(box_classes, nms_indices)
def generate_rpn_proposals(boxes, scores, img_shape, pre_nms_topk, post_nms_topk=None):
    """
    Sample RPN proposals by the following steps:
    1. Pick top k1 by scores
    2. Clip to the image and drop boxes that are too small
    3. NMS them
    4. Keep at most k2 NMS outputs. Default k2 == k1, i.e. does not filter
       the NMS output.

    Args:
        boxes: nx4 float dtype, the proposal boxes. Decoded to floatbox already
        scores: n float, the logits
        img_shape: [h, w]
        pre_nms_topk, post_nms_topk (int): See above.

    Returns:
        boxes: kx4 float
        scores: k logits
    """
    assert boxes.shape.ndims == 2, boxes.shape
    if post_nms_topk is None:
        post_nms_topk = pre_nms_topk

    # top_k cannot request more elements than there are scores.
    num_candidates = tf.minimum(pre_nms_topk, tf.size(scores))
    cand_scores, cand_indices = tf.nn.top_k(scores, k=num_candidates, sorted=False)
    cand_boxes = clip_boxes(tf.gather(boxes, cand_indices), img_shape)

    # View every box as [[x1, y1], [x2, y2]] so corners can be subtracted.
    cand_corners = tf.reshape(cand_boxes, (-1, 2, 2))
    width_height = cand_corners[:, 1] - cand_corners[:, 0]  # n x 2
    big_enough = tf.reduce_all(width_height > cfg.RPN.MIN_SIZE, axis=1)  # n,
    valid_corners = tf.boolean_mask(cand_corners, big_enough)
    valid_scores = tf.boolean_mask(cand_scores, big_enough)

    # TODO not needed
    # Swap x and y inside each corner to get the y1x1y2x2 layout.
    nms_input = tf.reshape(
        tf.reverse(valid_corners, axis=[2]),
        (-1, 4), name='nms_input_boxes')
    nms_indices = tf.image.non_max_suppression(
        nms_input,
        # TODO use exp to work around a bug in TF1.9: https://github.com/tensorflow/tensorflow/issues/19578
        tf.exp(valid_scores),
        max_output_size=post_nms_topk,
        iou_threshold=cfg.RPN.PROPOSAL_NMS_THRESH)

    valid_boxes = tf.reshape(valid_corners, (-1, 4))
    final_boxes = tf.gather(valid_boxes, nms_indices)
    final_scores = tf.gather(valid_scores, nms_indices)
    tf.sigmoid(final_scores, name='probs')  # for visualization
    return (tf.stop_gradient(final_boxes, name='boxes'),
            tf.stop_gradient(final_scores, name='scores'))
def _build_detector(self):
    """Decode the raw network output into scored, NMS-filtered boxes.

    The first row of `self.predicts` is read as three consecutive chunks:
    S*S*C class probabilities, S*S*B confidences and S*S*B*4 box values.
    Creates the `img_w` / `img_h` placeholders (`self.width`, `self.height`)
    and stores the results in `self.scores`, `self.boxes` (x, y, w, h) and
    `self.box_classes`.
    """
    # placeholders for the width and height of the original image
    self.width = tf.placeholder(tf.float32, name="img_w")
    self.height = tf.placeholder(tf.float32, name="img_h")

    # boundaries of the three chunks inside the flat prediction vector
    idx1 = self.S * self.S * self.C
    idx2 = idx1 + self.S * self.S * self.B
    grid = [self.S, self.S]
    # class prediction
    class_probs = tf.reshape(self.predicts[0, :idx1], grid + [self.C])
    # confidence
    confs = tf.reshape(self.predicts[0, idx1:idx2], grid + [self.B])
    # boxes -> (x, y, w, h)
    raw_boxes = tf.reshape(self.predicts[0, idx2:], grid + [self.B, 4])

    # x, y become coordinates relative to the top-left corner of the image;
    # w, h are predicted as square roots, hence the squaring; everything is
    # scaled by the image width / height.
    center_x = (raw_boxes[:, :, :, 0] + tf.constant(self.x_offset, dtype=tf.float32)) / self.S * self.width
    center_y = (raw_boxes[:, :, :, 1] + tf.constant(self.y_offset, dtype=tf.float32)) / self.S * self.height
    box_w = tf.square(raw_boxes[:, :, :, 2]) * self.width
    box_h = tf.square(raw_boxes[:, :, :, 3]) * self.height
    boxes = tf.stack([center_x, center_y, box_w, box_h], axis=3)

    # class-specific confidence scores [S, S, B, C]
    scores = tf.expand_dims(confs, -1) * tf.expand_dims(class_probs, 2)
    scores = tf.reshape(scores, [-1, self.C])  # [S*S*B, C]
    boxes = tf.reshape(boxes, [-1, 4])  # [S*S*B, 4]

    # each box keeps only its best class and that class's score
    box_classes = tf.argmax(scores, axis=1)
    box_class_scores = tf.reduce_max(scores, axis=1)

    # drop boxes below the score threshold
    above_threshold = box_class_scores >= self.threshold
    scores = tf.boolean_mask(box_class_scores, above_threshold)
    boxes = tf.boolean_mask(boxes, above_threshold)
    box_classes = tf.boolean_mask(box_classes, above_threshold)

    # non max suppression (do not distinguish different classes)
    # ref: https://tensorflow.google.cn/api_docs/python/tf/image/non_max_suppression
    # box (x, y, w, h) -> box (x1, y1, x2, y2)
    x1 = boxes[:, 0] - 0.5 * boxes[:, 2]
    y1 = boxes[:, 1] - 0.5 * boxes[:, 3]
    x2 = boxes[:, 0] + 0.5 * boxes[:, 2]
    y2 = boxes[:, 1] + 0.5 * boxes[:, 3]
    corner_boxes = tf.stack([x1, y1, x2, y2], axis=1)
    nms_indices = tf.image.non_max_suppression(corner_boxes, scores,
                                               self.max_output_size, self.iou_threshold)
    self.scores = tf.gather(scores, nms_indices)
    self.boxes = tf.gather(boxes, nms_indices)
    self.box_classes = tf.gather(box_classes, nms_indices)
def bboxes_filter_overlap(labels, bboxes, threshold=0.5, scope=None):
    """Keep only the boxes that sufficiently overlap the reference box [0, 0, 1, 1].

    The overlap score of every box is computed with `bboxes_intersection`
    against the reference box; entries whose score is not strictly greater
    than `threshold` are dropped from both `labels` and `bboxes`.

    Return:
      labels, bboxes: Filtered elements.
    """
    with tf.name_scope(scope, 'bboxes_filter', [labels, bboxes]):
        reference_box = tf.constant([0, 0, 1, 1], bboxes.dtype)
        overlap = bboxes_intersection(reference_box, bboxes)
        keep = overlap > threshold
        kept_labels = tf.boolean_mask(labels, keep)
        kept_bboxes = tf.boolean_mask(bboxes, keep)
        return kept_labels, kept_bboxes
def segmentation_loss(seg_logits, seg_gt, config):
    """Build the segmentation cross-entropy loss and a streaming mean-IoU metric.

    Entries whose ground-truth label is greater than `dataset.num_classes`
    are masked out before the loss is computed. Scalar summaries
    'loss/segmentation' and 'accuracy/mean_iou' are registered.

    Returns:
        (seg_loss, mean_iou, update_mean_iou)
    """
    # NOTE(review): `config` is never read; `dataset` comes from module scope.
    # NOTE(review): the mask keeps label == num_classes as well -- confirm that
    # the logits really have a channel for that label.
    valid = seg_gt <= dataset.num_classes
    valid_logits = tf.boolean_mask(seg_logits, valid)
    valid_gt = tf.boolean_mask(seg_gt, valid)
    predictions = tf.argmax(valid_logits, axis=1)

    per_entry_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=valid_logits, labels=valid_gt)
    seg_loss = tf.reduce_mean(per_entry_loss)
    tf.summary.scalar('loss/segmentation', seg_loss)

    mean_iou, update_mean_iou = streaming_mean_iou(predictions, valid_gt, dataset.num_classes)
    tf.summary.scalar('accuracy/mean_iou', mean_iou)
    return seg_loss, mean_iou, update_mean_iou
def generate_rpn_proposals(boxes, scores, img_shape):
    """
    Select RPN proposals: top-k by score, clip, size filter, then NMS.

    The pre-/post-NMS top-k limits are read from `config` (TRAIN_* when the
    current tower context is training, TEST_* otherwise).

    Args:
        boxes: nx4 float dtype, decoded to floatbox already
        scores: n float, the logits
        img_shape: [h, w]

    Returns:
        boxes: kx4 float
        scores: k logits
    """
    assert boxes.shape.ndims == 2, boxes.shape
    if get_current_tower_context().is_training:
        pre_nms_topk, post_nms_topk = config.TRAIN_PRE_NMS_TOPK, config.TRAIN_POST_NMS_TOPK
    else:
        pre_nms_topk, post_nms_topk = config.TEST_PRE_NMS_TOPK, config.TEST_POST_NMS_TOPK

    # Best candidates by score (never more than there are scores).
    num_candidates = tf.minimum(pre_nms_topk, tf.size(scores))
    cand_scores, cand_indices = tf.nn.top_k(scores, k=num_candidates, sorted=False)
    cand_boxes = tf.gather(boxes, cand_indices)
    cand_boxes = clip_boxes(cand_boxes, img_shape)

    # Two corner points per box; their difference gives width and height.
    corners = tf.reshape(cand_boxes, (-1, 2, 2))
    corner_x1y1, corner_x2y2 = tf.split(corners, 2, axis=1)  # nx1x2 each
    side_lengths = tf.squeeze(corner_x2y2 - corner_x1y1, axis=1)
    keep = tf.reduce_all(side_lengths > config.RPN_MIN_SIZE, axis=1)  # n,
    kept_corners = tf.boolean_mask(corners, keep)
    kept_scores = tf.boolean_mask(cand_scores, keep)

    # NMS input uses the swapped (y, x) coordinate order.
    nms_input = tf.reshape(
        tf.reverse(kept_corners, axis=[2]),
        (-1, 4), name='nms_input_boxes')
    nms_indices = tf.image.non_max_suppression(
        nms_input,
        kept_scores,
        max_output_size=post_nms_topk,
        iou_threshold=config.RPN_PROPOSAL_NMS_THRESH)

    kept_boxes = tf.reshape(kept_corners, (-1, 4))
    final_boxes = tf.gather(kept_boxes, nms_indices, name='boxes')
    final_scores = tf.gather(kept_scores, nms_indices, name='scores')
    tf.sigmoid(final_scores, name='probs')  # for visualization
    return final_boxes, final_scores
def make_net(self, input_images, input_measurements, input_actions, input_objectives, reuse=False):
    """Build the prediction network and return its outputs.

    Image, measurement and (when `self.fc_obj_params` is an ndarray) objective
    inputs are encoded separately and concatenated. Two fully connected heads
    are built on the joint code: a value head of size `target_dim` and an
    advantage head of size `num_actions * target_dim`. The per-action mean is
    subtracted from the advantage output before the value output is added.

    Returns:
        pred_all: predictions for every action, [-1, num_actions, target_dim]
        pred_relevant: rows of `pred_all` selected by `input_actions` cast to bool
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()

    num_actions = len(self.net_discrete_actions)

    # Both heads share the joint-layer spec, except for the size of the last layer.
    self.fc_val_params = np.copy(self.fc_joint_params)
    self.fc_val_params['out_dims'][-1] = self.target_dim
    self.fc_adv_params = np.copy(self.fc_joint_params)
    self.fc_adv_params['out_dims'][-1] = num_actions * self.target_dim

    # Per-modality encoders.
    img_conv = my_ops.conv_encoder(input_images, self.conv_params, 'p_img_conv', msra_coeff=0.9)
    img_code = my_ops.fc_net(my_ops.flatten(img_conv), self.fc_img_params, 'p_img_fc', msra_coeff=0.9)
    meas_code = my_ops.fc_net(input_measurements, self.fc_meas_params, 'p_meas_fc', msra_coeff=0.9)

    if isinstance(self.fc_obj_params, np.ndarray):
        obj_code = my_ops.fc_net(input_objectives, self.fc_obj_params, 'p_obj_fc', msra_coeff=0.9)
        joint_code = tf.concat([img_code, meas_code, obj_code], 1)
    else:
        joint_code = tf.concat([img_code, meas_code], 1)
        # Randomized objectives cannot work without an objective encoder.
        if self.random_objective_coeffs:
            raise Exception('Need fc_obj_params with randomized objectives')

    value_out = my_ops.fc_net(joint_code, self.fc_val_params, 'p_val_fc', last_linear=True, msra_coeff=0.9)
    adv_out = my_ops.fc_net(joint_code, self.fc_adv_params, 'p_adv_fc', last_linear=True, msra_coeff=0.9)

    adv_per_action = tf.reshape(adv_out, [-1, num_actions, self.target_dim])
    adv_centered = adv_per_action - tf.reduce_mean(adv_per_action, reduction_indices=1, keep_dims=True)

    pred_all = adv_centered + tf.reshape(value_out, [-1, 1, self.target_dim])
    pred_relevant = tf.boolean_mask(pred_all, tf.cast(input_actions, tf.bool))
    return pred_all, pred_relevant
def bboxes_filter_labels(labels, bboxes, out_labels=(), num_classes=np.inf, scope=None):
    """Filter out labels from a collection.

    Typically used to get rid of DontCare elements. Also removes elements
    whose label is not below the number of classes.

    Args:
      labels: label tensor; filtered along its first dimension.
      bboxes: box tensor; filtered with the same mask as `labels`.
      out_labels: iterable of label values to remove. Empty by default.
      num_classes: entries with `label >= num_classes` are removed. The default
        (`np.inf`) disables this check.
      scope: optional name scope.

    Return:
      labels, bboxes: Filtered elements.
    """
    with tf.name_scope(scope, 'bboxes_filter_labels', [labels, bboxes]):
        if isinstance(num_classes, float) and np.isinf(num_classes):
            # No class limit: start from an all-True mask. This also avoids
            # comparing integer labels against a float infinity.
            mask = tf.ones_like(labels, dtype=tf.bool)
        else:
            # Keep valid class ids only (the previous `>=` test kept exactly
            # the out-of-range labels).
            mask = tf.less(labels, num_classes)
        # Exclude each unwanted label value (the loop used to walk `labels`
        # itself, which masked out every element).
        for l in out_labels:
            mask = tf.logical_and(mask, tf.not_equal(labels, l))
        labels = tf.boolean_mask(labels, mask)
        bboxes = tf.boolean_mask(bboxes, mask)
        return labels, bboxes
def get_losses(obj_mask):
    """Get motion constraint loss.

    Compares the dense depth prediction inside the object mask with a single
    approximate depth derived from the pixel height of the mask.
    `self`, `i` and `depth_pred` are taken from the enclosing scope.
    """
    half = tf.constant(0.5, dtype=tf.float32)
    # Find height of segment.
    mask_coords = tf.where(tf.greater(obj_mask[:, :, 0], half))  # Shape (num_true, 2=yx)
    row_max = tf.reduce_max(mask_coords[:, 0])
    row_min = tf.reduce_min(mask_coords[:, 0])
    seg_height = row_max - row_min

    f_y = self.intrinsic_mat[i, 0, 1, 1]
    approx_depth = (f_y * self.global_scale_var) / tf.to_float(seg_height)

    # Depth predictions at the pixels covered by the mask.
    mask_3d = tf.reshape(obj_mask[:, :, 0], (self.img_height, self.img_width, 1))
    reference_pred = tf.boolean_mask(
        depth_pred, tf.greater(mask_3d, tf.constant(0.5, dtype=tf.float32)))

    # Establish loss on approx_depth, a scalar, and
    # reference_pred, our dense prediction. Normalize both to
    # prevent degenerative depth shrinking.
    global_mean_depth_pred = tf.reduce_mean(depth_pred)
    reference_pred = reference_pred / global_mean_depth_pred
    approx_depth = approx_depth / global_mean_depth_pred
    return tf.reduce_mean(tf.abs(reference_pred - approx_depth))
def bbox_to_tensor(bbox, label, input_shape=(416, 416), anchors=YOLOv3_anchors, num_classes=80):
    """Encode ground-truth boxes and labels into per-layer YOLO label tensors.

    For every box with positive width, the anchor with the highest IoU (boxes
    and anchors both centred at the origin) is looked up in `anchor_mask` to
    find its output layer and anchor slot. The grid cell containing the box
    centre then receives the box, an object flag and a one-hot class.

    Returns:
        A tuple with one tensor per output layer (two or three layers), each of
        shape (input height // stride, input width // stride, anchors per
        layer, 5 + num_classes) with strides 32, 16 and 8.

    NOTE(review): the `.shape[0] > 0` test, `range(tf.shape(...)[0])` and
    `int(tensor)` below appear to require eager execution -- confirm with callers.
    """
    # NOTE: input_shape is given in (input height, input width) order
    # bbox.shape = (box num, 4) which represents (ymin,xmin,ymax,xmax)
    # label.shape = (box num)
    # anchors = (9,2)
    tf.Assert(tf.equal(tf.reduce_all(label < num_classes), True), [label])
    num_layers = anchors.shape[0] // 3
    tf.Assert(
        tf.equal(
            tf.reduce_any([tf.equal(num_layers, 2), tf.equal(num_layers, 3)]), True), [num_layers])
    # Row l lists the anchor indices handled by output layer l.
    # NOTE(review): the two-layer mask never contains anchor 0; a box whose
    # best anchor is 0 would make `pos` below empty -- confirm this is intended.
    anchor_mask = tf.cond(
        tf.equal(num_layers, 3), lambda: tf.constant([[6, 7, 8], [3, 4, 5], [0, 1, 2]]),
        lambda: tf.constant([[3, 4, 5], [1, 2, 3]]))
    # The reverse turns the (y, x) order of `bbox` into (x, y) / (width, height).
    true_boxes_xy = tf.reverse((bbox[..., 0:2] + bbox[..., 2:4]) / 2., axis=[-1])  # box center proportional position
    true_boxes_wh = tf.reverse(tf.math.abs(bbox[..., 2:4] - bbox[..., 0:2]), axis=[-1])  # box proportional size
    true_boxes = tf.concat([true_boxes_xy, true_boxes_wh], axis=-1)
    # Reversed so the scale is in (width, height) order, matching true_boxes.
    input_shape_tensor = tf.reverse(tf.convert_to_tensor(input_shape, dtype=tf.float32), axis=[0])
    boxes_xy = true_boxes[..., 0:2] * input_shape_tensor  # box center absolute position
    boxes_wh = true_boxes[..., 2:4] * input_shape_tensor  # box absolute size
    # create tensor for label: y_true.shape[layer] = (height, width, anchor num, 5 + class num)
    y_true = tuple((np.zeros(shape=(input_shape[0] // {
        0: 32,
        1: 16,
        2: 8
    }[l], input_shape[1] // {
        0: 32,
        1: 16,
        2: 8
    }[l], tf.shape(anchor_mask[l, ...])[0], 5 + num_classes),
                             dtype=np.float32) for l in range(num_layers)))
    # center the anchor boxes at the origin, get the max and min of corners' (x,y)
    anchors = tf.expand_dims(tf.convert_to_tensor(anchors, dtype=tf.float32), 0)  # anchors.shape = (1, 9, 2)
    anchor_maxes = anchors / 2.  # max of width, height, anchors_maxes.shape = (1, 9, 2)
    anchor_mins = -anchor_maxes  # min of width, height, anchors_mins.shape = (1, 9, 2)
    # center the bbox at the origin, get the max and min of corners' (x,y)
    valid_mask = tf.greater(boxes_wh[..., 0], 0)  # valid box should have width > 0: valid_mask.shape = (box_num)
    wh = tf.boolean_mask(boxes_wh, valid_mask)  # absolute size: wh.shape = (valid box num, 2)
    valid_true_boxes = tf.boolean_mask(true_boxes, valid_mask)  # box proportional position: valid_true_boxes.shape = (valid box num, 4)
    valid_label = tf.boolean_mask(label, valid_mask)  # valid_label.shape = (valid box num)
    # if there is any valid bbox, get anchor box which has the maximum iou with current bbox.
    if wh.shape[0] > 0:
        wh = tf.expand_dims(wh, -2)  # wh.shape = (valid box num, 1, 2)
        box_maxes = wh / 2  # max of width, height, box_maxes.shape = (valid box num, 1, 2)
        box_mins = -box_maxes  # min of width, height, box_mins.shape = (valid box num, 1, 2)
        intersect_mins = tf.math.maximum(box_mins, anchor_mins)  # intersect_mins.shape = (valid box num, anchor num(9), 2)
        intersect_maxes = tf.math.minimum(box_maxes, anchor_maxes)  # intersect_maxes.shape = (valid box num, anchor num(9), 2)
        intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins, 0.)  # intersect_wh.shape = (valid box num, anchor num(9), 2)
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]  # intersect_area.shape = (valid box num, anchor num(9))
        box_area = wh[..., 0] * wh[..., 1]  # box_area.shape = (valid box_num, 1)
        anchor_area = anchors[..., 0] * anchors[..., 1]  # anchor_area.shape = (1, anchor num(9))
        iou = intersect_area / (box_area + anchor_area - intersect_area)  # iou.shape = (valid box num, anchor num(9))
        # get the anchor box having maximum iou with each true bbox
        best_anchor = tf.math.argmax(iou, axis=-1, output_type=tf.int32)  # best_anchor.shape = (valid box num)
        # fill in label tensor
        for t in range(tf.shape(best_anchor)[0]):
            n = best_anchor[t]
            # (layer, slot) positions of anchor n in the mask; the first match is used.
            pos = tf.where(tf.equal(anchor_mask, n))
            l = pos[0][0]
            k = pos[0][1]
            i = int(
                tf.clip_by_value(valid_true_boxes[t, 1] * y_true[l].shape[0],
                                 clip_value_min=0,
                                 clip_value_max=y_true[l].shape[0] - 1))  # absolute center y = proportional y * grid_shape.height
            j = int(
                tf.clip_by_value(valid_true_boxes[t, 0] * y_true[l].shape[1],
                                 clip_value_min=0,
                                 clip_value_max=y_true[l].shape[1] - 1))  # absolute center x = proportional x * grid_shape.width
            c = valid_label[t]  # class
            y_true[l][i, j, k, 0:4] = valid_true_boxes[t, 0:4]  # box proportional position (x,y,width,height)
            y_true[l][i, j, k, 4] = 1  # object mask
            y_true[l][i, j, k, 5 + c] = 1  # class mask
    if num_layers == 3:
        return (tf.convert_to_tensor(y_true[0]), tf.convert_to_tensor(y_true[1]), tf.convert_to_tensor(y_true[2]))
    else:
        return (tf.convert_to_tensor(y_true[0]), tf.convert_to_tensor(y_true[1]))
def preprocess_for_eval(image, labels, bboxes,
                        out_shape=EVAL_SIZE, data_format='NHWC',
                        difficults=None, resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_train'):
    """Preprocess an image for evaluation.

    The image is converted to float and whitened with the channel means, then
    resized according to `resize`. A full-image box [0, 0, 1, 1] is carried
    through the resize together with `bboxes` and returned separately.

    Args:
        image: A `Tensor` representing an image of arbitrary size (rank 3).
        labels: labels of the boxes; only filtered by `difficults`.
        bboxes: boxes of the image, or None.
        out_shape: Output shape after pre-processing (if resize != None)
        data_format: 'NHWC' (default) or 'NCHW'; the latter transposes the image.
        difficults: optional flags; entries that cast to True are removed
            from `labels` and `bboxes`.
        resize: Resize strategy.
        scope: name scope.

    Returns:
        image, labels, bboxes, bbox_img

    Raises:
        ValueError: if `image` is not rank 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf_image_whitened(tf.to_float(image), [_R_MEAN, _G_MEAN, _B_MEAN])

        # The image rectangle rides along as the first box.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        bboxes = bbox_img if bboxes is None else tf.concat([bbox_img, bboxes], axis=0)

        target_h, target_w = out_shape[0], out_shape[1]
        if resize == Resize.NONE:
            # No resizing...
            pass
        elif resize == Resize.CENTRAL_CROP:
            # Central cropping of the image.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, target_h, target_w)
        elif resize == Resize.PAD_AND_RESIZE:
            # Shrink (never enlarge) so the image fits, then pad.
            shape = tf.shape(image)
            no_upscale = tf.to_double(1.0)
            fit_ratio = tf.minimum(tf.to_double(target_h / shape[0]),
                                   tf.to_double(target_w / shape[1]))
            factor = tf.minimum(no_upscale, fit_ratio)
            scaled_hw = factor * tf.to_double(shape[0:2])
            scaled_hw = tf.cast(tf.floor(scaled_hw), tf.int32)
            image = tf_image.resize_image(image, scaled_hw,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, target_h, target_w)
        elif resize == Resize.WARP_RESIZE:
            # Warp resize of the image.
            image = tf_image.resize_image(image, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)

        # Split the image rectangle back off the real boxes.
        bbox_img, bboxes = bboxes[0], bboxes[1:]

        # Remove difficult boxes.
        if difficults is not None:
            not_difficult = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, not_difficult)
            bboxes = tf.boolean_mask(bboxes, not_difficult)

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, bbox_img
def preprocess_for_eval(image, labels, bboxes, height, width,
                        out_shape=EVAL_SIZE, data_format='NHWC',
                        use_whiten=True, difficults=None,
                        resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_train'):
    """Preprocess an image for evaluation.

    The image is converted to float, resized according to `resize`, clipped to
    [0, 255] and whitened with the channel means. A full-image box
    [0, 0, 1, 1] is carried through the resize together with `bboxes`.

    Args:
        image: A `Tensor` representing an image of arbitrary size (rank 3).
        labels: A Tensor that includes all labels, or None.
        bboxes: A Tensor that includes the coordinates of the boxes in shape
            [N, 4], or None.
        height, width: not used by this function.
        out_shape: Image size, default is EVAL_SIZE.
        data_format: 'NHWC' keeps the layout; anything else transposes the
            image with perm (2, 0, 1).
        use_whiten: not used by this function; whitening is always applied.
        difficults: optional flags; entries that cast to True are removed
            from `labels` and `bboxes`.
        resize: Resize strategy.
        scope: name scope.

    Returns:
        image, labels, bboxes, bbox_img, num -- where `num` is the sum of
        `labels` cast to int32 (0 when `labels` is None), taken before the
        difficult entries are removed.

    Raises:
        ValueError: if `image` is not rank 3.
    """
    # NOTE(review): `height`, `width` and `use_whiten` are accepted but ignored --
    # confirm whether callers expect `use_whiten=False` to skip whitening.
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)

        num = 0 if labels is None else tf.reduce_sum(tf.cast(labels, tf.int32))

        # The image rectangle rides along as the first box.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        bboxes = bbox_img if bboxes is None else tf.concat([bbox_img, bboxes], axis=0)

        target_h, target_w = out_shape[0], out_shape[1]
        if resize == Resize.NONE:
            # No resizing...
            pass
        elif resize == Resize.CENTRAL_CROP:
            # Central cropping of the image.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, target_h, target_w)
        elif resize == Resize.PAD_AND_RESIZE:
            # Shrink (never enlarge) so the image fits, then pad.
            shape = tf.shape(image)
            no_upscale = tf.to_double(1.0)
            fit_ratio = tf.minimum(tf.to_double(target_h / shape[0]),
                                   tf.to_double(target_w / shape[1]))
            factor = tf.minimum(no_upscale, fit_ratio)
            scaled_hw = factor * tf.to_double(shape[0:2])
            scaled_hw = tf.cast(tf.floor(scaled_hw), tf.int32)
            image = tf_image.resize_image(image, scaled_hw,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, target_h, target_w)
        elif resize == Resize.WARP_RESIZE:
            # Warp resize of the image.
            image = tf_image.resize_image(image, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)

        # Split the image rectangle back off the real boxes.
        bbox_img, bboxes = bboxes[0], bboxes[1:]

        # Remove difficult boxes.
        if difficults is not None:
            not_difficult = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, not_difficult)
            bboxes = tf.boolean_mask(bboxes, not_difficult)

        image = tf.clip_by_value(image, 0., 255.)
        image = tf_image.tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Only non-NHWC layouts need the channel axis moved to the front.
        if data_format != 'NHWC':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, bbox_img, num
def likelihood_ratio_filter(node_pairs, modified_adjacency, original_adjacency, d_min, threshold=0.004):
    """
    Filter the input node pairs based on the likelihood ratio test proposed by Zügner et al. 2018, see
    https://dl.acm.org/citation.cfm?id=3220078. A node pair is allowed if adding/removing the edge between
    its two nodes does not violate the unnoticeability constraint. Assumes unweighted and undirected graphs.

    Parameters
    ----------
    node_pairs: tf.Tensor, shape (e, 2) dtype int
        The e node pairs to consider, where each node pair consists of the two indices of the nodes.

    modified_adjacency: tf.Tensor shape (N,N) dtype int
        The input (modified) adjacency matrix. Assumed to be unweighted and symmetric.

    original_adjacency: tf.Tensor shape (N,N) dtype int
        The input (original) adjacency matrix. Assumed to be unweighted and symmetric.

    d_min: int
        The minimum degree considered in the Powerlaw distribution.

    threshold: float, default 0.004
        Cutoff value for the unnoticeability constraint. Smaller means stricter constraint. 0.004 corresponds to a
        p-value of 0.95 in the Chi-square distribution with one degree of freedom.

    Returns
    -------
    allowed_mask: tf.Tensor, shape (N * N,), dtype float32
        Flattened adjacency-shaped mask that is 1 at the (i, j) and (j, i) positions of every node pair whose
        new likelihood ratio is below the threshold, and 0 everywhere else.

    current_ratio: tf.Tensor, shape (), dtype float
        The current value of the log likelihood ratio.

    """
    num_nodes = int(modified_adjacency.shape[0])

    degrees_orig = tf.cast(tf.reduce_sum(original_adjacency, axis=1), tf.float32)
    degrees_current = tf.cast(tf.reduce_sum(modified_adjacency, axis=1), tf.float32)

    # Both degree sequences side by side.
    degrees_both = tf.concat((degrees_current[None, :], degrees_orig[None, :]), axis=1)

    # Log likelihoods of the original, the modified and the combined degree sequence.
    ll_orig, _, n_orig, sum_log_degrees_orig = degree_sequence_log_likelihood(degrees_orig, d_min)
    ll_current, _, _, _ = degree_sequence_log_likelihood(degrees_current, d_min)
    ll_comb, _, _, _ = degree_sequence_log_likelihood(degrees_both, d_min)

    # Log likelihood ratio of the graph as it is now.
    current_ratio = -2 * ll_comb + 2 * (ll_orig + ll_current)

    # Log likelihood values that would arise from flipping the edge of each node pair.
    new_lls, _, new_ns, new_sum_log_degrees = updated_log_likelihood_for_edge_changes(
        node_pairs, tf.cast(modified_adjacency, tf.float32), d_min)

    # Combine the original degree distribution with the one belonging to each node pair.
    n_combined = n_orig + new_ns
    sum_log_degrees_combined = sum_log_degrees_orig + new_sum_log_degrees
    alpha_combined = compute_alpha(n_combined, sum_log_degrees_combined, d_min)
    new_ll_combined = compute_log_likelihood(n_combined, alpha_combined, sum_log_degrees_combined, d_min)
    new_ratios = -2 * new_ll_combined + 2 * (new_lls + ll_orig)

    # Only pairs whose resulting ratio stays below the threshold are allowed.
    is_allowed = new_ratios < threshold
    allowed_pairs = tf.boolean_mask(node_pairs, is_allowed)

    # Flattened indices of the allowed pairs [e,2] -> [e,], similar to np.ravel_multi_index
    flat_ixs = ravel_multiple_indices(tf.cast(allowed_pairs, tf.int32), modified_adjacency.shape)
    # Also for the reverse direction (we assume unweighted graphs).
    flat_ixs_reverse = ravel_multiple_indices(
        tf.reverse(tf.cast(allowed_pairs, tf.int32), [1]), modified_adjacency.shape)

    # [N * N] arrays with ones at the admissible node pair locations and 0 everywhere else.
    forward_hits = tf.scatter_nd(flat_ixs[:, None],
                                 tf.ones_like(flat_ixs, dtype=tf.float32),
                                 shape=[num_nodes * num_nodes])
    reverse_hits = tf.scatter_nd(flat_ixs_reverse[:, None],
                                 tf.ones_like(flat_ixs_reverse, dtype=tf.float32),
                                 shape=[num_nodes * num_nodes])

    # Add both directions; the clip caps positions hit more than once at 1.
    allowed_mask = tf.clip_by_value(forward_hits + reverse_hits, 0, 1)
    return allowed_mask, current_ratio
def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):  # literally: the loss functions for boxes and labels
    """
    Compute the RPN objectness loss and box regression loss.

    Anchors labelled -1 are ignored; anchors labelled 1 are the positives that
    contribute to the box loss. Precision/recall at several thresholds and both
    losses are registered as moving summaries.

    Args:
        anchor_labels: fHxfWxNA
        anchor_boxes: fHxfWxNAx4, encoded
        label_logits: fHxfWxNA
        box_logits: fHxfWxNAx4

    Returns:
        label_loss, box_loss
    """
    with tf.device('/cpu:0'):
        is_valid = tf.stop_gradient(tf.not_equal(anchor_labels, -1))
        is_pos = tf.stop_gradient(tf.equal(anchor_labels, 1))
        nr_valid = tf.stop_gradient(tf.count_nonzero(is_valid, dtype=tf.int32), name='num_valid_anchor')
        nr_pos = tf.identity(tf.count_nonzero(is_pos, dtype=tf.int32), name='num_pos_anchor')
        # nr_pos is guaranteed >0 in C4. But in FPN. even nr_valid could be 0.

        valid_anchor_labels = tf.boolean_mask(anchor_labels, is_valid)
    valid_label_logits = tf.boolean_mask(label_logits, is_valid)

    with tf.name_scope('label_metrics'):
        valid_label_prob = tf.nn.sigmoid(valid_label_logits)
        summaries = []
        with tf.device('/cpu:0'):
            for th in [0.5, 0.2, 0.1]:
                predicted_pos = tf.cast(valid_label_prob > th, tf.int32)
                nr_pos_prediction = tf.reduce_sum(predicted_pos, name='num_pos_prediction')
                is_correct_pos = tf.logical_and(
                    valid_label_prob > th,
                    tf.equal(predicted_pos, valid_anchor_labels))
                nr_correct_pos = tf.count_nonzero(is_correct_pos, dtype=tf.int32)
                placeholder = 0.5  # A small value will make summaries appear lower.
                recall = tf.cast(tf.truediv(nr_correct_pos, nr_pos), tf.float32)
                recall = tf.where(tf.equal(nr_pos, 0), placeholder, recall,
                                  name='recall_th{}'.format(th))
                precision = tf.cast(tf.truediv(nr_correct_pos, nr_pos_prediction), tf.float32)
                precision = tf.where(tf.equal(nr_pos_prediction, 0), placeholder, precision,
                                     name='precision_th{}'.format(th))
                summaries.extend([precision, recall])
        # The metric summaries end here; the label and box losses are computed below.
        add_moving_summary(*summaries)

    # Per-level loss summaries in FPN may appear lower due to the use of a small placeholder.
    # But the total RPN loss will be fine. TODO make the summary op smarter
    placeholder = 0.

    # Label loss: sigmoid cross entropy over the valid anchors.
    per_anchor_label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.cast(valid_anchor_labels, tf.float32), logits=valid_label_logits)
    label_loss = tf.reduce_sum(per_anchor_label_loss) * (1. / cfg.RPN.BATCH_PER_IM)
    label_loss = tf.where(tf.equal(nr_valid, 0), placeholder, label_loss, name='label_loss')

    # Box loss: Huber loss over the positive anchors.
    pos_anchor_boxes = tf.boolean_mask(anchor_boxes, is_pos)
    pos_box_logits = tf.boolean_mask(box_logits, is_pos)
    delta = 1.0 / 9
    box_loss = tf.losses.huber_loss(
        pos_anchor_boxes, pos_box_logits, delta=delta,
        reduction=tf.losses.Reduction.SUM) / delta
    box_loss = box_loss * (1. / cfg.RPN.BATCH_PER_IM)
    box_loss = tf.where(tf.equal(nr_pos, 0), placeholder, box_loss, name='box_loss')

    add_moving_summary(label_loss, box_loss, nr_valid, nr_pos)
    return [label_loss, box_loss]
def off_diagonal_part(matrix):
    """Return the off-diagonal entries of a square matrix as a flat tensor.

    Args:
        matrix: tensor whose first two dimensions are equal and whose first
            dimension is statically known.

    Returns:
        The entries of `matrix` at positions (i, j) with i != j, in row-major
        order.
    """
    size = int(matrix.shape[0])
    # tf.boolean_mask is documented to take a boolean mask; build one
    # explicitly instead of passing the float tensor `1 - eye`.
    on_diagonal = tf.cast(tf.eye(size), tf.bool)
    return tf.boolean_mask(matrix, tf.logical_not(on_diagonal))
def yolo_eval_batch(yolo_outputs, anchors, num_classes, image_shape, batch_size=1,
                    max_boxes=20, score_threshold=.6, iou_threshold=.5):
    """Evaluate YOLO model on given input and return filtered boxes.

    Boxes and scores of all output layers are concatenated; then, for every
    batch element and every class, boxes scoring at least `score_threshold`
    go through NMS (at most `max_boxes` per class).

    Returns:
        Three lists with one entry per batch element: boxes, scores and class
        ids of the detections that were kept.
    """
    print('Inference batch size:', batch_size)
    num_layers = len(yolo_outputs)
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]  # default setting
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    # Decode every output layer, then merge along the box axis.
    layer_boxes = []
    layer_scores = []
    for layer_idx in range(num_layers):
        decoded_boxes, decoded_scores = yolo_boxes_and_scores(
            yolo_outputs[layer_idx], anchors[anchor_mask[layer_idx]],
            num_classes, input_shape, image_shape, batch_size=batch_size)
        layer_boxes.append(decoded_boxes)
        layer_scores.append(decoded_scores)
    all_boxes = K.concatenate(layer_boxes, axis=1)
    all_box_scores = K.concatenate(layer_scores, axis=1)

    all_boxes_res = []
    all_scores_res = []
    all_classes_res = []
    for b in range(batch_size):
        boxes = all_boxes[b]
        box_scores = all_box_scores[b]

        confident = box_scores >= score_threshold
        max_boxes_tensor = K.constant(max_boxes, dtype='int32')
        kept_boxes = []
        kept_scores = []
        kept_classes = []
        for c in range(num_classes):
            # TODO: use keras backend instead of tf.
            class_boxes = tf.boolean_mask(boxes, confident[:, c])
            class_box_scores = tf.boolean_mask(box_scores[:, c], confident[:, c])
            nms_index = tf.image.non_max_suppression(
                class_boxes, class_box_scores, max_boxes_tensor,
                iou_threshold=iou_threshold)
            class_boxes = K.gather(class_boxes, nms_index)
            class_box_scores = K.gather(class_box_scores, nms_index)
            class_ids = K.ones_like(class_box_scores, 'int32') * c
            kept_boxes.append(class_boxes)
            kept_scores.append(class_box_scores)
            kept_classes.append(class_ids)

        all_boxes_res.append(K.concatenate(kept_boxes, axis=0))
        all_scores_res.append(K.concatenate(kept_scores, axis=0))
        all_classes_res.append(K.concatenate(kept_classes, axis=0))

    return all_boxes_res, all_scores_res, all_classes_res
def train_batch(self, source_charseq_ids, source_charseqs, target_charseq_ids, target_charseqs): # TODO(lemmatizer_noattn): Modify target_charseqs by appending EOW; only the version with appended EOW is used from now on. with tf.GradientTape() as tape: # TODO(lemmatizer_noattn): Embed source charseqs # TODO: Run self._model.source_rnn on the embedded sequences, returning outputs in `source_encoded`. # Copy the source_encoded to corresponding batch places, and then flatten it source_mask = tf.not_equal(source_charseq_ids, 0) source_encoded = tf.boolean_mask(tf.gather(source_encoded, source_charseq_ids), source_mask) targets = tf.boolean_mask(tf.gather(target_charseqs, target_charseq_ids), source_mask) class DecoderTraining(decoder.BaseDecoder): @property def batch_size(self): raise NotImplemented() # TODO: Return batch size of self._source_encoded, using tf.shape @property def output_size(self): raise NotImplemented() # TODO(lemmatizer_noattn): Return number of the generated logits @property def output_dtype(self): return NotImplemented() # TODO(lemmatizer_noattn): Return the type of the generated logits def _with_attention(self, inputs, states): # TODO: Compute the attention. # - Take self._source_encoded and pass it through the self._model.attention_source_layer. # Because self._source_encoded does not change, you should in fact do it in `initialize`. # - Pass `states` though self._model.attention_state_layer. # - Sum the two outputs. However, the first has shape [a, b, c] and the second [a, c]. Therefore, # somehow expand the second to [a, b, c] first. (Hint: use broadcasting rules.) # - Pass the sum through `tf.tanh`, then self._model.attention_weight_layer. # - Then, run softmax on a suitable axis (the one corresponding to characters), generating `weights`. # - Multiply `self._source_encoded` with `weights` and sum the result in the axis # corresponding to characters, generating `attention`. 
Therefore, `attention` is a a fixed-size # representation for every batch element, independently on how many characters had # the corresponding input forms. # - Finally concatenate `inputs` and `attention` and return the result. def initialize(self, layer_inputs, initial_state=None): self._model, self._source_encoded, self._targets = layer_inputs # TODO(lemmatozer_noattn): Define `finished` as a vector of self.batch_size of `False` [see tf.fill]. # TODO(lemmatizer_noattn): Define `inputs` as a vector of self.batch_size MorphoDataset.Factor.BOW [see tf.fill], # embedded using self._model.target_embedding # TODO: Define `states` as the last words from self._source_encoded # TODO: Pass `inputs` through `self._with_attention(inputs, states)`. return finished, inputs, states def step(self, time, inputs, states): # TODO(lemmatizer_noattn): Pass `inputs` and `[states]` through self._model.target_rnn_cell, generating # `outputs, [states]`. # TODO(lemmatizer_noattn): Overwrite `outputs` by passing them through self._model.target_output_layer, # TODO(lemmatizer_noattn): Define `next_inputs` by embedding `time`-th words from `self._targets`. # TODO(lemmatizer_noattn): Define `finished` as True if `time`-th word from `self._targets` is EOW, False otherwise. # Again, no == or !=. # TODO: Pass `inputs` through `self._with_attention(inputs, states)`. return outputs, states, next_inputs, finished output_layer, _, _ = DecoderTraining()([self._model, source_encoded, targets]) # TODO(lemmatizer_noattn): Compute loss. Use only nonzero `targets` as a mask. 
gradients = tape.gradient(loss, self._model.variables) self._optimizer.apply_gradients(zip(gradients, self._model.variables)) tf.summary.experimental.set_step(self._optimizer.iterations) with self._writer.as_default(): for name, metric in self._metrics_training.items(): metric.reset_states() if name == "loss": metric(loss) else: metric(targets, output_layer, tf.not_equal(targets, 0)) tf.summary.scalar("train/{}".format(name), metric.result()) return tf.math.argmax(output_layer, axis=2) def train_epoch(self, dataset, args): for batch in dataset.batches(args.batch_size): # TODO(lemmatizer_noattn): Call train_batch, storing results in `predictions`. form, gold_lemma, system_lemma = "", "", "" for i in batch[dataset.FORMS].charseqs[1]: if i: form += dataset.data[dataset.FORMS].alphabet[i] for i in range(len(batch[dataset.LEMMAS].charseqs[1])): if batch[dataset.LEMMAS].charseqs[1][i]: gold_lemma += dataset.data[dataset.LEMMAS].alphabet[batch[dataset.LEMMAS].charseqs[1][i]] system_lemma += dataset.data[dataset.LEMMAS].alphabet[predictions[0][i]] print(float(self._metrics_training["accuracy"].result()), form, gold_lemma, system_lemma) @tf.function(input_signature=[tf.TensorSpec(shape=[None, None], dtype=tf.int32)] * 2, autograph=False) def predict_batch(self, source_charseq_ids, source_charseqs): # TODO(lemmatizer_noattn)(train_batch): Embed source charseqs # TODO(train_batch): Run self._model.source_rnn on the embedded sequences, returning outputs in `source_encoded`. 
# Copy the source_encoded to corresponding batch places, and then flatten it source_mask = tf.not_equal(source_charseq_ids, 0) source_encoded = tf.boolean_mask(tf.gather(source_encoded, source_charseq_ids), source_mask) class DecoderPrediction(decoder.BaseDecoder): @property def batch_size(self): raise NotImplemented() # TODO(train_batch): Return batch size of self._source_encoded, using tf.shape @property def output_size(self): raise NotImplemented() # TODO(lemmatizer_noattn): Return 1 because we are returning directly the predictions @property def output_dtype(self): return NotImplemented() # TODO(lemmatizer_noattn): Return tf.int32 because the predictions are integral def _with_attention(self, inputs, states): # TODO: A copy of _with_attention from train_batch; you can of course # move the definition to a place where it can be reused in both places. def initialize(self, layer_inputs, initial_state=None): self._model, self._source_encoded = layer_inputs # TODO(lemmatizer_noattn)(train_batch): Define `finished` as a vector of self.batch_size of `False` [see tf.fill]. # TODO(lemmatizer_noattn)(train_batch): Define `inputs` as a vector of self.batch_size MorphoDataset.Factor.BOW [see tf.fill], # embedded using self._model.target_embedding # TODO(train_batch): Define `states` as the last words from self._source_encoded # TODO(train_batch): Pass `inputs` through `self._with_attention(inputs, states)`. return finished, inputs, states def step(self, time, inputs, states): # TODO(lemmatizer_noattn)(train_batch): Pass `inputs` and `[states]` through self._model.target_rnn_cell, generating # `outputs, [states]`. # TODO(lemmatizer_noattn)(train_batch): Overwrite `outputs` by passing them through self._model.target_output_layer, # TODO(lemmatizer_noattn): Overwirte `outputs` by passing them through `tf.argmax` on suitable axis and with # `output_type=tf.int32` parameter. 
# TODO(lemmatizer_noattn): Define `next_inputs` by embedding the `outputs` # TODO(lemmatizer_noattn): Define `finished` as True if `outputs` are EOW, False otherwise. [No == or !=]. # TODO: Pass `inputs` through `self._with_attention(inputs, states)`. return outputs, states, next_inputs, finished predictions, _, _ = DecoderPrediction(maximum_iterations=tf.shape(source_charseqs)[1] + 10)([self._model, source_encoded]) return predictions @tf.function(input_signature=[tf.TensorSpec(shape=[None, None], dtype=tf.int32)] * 4, autograph=False) def evaluate_batch(self, source_charseq_ids, source_charseqs, target_charseq_ids, target_charseqs): # Predict predictions = self.predict_batch(source_charseq_ids, source_charseqs) # Append EOW to target_charseqs and copy them to corresponding places and flatten it target_charseqs = self._append_eow(target_charseqs) targets = tf.boolean_mask(tf.gather(target_charseqs, target_charseq_ids), tf.not_equal(source_charseq_ids, 0)) # Compute accuracy, but on the whole sequences mask = tf.cast(tf.not_equal(targets, 0), tf.int32) resized_predictions = tf.concat([predictions, tf.zeros_like(targets)], axis=1)[:, :tf.shape(targets)[1]] equals = tf.reduce_all(tf.equal(resized_predictions * mask, targets * mask), axis=1) self._metrics_evaluation["accuracy"](equals) def evaluate(self, dataset, dataset_name, args): for metric in self._metrics_evaluation.values(): metric.reset_states() for batch in dataset.batches(args.batch_size): predictions = self.evaluate_batch(batch[dataset.FORMS].charseq_ids, batch[dataset.FORMS].charseqs, batch[dataset.LEMMAS].charseq_ids, batch[dataset.LEMMAS].charseqs) metrics = {name: float(metric.result()) for name, metric in self._metrics_evaluation.items()} with self._writer.as_default(): for name, value in metrics.items(): tf.summary.scalar("{}/{}".format(dataset_name, name), value) return metrics if __name__ == "__main__": import argparse import datetime import os import re # Parse arguments parser = 
argparse.ArgumentParser() parser.add_argument("--batch_size", default=10, type=int, help="Batch size.") parser.add_argument("--cle_dim", default=64, type=int, help="CLE embedding dimension.") parser.add_argument("--epochs", default=10, type=int, help="Number of epochs.") parser.add_argument("--max_sentences", default=5000, type=int, help="Maximum number of sentences to load.") parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.") parser.add_argument("--rnn_dim", default=64, type=int, help="RNN cell dimension.") parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.") args = parser.parse_args() # Fix random seeds and number of threads np.random.seed(42) tf.random.set_seed(42) if args.recodex: tf.keras.utils.get_custom_objects()["glorot_uniform"] = lambda: tf.initializers.glorot_uniform(seed=42) tf.keras.utils.get_custom_objects()["orthogonal"] = lambda: tf.initializers.orthogonal(seed=42) tf.keras.utils.get_custom_objects()["uniform"] = lambda: tf.initializers.RandomUniform(seed=42) tf.config.threading.set_inter_op_parallelism_threads(args.threads) tf.config.threading.set_intra_op_parallelism_threads(args.threads) # Create logdir name args.logdir = os.path.join("logs", "{}-{}-{}".format( os.path.basename(__file__), datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"), ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", key), value) for key, value in sorted(vars(args).items()))) )) # Load the data morpho = MorphoDataset("czech_cac", max_sentences=args.max_sentences) # Create the network and train network = Network(args, num_source_chars=len(morpho.train.data[morpho.train.FORMS].alphabet), num_target_chars=len(morpho.train.data[morpho.train.LEMMAS].alphabet)) for epoch in range(args.epochs): network.train_epoch(morpho.train, args) metrics = network.evaluate(morpho.dev, "dev", args) print("Evaluation on {}, epoch {}: {}".format("dev", epoch + 1, metrics)) metrics = 
network.evaluate(morpho.test, "test", args) with open("lemmatizer.out", "w") as out_file: print("{:.2f}".format(100 * metrics["accuracy"]), file=out_file)
def predict_batch(self, source_charseq_ids, source_charseqs):
    """Decode a prediction for every non-padding word of the batch.

    NOTE(review): this is still an assignment skeleton. The steps marked
    TODO are not implemented, so `source_encoded`, `finished`, `inputs`,
    `states`, `outputs` and `next_inputs` are read before anything assigns
    them; the method cannot run until the TODOs are filled in.

    Args:
        source_charseq_ids: indices used to gather rows of the encoded
            source sequences; entries equal to 0 are masked out.
        source_charseqs: source character sequences; the size of its
            second dimension plus 10 bounds the number of decoding steps.

    Returns:
        The first element of the tuple produced by calling the
        `DecoderPrediction` layer.
    """
    # TODO(lemmatizer_noattn)(train_batch): Embed source charseqs

    # TODO(train_batch): Run self._model.source_rnn on the embedded sequences, returning outputs in `source_encoded`.

    # Copy the source_encoded to corresponding batch places, and then flatten it
    source_mask = tf.not_equal(source_charseq_ids, 0)
    source_encoded = tf.boolean_mask(tf.gather(source_encoded, source_charseq_ids), source_mask)

    class DecoderPrediction(decoder.BaseDecoder):
        # The placeholders below raise NotImplementedError: `NotImplemented`
        # is a non-callable constant, so `NotImplemented()` would fail with
        # an unrelated TypeError.
        @property
        def batch_size(self):
            # TODO(train_batch): Return batch size of self._source_encoded, using tf.shape
            raise NotImplementedError()

        @property
        def output_size(self):
            # TODO(lemmatizer_noattn): Return 1 because we are returning directly the predictions
            raise NotImplementedError()

        @property
        def output_dtype(self):
            # TODO(lemmatizer_noattn): Return tf.int32 because the predictions are integral
            raise NotImplementedError()

        def _with_attention(self, inputs, states):
            # TODO: A copy of _with_attention from train_batch; you can of course
            # move the definition to a place where it can be reused in both places.
            # An explicit body keeps the module importable until this is written.
            raise NotImplementedError()

        def initialize(self, layer_inputs, initial_state=None):
            self._model, self._source_encoded = layer_inputs

            # TODO(lemmatizer_noattn)(train_batch): Define `finished` as a vector of self.batch_size of `False` [see tf.fill].

            # TODO(lemmatizer_noattn)(train_batch): Define `inputs` as a vector of self.batch_size MorphoDataset.Factor.BOW [see tf.fill],
            # embedded using self._model.target_embedding

            # TODO(train_batch): Define `states` as the last words from self._source_encoded

            # TODO(train_batch): Pass `inputs` through `self._with_attention(inputs, states)`.
            return finished, inputs, states

        def step(self, time, inputs, states):
            # TODO(lemmatizer_noattn)(train_batch): Pass `inputs` and `[states]` through self._model.target_rnn_cell, generating
            # `outputs, [states]`.

            # TODO(lemmatizer_noattn)(train_batch): Overwrite `outputs` by passing them through self._model.target_output_layer,

            # TODO(lemmatizer_noattn): Overwrite `outputs` by passing them through `tf.argmax` on suitable axis and with
            # `output_type=tf.int32` parameter.

            # TODO(lemmatizer_noattn): Define `next_inputs` by embedding the `outputs`

            # TODO(lemmatizer_noattn): Define `finished` as True if `outputs` are EOW, False otherwise. [No == or !=].

            # TODO: Pass `inputs` through `self._with_attention(inputs, states)`.
            return outputs, states, next_inputs, finished

    predictions, _, _ = DecoderPrediction(maximum_iterations=tf.shape(source_charseqs)[1] + 10)([self._model, source_encoded])
    return predictions

@tf.function(input_signature=[tf.TensorSpec(shape=[None, None], dtype=tf.int32)] * 4, autograph=False)
def evaluate_batch(self, source_charseq_ids, source_charseqs, target_charseq_ids, target_charseqs):
    """Update the evaluation accuracy metric with one batch.

    A word counts as correct only when every non-zero target position
    matches the prediction. All four arguments are rank-2 int32 tensors
    (see the `input_signature` above). Nothing is returned; the result is
    accumulated in `self._metrics_evaluation["accuracy"]`.
    """
    # Predict
    predictions = self.predict_batch(source_charseq_ids, source_charseqs)

    # Append EOW to target_charseqs and copy them to corresponding places and flatten it
    target_charseqs = self._append_eow(target_charseqs)
    targets = tf.boolean_mask(tf.gather(target_charseqs, target_charseq_ids), tf.not_equal(source_charseq_ids, 0))

    # Compute accuracy, but on the whole sequences
    mask = tf.cast(tf.not_equal(targets, 0), tf.int32)
    # Right-pad predictions with zeros, then cut them to the target length,
    # so both tensors have the same width whichever one was longer.
    resized_predictions = tf.concat([predictions, tf.zeros_like(targets)], axis=1)[:, :tf.shape(targets)[1]]
    equals = tf.reduce_all(tf.equal(resized_predictions * mask, targets * mask), axis=1)
    self._metrics_evaluation["accuracy"](equals)

def evaluate(self, dataset, dataset_name, args):
    """Evaluate on `dataset` and return the metrics as plain floats.

    Resets every evaluation metric, feeds all batches of size
    `args.batch_size` through `evaluate_batch`, writes each metric as the
    summary scalar "<dataset_name>/<metric>" and returns a
    `{metric name: float}` dict.
    """
    for metric in self._metrics_evaluation.values():
        metric.reset_states()
    for batch in dataset.batches(args.batch_size):
        # evaluate_batch has no return value; it only updates the metrics.
        self.evaluate_batch(batch[dataset.FORMS].charseq_ids, batch[dataset.FORMS].charseqs,
                            batch[dataset.LEMMAS].charseq_ids, batch[dataset.LEMMAS].charseqs)

    metrics = {name: float(metric.result()) for name, metric in self._metrics_evaluation.items()}
    with self._writer.as_default():
        for name, value in metrics.items():
            tf.summary.scalar("{}/{}".format(dataset_name, name), value)

    return metrics
''' #1.where只接受一个参数 ''' where 只接受一个tensor,就是bool矩阵 a [[true ,false,false] [false,true ,false] [false,false,true ]] a.shape->[3,3] where(a),会返回true的坐标 以左上角第一个元素作为坐标原点 所以第一个true的坐标为(0,0),第二个(1,1),第3个(2,2) ''' a = tf.random.normal([3, 3]) mask = a > 0 #大于0的值变为true,小于0的变为false。mask与a的shape相同 tf.boolean_mask(a, mask) #取出a的值,该值在mask中的对应位置为true indices = tf.where(mask) #取出mask中为true的位置的坐标 tf.gather_nd(a, indices) #取出对应坐标的值 #2.where接受3个参数 #where(cond,a,b) ''' 根据cond矩阵中的True来选择对应位置的A矩阵中的元素, False来选择对应位置的B矩阵中的元素,组建一个新的矩阵 作用:有目的性的对a、b矩阵的值进行筛选 ''' #例 a = tf.ones([3, 3]) b = tf.zeros([3, 3]) c = tf.constant([True, True, False], [True, False, False],
def _build_policy_loss(self, i):
    """Build the policy loss and compile the dual / KL helper functions.

    Besides returning the loss, this method (re)initialises the dual
    parameters `self._param_eta` and `self._param_v`, and stores three
    compiled functions: `self._f_dual` (dual loss), `self._f_dual_grad`
    (gradient of the dual loss w.r.t. `i.param_eta` and `i.param_v`) and
    `self._f_policy_kl` (mean KL between the old and current policy).

    Args:
        i (namedtuple): Collection of variables to compute policy loss.

    Returns:
        tf.Tensor: Policy loss.

    """
    pol_dist = self._policy_network.dist
    old_pol_dist = self._old_policy_network.dist

    # Initialize dual params
    self._param_eta = 15.
    self._param_v = np.random.rand(
        self._env_spec.observation_space.flat_dim * 2 + 4)

    with tf.name_scope('bellman_error'):
        delta_v = (tf.boolean_mask(i.reward_var, i.valid_var) +
                   tf.tensordot(i.feat_diff, i.param_v, 1))

    with tf.name_scope('policy_loss'):
        ll = pol_dist.log_prob(i.action_var)
        ll = tf.boolean_mask(ll, i.valid_var)

        # Build the scaled error once; the maximum is subtracted before
        # exp() (the usual shift that keeps the exponent non-positive).
        scaled_delta = delta_v / i.param_eta
        weights = tf.exp(scaled_delta - tf.reduce_max(scaled_delta))
        loss = -tf.reduce_mean(ll * weights)

        reg_params = self.policy.get_regularizable_vars()
        # Skip the L2 term when there is nothing to regularize instead of
        # dividing by a zero count.
        if len(reg_params) > 0:
            loss += self._l2_reg_loss * tf.reduce_sum(
                [tf.reduce_mean(tf.square(param))
                 for param in reg_params]) / len(reg_params)

    with tf.name_scope('kl'):
        kl = old_pol_dist.kl_divergence(pol_dist)
        pol_mean_kl = tf.reduce_mean(kl)

    with tf.name_scope('dual'):
        scaled_delta = delta_v / i.param_eta
        max_scaled = tf.reduce_max(scaled_delta)
        # The maximum removed inside exp() is added back outside the log.
        dual_loss = i.param_eta * self._epsilon + (
            i.param_eta * tf.math.log(
                tf.reduce_mean(tf.exp(scaled_delta - max_scaled))) +
            i.param_eta * max_scaled)

        dual_loss += self._l2_reg_dual * (tf.square(i.param_eta) +
                                          tf.square(1 / i.param_eta))

        dual_grad = tf.gradients(dual_loss, [i.param_eta, i.param_v])

    # yapf: disable
    self._f_dual = tensor_utils.compile_function(
        flatten_inputs(self._dual_opt_inputs),
        dual_loss,
        log_name='f_dual')
    # yapf: enable

    self._f_dual_grad = tensor_utils.compile_function(
        flatten_inputs(self._dual_opt_inputs),
        dual_grad,
        log_name='f_dual_grad')

    self._f_policy_kl = tensor_utils.compile_function(
        flatten_inputs(self._policy_opt_inputs),
        pol_mean_kl,
        log_name='f_policy_kl')

    return loss
def _build_policy_loss(self, i):
    """Assemble the clipped surrogate policy loss and its diagnostics.

    Builds the policy, old-policy, inference and old-inference networks on
    the inputs in `i`, augments the rewards with the entropy terms,
    computes (optionally normalized) advantages and forms the clipped
    surrogate objective. Three session callables are stored as a side
    effect: `self._f_policy_kl`, `self._f_rewards` and `self._f_returns`.

    Args:
        i (namedtuple): Collection of variables to compute policy loss.

    Returns:
        tf.Tensor: Policy loss.
        tf.Tensor: Mean policy KL divergence.
        The value returned by `self._build_encoder_kl()`.

    """
    # pylint: disable=too-many-statements
    current_nets = self.policy.build(i.augmented_obs_var,
                                     i.task_var,
                                     name='loss_policy')
    self._policy_network, self._encoder_network = current_nets

    previous_nets = self._old_policy.build(i.augmented_obs_var,
                                           i.task_var,
                                           name='loss_old_policy')
    self._old_policy_network, self._old_encoder_network = previous_nets

    self._infer_network = self._inference.build(i.augmented_traj_var,
                                                name='loss_infer')
    self._old_infer_network = self._old_inference.build(
        i.augmented_traj_var, name='loss_old_infer')

    new_dist = self._policy_network.dist
    old_dist = self._old_policy_network.dist

    # Entropy terms
    entropy_terms = self._build_entropy_terms(i)
    encoder_entropy, inference_ce, policy_entropy = entropy_terms

    # Rewards = raw reward - inference cross-entropy cost + entropy bonus
    with tf.name_scope('augmented_rewards'):
        ce_cost = self.inference_ce_coeff * inference_ce
        entropy_bonus = self._policy_ent_coeff * policy_entropy
        rewards = i.reward_var - ce_cost + entropy_bonus

    with tf.name_scope('policy_loss'):
        with tf.name_scope('advantages'):
            advantages = compute_advantages(self._discount,
                                            self._gae_lambda,
                                            self.max_episode_length,
                                            i.baseline_var,
                                            rewards,
                                            name='advantages')
            advantages = tf.reshape(advantages,
                                    [-1, self.max_episode_length])

        # Optional advantage normalization
        tiny = tf.constant(1e-8, dtype=tf.float32)
        if self._center_adv:
            advantages = center_advs(advantages, axes=[0], eps=tiny)
        if self._positive_adv:
            advantages = positive_advs(advantages, tiny)

        # KL divergence between the old and the current policy
        with tf.name_scope('kl'):
            pol_mean_kl = tf.reduce_mean(old_dist.kl_divergence(new_dist))

        new_ll = new_dist.log_prob(i.action_var, name='log_likelihood')
        clip_range = self._lr_clip_range

        # Surrogate loss
        with tf.name_scope('surr_loss'):
            old_ll = tf.stop_gradient(old_dist.log_prob(i.action_var))
            # Clip early to avoid overflow
            log_ratio = tf.minimum(new_ll - old_ll, np.log(1 + clip_range))
            ratio = tf.exp(log_ratio)
            surrogate = tf.debugging.check_numerics(ratio * advantages,
                                                    message='surrogate')

        # Final objective
        with tf.name_scope('loss'):
            clipped_ratio = tf.clip_by_value(ratio,
                                             1 - clip_range,
                                             1 + clip_range,
                                             name='lr_clip')
            clipped_surrogate = clipped_ratio * advantages
            objective = tf.minimum(surrogate,
                                   clipped_surrogate,
                                   name='surr_obj')
            objective = tf.boolean_mask(objective, i.valid_var)

            # Maximizing E[objective] == minimizing -E_t[objective];
            # the encoder entropy bonus is then subtracted from the loss.
            loss = -tf.reduce_mean(objective)
            loss -= self.encoder_ent_coeff * encoder_entropy

        encoder_mean_kl = self._build_encoder_kl()

        def _as_callable(fetch):
            # One session callable fed with the flattened policy inputs.
            return tf.compat.v1.get_default_session().make_callable(
                fetch, feed_list=flatten_inputs(self._policy_opt_inputs))

        # Diagnostic functions
        self._f_policy_kl = _as_callable(pol_mean_kl)
        self._f_rewards = _as_callable(rewards)

        returns = discounted_returns(self._discount,
                                     self.max_episode_length,
                                     rewards,
                                     name='returns')
        self._f_returns = _as_callable(returns)

    return loss, pol_mean_kl, encoder_mean_kl
def metric_c(y_true, y_pred):
    """Categorical accuracy restricted to samples whose true class is 2.

    Args:
        y_true: one-hot (or score) labels with the class dimension last.
        y_pred: predictions with the same layout as `y_true`.

    Returns:
        The per-sample result of `tf.keras.metrics.categorical_accuracy`
        for the selected samples only. It is empty when no sample in the
        batch has true class 2.
    """
    # The class index must be taken along the class (last) axis so that the
    # mask has one entry per sample. Reducing over axis 0 yields one entry
    # per class, which cannot mask the sample dimension.
    true_class_ids = tf.argmax(y_true, axis=-1)
    is_target_class = tf.equal(true_class_ids, 2)
    class_true = tf.boolean_mask(y_true, is_target_class)
    class_pred = tf.boolean_mask(y_pred, is_target_class)
    return tf.keras.metrics.categorical_accuracy(class_true, class_pred)
def make_model(self):
    """Create placeholders, the graph model, the output heads and the losses.

    The problem type is selected by `self.args['--pr']` ('molecule',
    'identity', 'btb', or anything else for the default layout). Results
    are stored in `self.placeholders`, `self.weights` and `self.ops`; the
    method returns nothing. `self.ops['loss']` and `self.ops['loss_edges']`
    are the sums of the per-task losses collected in `self.ops['losses']`
    and `self.ops['losses_edges']`.

    NOTE(review): the block nesting was reconstructed from the logic of
    the code (original whitespace was lost) -- confirm against the
    repository version.
    """
    #TODO: refactor
    problem = self.args['--pr']

    if self.args['--pr'] == 'molecule':
        self.placeholders['target_values'] = tf.compat.v1.placeholder(
            tf.float32, [len(self.params['task_ids']), None],
            name='target_values')
        self.placeholders['target_mask'] = tf.compat.v1.placeholder(
            tf.float32, [len(self.params['task_ids']), None],
            name='target_mask')
    elif self.args['--pr'] in ['identity']:
        self.placeholders['target_values'] = tf.compat.v1.placeholder(
            tf.float32, [None, None, self.num_edge_types, None],
            name='target_values')
        self.placeholders['target_mask'] = tf.compat.v1.placeholder(
            tf.float32, [self.num_edge_types, None], name='target_mask')
    elif self.args['--pr'] in ['btb']:
        # NOTE(review): both value placeholders below request the graph
        # name 'target_values'; left unchanged in case something feeds
        # them by name.
        self.placeholders['target_values_head'] = tf.compat.v1.placeholder(
            tf.float32, [None, None], name='target_values')
        self.placeholders['target_mask'] = tf.compat.v1.placeholder(
            tf.float32, [self.output_size_edges, None], name='target_mask')
        self.placeholders['target_values_edges'] = tf.compat.v1.placeholder(
            tf.float32, [None, None], name='target_values')
    else:
        self.placeholders['target_values'] = tf.compat.v1.placeholder(
            tf.float32, [None, len(self.params['task_ids']), None],
            name='target_values')
        self.placeholders['target_mask'] = tf.compat.v1.placeholder(
            tf.float32, [len(self.params['task_ids']), None],
            name='target_mask')

    self.placeholders['num_graphs'] = tf.compat.v1.placeholder(
        tf.int32, [], name='num_graphs')
    self.placeholders['out_layer_dropout_keep_prob'] = tf.compat.v1.placeholder(
        tf.float32, [], name='out_layer_dropout_keep_prob')

    with tf.compat.v1.variable_scope("graph_model"):
        self.prepare_specific_graph_model()
        # This does the actual graph work:
        self.ops['initial_node_representations'] = (
            self.get_initial_node_representation())
        if self.params['use_graph']:
            self.ops['final_node_representations'] = (
                self.compute_final_node_representations(
                    self.ops['initial_node_representations']))
            self.ops['second_node_representations'] = (
                self.compute_final_node_representations(
                    self.ops['initial_node_representations'], 1))
        else:
            self.ops['final_node_representations'] = tf.zeros_like(
                self.placeholders['initial_node_representation'])

    self.ops['losses'] = []
    self.ops['losses_edges'] = []
    for (internal_id, task_id) in enumerate(self.params['task_ids']):
        with tf.compat.v1.variable_scope("out_layer_task%i" % task_id):
            output_size = self.params['output_size']
            hidden = []
            with tf.compat.v1.variable_scope("regression_gate"):
                self.weights['regression_gate_task%i' % task_id] = MLP(
                    2 * self.params['hidden_size'], output_size, hidden,
                    self.placeholders['out_layer_dropout_keep_prob'])
                self.weights['regression_gate_task_edges%i' % task_id] = MLP(
                    2 * self.params['hidden_size'], self.output_size_edges,
                    [], self.placeholders['out_layer_dropout_keep_prob'])
            with tf.compat.v1.variable_scope("regression"):
                self.weights['regression_transform_task%i' % task_id] = MLP(
                    self.params['hidden_size'], output_size, [],
                    self.placeholders['out_layer_dropout_keep_prob'])
                self.weights['regression_transform_task_edges%i' % task_id] = MLP(
                    self.params['hidden_size'], self.output_size_edges, [],
                    self.placeholders['out_layer_dropout_keep_prob'])

            computed_values = self.gated_regression(
                self.ops['final_node_representations'],
                self.ops['initial_node_representations'],
                self.weights['regression_gate_task%i' % task_id],
                self.weights['regression_transform_task%i' % task_id],
                None)  # BTB [b, v * o] ID [e * v * o, b] o is 1 for BTB
            if self.args['--pr'] in ['btb']:
                computed_values_edges = self.gated_regression(
                    self.ops['final_node_representations'],
                    self.ops['initial_node_representations'],
                    self.weights['regression_gate_task_edges%i' % task_id],
                    self.weights['regression_transform_task_edges%i' % task_id],
                    None,
                    is_edge_regr=True)  # [b, v * e]

            task_target_mask = self.placeholders['target_mask'][
                internal_id, :]  # ID [b] else: [b]
            # SMALL_NUMBER keeps the later divisions away from zero.
            task_target_num = tf.reduce_sum(
                input_tensor=task_target_mask) + SMALL_NUMBER  # ID and else: b

            if self.args['--pr'] == 'molecule':
                labels = self.placeholders['target_values'][internal_id, :]
                mask = tf.transpose(a=self.placeholders['node_mask'])
            elif self.args['--pr'] in ['identity']:
                labels = self.placeholders['target_values']  # [o, v, e, b]
                labels = tf.transpose(a=labels,
                                      perm=[2, 1, 0, 3])  # [e, v, o, b]
                labels = tf.reshape(
                    labels,
                    [-1, self.placeholders['num_graphs']])  # [e * v * o, b]
                # node_mask ID [b, e * v * o]
                mask = tf.transpose(
                    a=self.placeholders['node_mask'])  # [e * v * o,b]
            elif self.args['--pr'] in ['btb']:
                labels = self.placeholders['target_values_head']  # [b, v * o]
                mask = self.placeholders['node_mask']  # [b, v * o]
                labels_edges = self.placeholders[
                    'target_values_edges']  # [b, v * e]
                mask_edges = self.placeholders[
                    'node_mask_edges']  # [b, v * e]
            else:
                labels = self.placeholders['target_values'][:, internal_id, :]
                mask = tf.transpose(a=self.placeholders['node_mask'])

            if self.args['--pr'] == 'molecule':
                self.calculate_losses_for_molecules(computed_values,
                                                    internal_id, task_id)
            else:
                if self.args['--pr'] == 'btb':
                    # Cross-entropy for the head and for the edge outputs.
                    task_loss_heads = tf.reduce_sum(-tf.reduce_sum(
                        labels * tf.math.log(computed_values),
                        axis=1)) / task_target_num
                    task_loss_edges = tf.reduce_sum(-tf.reduce_sum(
                        labels_edges * tf.math.log(computed_values_edges),
                        axis=1)) / task_target_num
                    task_loss = (task_loss_heads + task_loss_edges)
                else:
                    if self.args.get('--no_labels'):
                        computed_values, labels, mask = self.reduce_edge_dimension(
                            computed_values=computed_values,
                            labels=labels,
                            mask=mask)
                    new_mask = tf.cast(mask, tf.bool)
                    masked_loss = tf.boolean_mask(
                        tensor=labels * tf.math.log(computed_values),
                        mask=new_mask)
                    task_loss = tf.reduce_sum(
                        input_tensor=-1 * masked_loss) / task_target_num

                # NOTE: the "accuracy" op currently just mirrors the loss.
                self.ops['accuracy_task%i' % task_id] = task_loss
                self.ops['losses'].append(task_loss)
                # The edge outputs exist only in 'btb' mode. Recording them
                # unconditionally referenced names that were never assigned
                # in the other modes.
                if problem == 'btb':
                    self.ops['losses_edges'].append(task_loss_edges)
                    self.ops['computed_values_edges'] = computed_values_edges
                self.ops['computed_values'] = computed_values
                self.ops['labels'] = labels
                self.ops['node_mask'] = tf.transpose(
                    mask) if self.args['--pr'] != 'btb' else mask
                self.ops['task_target_mask'] = task_target_mask

    self.ops['loss'] = tf.reduce_sum(input_tensor=self.ops['losses'])
    self.ops['loss_edges'] = tf.reduce_sum(
        input_tensor=self.ops['losses_edges'])
def _build_single_target(self, proposals, gt_boxes, gt_class_ids, img_shape):
    '''
    Sample foreground/background RoIs for one image and build their targets.

    Args
    ---
        proposals: [num_proposals, (y1, x1, y2, x2)] in regular coordinates.
        gt_boxes: [num_gt_boxes, (y1, x1, y2, x2)]
        gt_class_ids: [num_gt_boxes]
        img_shape: np.ndarray. [2]. (img_height, img_width).
            Not read anywhere in this method.

    Returns
    ---
        A 6-tuple; every element is wrapped in tf.stop_gradient:
        final_rois: sampled boxes, foregrounds first, then backgrounds.
        final_labels: class label per sampled box; every position after
            the foregrounds is overwritten with 0.
        final_bbox_targets: [num_rois, num_classes * 4] regression targets,
            non-zero only for foreground rows.
        bbox_inside_weights: [num_rois, num_classes * 4], ones in the
            target slot of each foreground row, zeros elsewhere.
        bbox_outside_weights: same shape, filled with
            1 / self.num_rcnn_deltas.
        fg_assignments: index of the best-overlapping GT box per sampled box.
    '''
    # remove padded proposals and gt boxes if any
    proposals, _ = trim_zeros(proposals)
    gt_boxes, non_zeros = trim_zeros(gt_boxes)
    gt_boxes = tf.cast(gt_boxes, proposals.dtype)
    gt_labels = tf.boolean_mask(gt_class_ids, non_zeros)
    # Jittered copies of the GT boxes (truncated normal, mean 5.0, stddev 0.1)
    # are appended to the proposals as extra candidates.
    noise_mean = 5.0
    noisy_gt_boxes = tf.add(gt_boxes,
                            tf.random.truncated_normal(tf.shape(gt_boxes), noise_mean, 0.1, dtype=proposals.dtype))
    proposals_gt = tf.concat([proposals, noisy_gt_boxes], axis=0)
    iou = geometry.compute_overlaps(proposals_gt, gt_boxes)  # [rois_size, gt_bboxes_size]
    max_overlaps = tf.reduce_max(iou, axis=1)  # [rois_size, ]
    gt_assignment = tf.argmax(iou, axis=1)  # [rois_size, ]
    labels = tf.gather(gt_labels, gt_assignment)  # [rois_size, ]

    # get FG and BG
    fg_inds = tf.where(max_overlaps >= self.pos_iou_thr)[:, 0]
    bg_inds = tf.where(tf.logical_and(max_overlaps < self.pos_iou_thr,
                                      max_overlaps >= self.neg_iou_thr))[:, 0]

    # filter FG/BG
    if tf.size(fg_inds) > self._max_pos_samples:
        fg_inds = tf.random.shuffle(fg_inds)[:self._max_pos_samples]
    # Backgrounds fill whatever is left of the num_rcnn_deltas budget.
    remaining = self.num_rcnn_deltas - tf.size(fg_inds)
    num_bg = tf.size(bg_inds)
    if tf.greater_equal(num_bg, remaining):
        bg_inds = tf.random.shuffle(bg_inds)[:remaining]
    else:
        # sample with replacement from very poor overlaps if number of backgrounds is not enough
        bg_inds = tf.where(max_overlaps < self.pos_iou_thr)[:, 0]
        bg_inds = tf.random.shuffle(bg_inds)[:remaining]
        num_bg = tf.size(bg_inds)
        # NOTE(review): if bg_inds is empty here the loop cannot make
        # progress (dup_bgs stays empty) -- confirm callers rule that out.
        while remaining > num_bg:
            dups = remaining - num_bg
            dup_bgs = tf.random.shuffle(bg_inds)[:dups]
            bg_inds = tf.concat([bg_inds, dup_bgs], axis=0)
            num_bg = tf.size(bg_inds)

    # tf.print('proposal target generated %d fgs and %d bgs.' % (tf.size(fg_inds), tf.size(bg_inds)))

    keep_inds = tf.concat([fg_inds, bg_inds], axis=0)
    final_rois = tf.gather(proposals_gt, keep_inds)  # rois[keep_inds]
    final_labels = tf.gather(labels, keep_inds)  # labels[keep_inds]
    # Rows size(fg_inds)..size(keep_inds)-1 are the backgrounds: label 0.
    zero_indices = tf.expand_dims(tf.range(tf.size(fg_inds), tf.size(keep_inds), dtype=tf.int32), axis=1)
    zero_labels = tf.zeros(tf.shape(zero_indices)[0], dtype=tf.int32)
    final_labels = tf.tensor_scatter_nd_update(final_labels, zero_indices, zero_labels)

    # inside weights - positive examples are set, rest are zeros
    bbox_inside_weights = tf.zeros((tf.size(keep_inds), self.num_classes, 4), dtype=tf.float32)
    if tf.size(fg_inds) > 0:
        # (row, class slot) pairs: slot 0 when class-agnostic, otherwise the
        # label of the foreground box.
        if self.reg_class_agnostic:
            cur_index = tf.transpose(tf.stack([tf.range(tf.size(fg_inds)),
                                               tf.zeros(tf.size(fg_inds), dtype=tf.int32)]))
        else:
            cur_index = tf.stack([tf.range(tf.size(fg_inds)), tf.gather(labels, fg_inds)], axis=1)
        bbox_inside_weights = tf.tensor_scatter_nd_update(bbox_inside_weights,
                                                          cur_index,
                                                          tf.ones([tf.size(fg_inds), 4], bbox_inside_weights.dtype))
    bbox_inside_weights = tf.reshape(bbox_inside_weights, [-1, self.num_classes * 4])

    final_bbox_targets = tf.zeros((tf.size(keep_inds), self.num_classes, 4), dtype=tf.float32)
    if tf.size(fg_inds) > 0:
        # Foregrounds occupy the first size(fg_inds) rows of final_rois.
        bbox_targets = transforms.bbox2delta(
            tf.gather(final_rois, tf.range(tf.size(fg_inds))),
            tf.gather(gt_boxes, tf.gather(gt_assignment, fg_inds)),
            target_stds=self.target_stds, target_means=self.target_means)
        if self.reg_class_agnostic:
            final_bbox_targets = tf.tensor_scatter_nd_update(
                final_bbox_targets,
                tf.transpose(tf.stack([tf.range(tf.size(fg_inds)),
                                       tf.zeros(tf.size(fg_inds), dtype=tf.int32)])),
                bbox_targets)
        else:
            final_bbox_targets = tf.tensor_scatter_nd_update(
                final_bbox_targets,
                tf.stack([tf.range(tf.size(fg_inds)), tf.gather(labels, fg_inds)], axis=1),
                bbox_targets)
    final_bbox_targets = tf.reshape(final_bbox_targets, [-1, self.num_classes * 4])

    bbox_outside_weights = tf.ones_like(bbox_inside_weights,
                                        dtype=bbox_inside_weights.dtype) * 1.0 / self.num_rcnn_deltas
    fg_assignments = tf.gather(gt_assignment, keep_inds)
    return (tf.stop_gradient(final_rois), tf.stop_gradient(final_labels), tf.stop_gradient(final_bbox_targets),
            tf.stop_gradient(bbox_inside_weights), tf.stop_gradient(bbox_outside_weights),
            tf.stop_gradient(fg_assignments))
def _get_cost(self, logits, cost_name, cost_kwargs=None):
    """
    Construct the loss tensor selected by `cost_name`.

    Supported `cost_name` values:
        "cross_entropy":          mean softmax cross-entropy, optionally
                                  weighted per pixel by the true class weight.
        "liver_dice":             negative hard Dice of "label > 0".
        "tumor_dice":             negative hard Dice of "label > 1".
        "avg_class_ce":           cross-entropy averaged per class, then over
                                  the classes present in the batch.
        "avg_class_ce_binary":    same, after merging classes 0 and 1 into a
                                  single class (class 2 is kept as is).
        "avg_class_ce_symmetric": like "avg_class_ce", with the predicted
                                  probabilities added to the weights and
                                  pixel counts at a factor of 0.1.

    Optional entries of `cost_kwargs`:
        class_weights: weights for the different classes in case of
            multi-class imbalance
        regularizer: factor of the L2 penalty over `self.variables` that is
            added to the loss

    NOTE(review): the two Dice options go through `tf.argmax` and a
    non-zero count -- presumably usable for monitoring only, not as a
    trainable objective; confirm.

    Raises:
        ValueError: if `cost_name` is not one of the names above.
    """
    # `None` replaces the shared mutable `{}` default; the dict is only read.
    if cost_kwargs is None:
        cost_kwargs = {}

    flat_logits = tf.reshape(logits, [-1, self.n_class])
    flat_labels = tf.reshape(self.y, [-1, self.n_class])

    def class_weight_tensor(n_weights):
        # Class weights from cost_kwargs, defaulting to all ones.
        weights = cost_kwargs.get("class_weights", np.ones(n_weights))
        return tf.constant(np.array(weights, dtype=np.float32))

    def negative_hard_dice(min_label):
        # -Dice between the predicted and true masks of "label > min_label".
        eps = 1e-5
        prediction_b = tf.argmax(pixel_wise_softmax_2(logits), axis=3) > min_label
        gt_b = tf.argmax(self.y, axis=3) > min_label
        intersection = tf.to_float(tf.count_nonzero(prediction_b & gt_b))
        size_pred = tf.to_float(tf.count_nonzero(prediction_b))
        size_gt = tf.to_float(tf.count_nonzero(gt_b))
        return -(2. * intersection / (size_pred + size_gt + eps))

    def class_averaged(per_pixel_loss, weight_map, px_per_class):
        # Mean over present classes of (weighted loss sum / pixel count).
        # per_pixel_loss is [npixel]; broadcasting against the
        # [npixel, n_class] weight map replaces the explicit tile.
        weighted_loss = tf.expand_dims(per_pixel_loss, 1) * weight_map
        loss_sum_per_class = tf.reduce_sum(weighted_loss, axis=0)
        # Classes without any pixel are dropped so they cannot divide by 0.
        include_class = tf.not_equal(px_per_class, 0)
        loss_per_class = tf.divide(
            tf.boolean_mask(loss_sum_per_class, include_class),
            tf.boolean_mask(px_per_class, include_class))
        return tf.reduce_mean(loss_per_class)

    if cost_name == "cross_entropy":
        loss_map = tf.nn.softmax_cross_entropy_with_logits(
            logits=flat_logits, labels=flat_labels)
        if "class_weights" in cost_kwargs:
            class_weights = tf.constant(
                np.array(cost_kwargs["class_weights"], dtype=np.float32))
            # Weight of each pixel = weight of its true class.
            weight_map = tf.reduce_sum(
                tf.multiply(flat_labels, class_weights), axis=1)
            loss_map = tf.multiply(loss_map, weight_map)
        loss = tf.reduce_mean(loss_map)

    elif cost_name == "liver_dice":
        loss = negative_hard_dice(0)

    elif cost_name == "tumor_dice":
        loss = negative_hard_dice(1)

    elif cost_name == "avg_class_ce":
        class_weights = class_weight_tensor(self.n_class)
        loss_map = tf.nn.softmax_cross_entropy_with_logits(
            logits=flat_logits, labels=flat_labels)
        loss = class_averaged(loss_map,
                              tf.multiply(flat_labels, class_weights),
                              tf.reduce_sum(flat_labels, axis=0))

    elif cost_name == "avg_class_ce_binary":
        # Only care whether it's tumor or not
        class_weights = class_weight_tensor(self.n_class - 1)
        combined_labels = tf.stack(
            [flat_labels[:, 0] + flat_labels[:, 1], flat_labels[:, 2]],
            axis=1)
        # Logit of the merged class is log(exp(l0) + exp(l1)), computed as
        # a log-sum-exp so that large logits do not overflow.
        combined_logits = tf.stack(
            [tf.reduce_logsumexp(flat_logits[:, :2], axis=1),
             flat_logits[:, 2]],
            axis=1)
        loss_map = tf.nn.softmax_cross_entropy_with_logits(
            logits=combined_logits, labels=combined_labels)
        loss = class_averaged(loss_map,
                              tf.multiply(combined_labels, class_weights),
                              tf.reduce_sum(combined_labels, axis=0))

    elif cost_name == "avg_class_ce_symmetric":
        prediction = pixel_wise_softmax_2(logits)
        flat_prediction = tf.reshape(prediction, [-1, self.n_class])
        class_weights = class_weight_tensor(self.n_class)
        weight_map = (tf.multiply(flat_labels, class_weights) +
                      0.1 * tf.multiply(flat_prediction, class_weights))
        loss_map = tf.nn.softmax_cross_entropy_with_logits(
            logits=flat_logits, labels=flat_labels)
        px_per_class = (tf.reduce_sum(flat_labels, axis=0) +
                        0.1 * tf.reduce_sum(flat_prediction, axis=0))
        loss = class_averaged(loss_map, weight_map, px_per_class)

    else:
        # The format string needs its %s placeholder; without it the `%`
        # itself failed with a TypeError before ValueError could be raised.
        raise ValueError("Unknown cost function: %s" % cost_name)

    if "regularizer" in cost_kwargs:
        regularizer = cost_kwargs["regularizer"]
        regularizers = sum(
            [tf.nn.l2_loss(variable) for variable in self.variables])
        loss += (regularizer * regularizers)

    return loss
def discriminative_loss_single(prediction, correct_label, feature_dim,
                               label_shape, delta_v, delta_d, param_var,
                               param_dist, param_reg):
    ''' Discriminative loss for a single prediction/label pair.
    :param prediction: inference of network
    :param correct_label: instance label
    :param feature_dim: feature dimension of prediction
    :param label_shape: shape of label
    :param delta_v: cutoff variance distance
    :param delta_d: cutoff cluster distance
    :param param_var: weight for intra cluster variance
    :param param_dist: weight for inter cluster distances
    :param param_reg: weight regularization
    :return: tuple ``(loss, l_var, l_dist, l_reg)``; the three terms are
        returned already multiplied by their respective weights
    '''

    ### Reshape so pixels are aligned along a vector
    num_pixels = label_shape[1] * label_shape[0]
    correct_label = tf.reshape(correct_label, [num_pixels])
    reshaped_pred = tf.reshape(prediction, [num_pixels, feature_dim])

    ### Count instances
    unique_labels, unique_id, counts = tf.unique_with_counts(correct_label)
    counts = tf.cast(counts, tf.float32)
    num_instances = tf.size(unique_labels)

    # Per-instance mean embedding, then broadcast back to every pixel.
    segmented_sum = tf.unsorted_segment_sum(reshaped_pred, unique_id,
                                            num_instances)
    mu = tf.div(segmented_sum, tf.reshape(counts, (-1, 1)))
    mu_expand = tf.gather(mu, unique_id)

    ### Calculate l_var
    distance = tf_norm(tf.subtract(mu_expand, reshaped_pred), axis=1)
    distance = tf.subtract(distance, delta_v)
    # Hinge: only the part of the distance beyond delta_v is penalised.
    distance = tf.clip_by_value(distance, 0., distance)
    distance = tf.square(distance)

    l_var = tf.unsorted_segment_sum(distance, unique_id, num_instances)
    l_var = tf.div(l_var, counts)
    l_var = tf.reduce_sum(l_var)
    l_var = tf.divide(l_var, tf.cast(num_instances, tf.float32))

    ### Calculate l_dist

    # Get distance for each pair of clusters like this:
    #   mu_1 - mu_1
    #   mu_2 - mu_1
    #   mu_3 - mu_1
    #   mu_1 - mu_2
    #   mu_2 - mu_2
    #   mu_3 - mu_2
    #   mu_1 - mu_3
    #   mu_2 - mu_3
    #   mu_3 - mu_3
    mu_interleaved_rep = tf.tile(mu, [num_instances, 1])
    mu_band_rep = tf.tile(mu, [1, num_instances])
    mu_band_rep = tf.reshape(mu_band_rep,
                             (num_instances * num_instances, feature_dim))

    mu_diff = tf.subtract(mu_band_rep, mu_interleaved_rep)

    # Filter out zeros from same cluster subtraction
    intermediate_tensor = tf.reduce_sum(tf.abs(mu_diff), axis=1)
    zero_vector = tf.zeros(1, dtype=tf.float32)
    bool_mask = tf.not_equal(intermediate_tensor, zero_vector)
    mu_diff_bool = tf.boolean_mask(mu_diff, bool_mask)

    mu_norm = tf_norm(mu_diff_bool, axis=1)
    mu_norm = tf.subtract(2. * delta_d, mu_norm)
    # Hinge: only pairs closer than 2 * delta_d contribute.
    mu_norm = tf.clip_by_value(mu_norm, 0., mu_norm)
    mu_norm = tf.square(mu_norm)

    # With a single instance every pair is a self-pair, so nothing survives
    # the filter above and tf.reduce_mean over the empty tensor would be NaN.
    # There is no inter-cluster term in that case, so use 0 instead.
    l_dist = tf.cond(
        tf.equal(tf.size(mu_norm), 0),
        lambda: tf.zeros([], dtype=mu_norm.dtype),
        lambda: tf.reduce_mean(mu_norm))

    ### Calculate l_reg
    l_reg = tf.reduce_mean(tf_norm(mu, axis=1))

    param_scale = 1.
    l_var = param_var * l_var
    l_dist = param_dist * l_dist
    l_reg = param_reg * l_reg

    loss = param_scale * (l_var + l_dist + l_reg)

    return loss, l_var, l_dist, l_reg
def loss_op(self):
    """Build the NCE training loss and the top-k evaluation ops.

    Reads ``self.music_id``, ``self.transformer_out``, ``self.lengths``,
    ``self.hidden_dim``, ``self.music_num``, ``self.neg_num``,
    ``self.sequence_length`` and ``self.top_k``.

    Sets:
        self.loss: mean NCE loss over the positions selected by the
            sequence mask.
        self.logits: full logits against every ``nce_weights`` row.
        self.loss_test: constant 0 placeholder (the full cross-entropy
            test loss is disabled).
        self.hit_shape: shape of the masked top-k hit vector.
        self.recall: mean of the masked top-k hits.
        self.rank: column index inside the top-k list at which the label
            was found, for masked positions where it was found.
    """
    with tf.variable_scope("loss"):
        # [batch_size * seq_length, 1]
        neg_labels = tf.reshape(self.music_id, [-1, 1])
        # [batch_size * seq_length, hidden_dim]
        neg_inputs = tf.reshape(self.transformer_out, [-1, self.hidden_dim])

        nce_weights = tf.get_variable(
            name='nce_weights',
            initializer=tf.truncated_normal(
                [self.music_num, self.hidden_dim],
                stddev=1.0 / math.sqrt(self.hidden_dim)))
        nce_biases = tf.get_variable(
            name='nce_biases', initializer=tf.zeros([self.music_num]))

        # Pin the mask width to sequence_length. Without maxlen the mask is
        # only max(lengths) wide, and boolean_mask flattens both operands, so
        # a narrower mask would silently select the wrong positions of the
        # [-1, sequence_length] tensors below.
        mask = tf.sequence_mask(self.lengths, maxlen=self.sequence_length)
        flat_mask = tf.reshape(mask, [-1])

        # Per-position loss, mask not applied yet: [batch_size * seq_length]
        loss = tf.nn.nce_loss(weights=nce_weights,
                              biases=nce_biases,
                              labels=neg_labels,
                              inputs=neg_inputs,
                              num_sampled=self.neg_num,
                              num_classes=self.music_num,
                              remove_accidental_hits=True)
        loss = tf.reshape(loss, [-1, self.sequence_length])
        self.loss = tf.reduce_mean(tf.boolean_mask(loss, mask))

        # [batch_size * seq_length, music_num]
        logits = tf.matmul(neg_inputs, tf.transpose(nce_weights))
        self.logits = tf.nn.bias_add(logits, nce_biases)

        self.loss_test = tf.constant(0, dtype=tf.float32, shape=[])

        # [batch_size * seq_length]
        neg_labels = tf.reshape(self.music_id, [-1])
        # [batch_size * seq_length]
        hit = tf.nn.in_top_k(self.logits, neg_labels, self.top_k)
        hit = tf.reshape(hit, [-1, self.sequence_length])
        mask_hit = tf.boolean_mask(hit, mask)
        self.hit_shape = tf.shape(mask_hit)
        self.recall = tf.reduce_mean(tf.to_float(mask_hit))

        # [batch_size * seq_length, top_k]
        top_k_index = tf.nn.top_k(self.logits, self.top_k)[1]
        index_mask = tf.boolean_mask(top_k_index, flat_mask)
        label_mask = tf.boolean_mask(neg_labels, flat_mask)
        label_mask = tf.reshape(label_mask, [-1, 1])
        # tf.where yields (row, column) pairs; keep the column, i.e. the
        # position of the label inside the top-k list.
        self.rank = tf.where(
            tf.equal(tf.to_int32(index_mask), tf.to_int32(label_mask)))[:, -1]
def upper_triangular_part(matrix):
    """Return the entries strictly above the main diagonal of ``matrix``.

    The diagonal itself is excluded. The selected entries come back as a
    flattened tensor, in the order ``tf.boolean_mask`` emits them.

    Args:
        matrix: tensor whose static ``shape`` is fully defined (it is passed
            to ``tf.ones``).

    Returns:
        Tensor holding the strictly-upper-triangular entries.
    """
    # band_part(x, -1, 0) keeps the lower triangle including the diagonal.
    lower_with_diag = tf.linalg.band_part(tf.ones(matrix.shape), -1, 0)
    # tf.boolean_mask is specified for boolean masks, so convert the 0/1
    # float complement explicitly instead of passing floats through.
    strict_upper = tf.cast(1 - lower_with_diag, tf.bool)
    return tf.boolean_mask(matrix, strict_upper)
def updated_log_likelihood_for_edge_changes(node_pairs, adjacency_matrix,
                                            d_min):
    """
    Evaluate, for every candidate node pair, the Powerlaw fit of the degree
    distribution that would result from flipping that pair's edge (adding it
    when absent, removing it when present). The graph is assumed to be
    undirected and unweighted.

    Parameters
    ----------
    node_pairs: tf.Tensor, shape (e, 2) dtype int
        The e candidate node pairs, each given as two node indices.
    adjacency_matrix: tf.Tensor shape (N,N) dtype int
        The input adjacency matrix. Assumed to be unweighted and symmetric.
    d_min: int
        The minimum degree considered in the Powerlaw distribution.

    Returns
    -------
    new_ll: tf.Tensor of shape (e,) and dtype float
        Log likelihood per node pair after flipping its edge.
    new_alpha: tf.Tensor of shape (e,) and dtype float
        Maximum likelihood estimate of the Powerlaw exponent per node pair
        after flipping its edge.
    new_n: tf.Tensor of shape (e,) and dtype float
        Number of degrees >= d_min per node pair after flipping its edge.
    sum_log_degrees_after: tf.Tensor of floats shape (e,)
        Sum of log degrees (restricted to degrees >= d_min) per node pair
        after flipping its edge.
    """
    pair_indices = tf.cast(node_pairs, tf.int32)

    # 1.0 where the pair is currently connected, 0.0 where it is not.
    has_edge = tf.cast(tf.gather_nd(adjacency_matrix, pair_indices),
                       tf.float32)

    # Current degree statistics restricted to degrees >= d_min.
    degrees = tf.reduce_sum(adjacency_matrix, 1)
    counted = tf.greater_equal(degrees, d_min)
    log_degree_total = tf.reduce_sum(
        tf.log(tf.boolean_mask(degrees, counted)))
    num_counted = tf.cast(tf.count_nonzero(counted), tf.float32)

    # Flipping an edge changes both endpoint degrees by -1 (edge present)
    # or +1 (edge absent).
    degree_change = -2 * has_edge + 1

    degrees_before = tf.gather(degrees, pair_indices)
    degrees_after = tf.gather(degrees, pair_indices) + degree_change[:, None]

    # Incrementally update the statistics instead of recomputing them.
    sum_log_degrees_after, new_n = update_sum_log_degrees(
        log_degree_total, num_counted, degrees_before, degrees_after, d_min)
    new_n = tf.cast(new_n, tf.float32)

    new_alpha = compute_alpha(new_n, sum_log_degrees_after, d_min)
    new_ll = compute_log_likelihood(new_n, new_alpha, sum_log_degrees_after,
                                    d_min)

    return new_ll, new_alpha, new_n, sum_log_degrees_after
def tf_retrieve_timesteps(self, n):
    """Build the ops that select up to ``n`` timesteps from buffer and priority memory.

    The method resets ``self.batch_indices``, draws indices from the priority
    memory, drops indices whose ``self.terminal_memory`` entry is true,
    records what was selected in ``self.last_batch_buffer_elems`` and
    ``self.batch_indices``, and finally delegates to
    ``self.retrieve_indices``.

    Args:
        n: number of timesteps requested.

    Returns:
        The result of ``self.retrieve_indices`` for the chosen buffer element
        count and priority indices.
    """
    # min(buffer_index, n) -- presumably buffer_index counts the elements
    # currently held in the buffer; verify against the class definition.
    num_buffer_elems = tf.minimum(x=self.buffer_index, y=n)

    # We can only sample from priority memory if buffer elements were previously inserted.
    num_priority_elements = tf.cond(
        pred=self.memory_size > 0,
        true_fn=lambda: n - num_buffer_elems,
        false_fn=lambda: 0
    )

    def sampling_fn():
        # Vectorized sampling.
        sum_priorities = tf.reduce_sum(input_tensor=self.priorities, axis=0)
        sample = tf.random_uniform(shape=(num_priority_elements,), dtype=tf.float32)
        indices = tf.zeros(shape=(num_priority_elements,), dtype=tf.int32)

        def cond(loop_index, sample):
            # NOTE(review): `sample` is drawn from [0, 1), so "all samples
            # <= 0" looks false on entry and the loop body may never run.
            # Confirm whether the intended predicate is "any sample > 0".
            return tf.reduce_all(input_tensor=(sample <= 0.0))

        def sampling_body(loop_index, sample):
            # Subtract the normalised priority at the current index and
            # advance the index of every sample that is still positive.
            priority = tf.gather(params=self.priorities, indices=loop_index)
            sample -= priority / sum_priorities
            loop_index += tf.cast(
                x=(sample > 0.0),
                dtype=tf.int32,
            )

            return loop_index, sample

        priority_indices = tf.while_loop(
            cond=cond,
            body=sampling_body,
            loop_vars=(indices, sample)
        )[0]
        return priority_indices

    # Reset batch indices.
    assignment = tf.assign(
        ref=self.batch_indices,
        value=tf.zeros(shape=tf.shape(self.batch_indices), dtype=tf.int32)
    )
    # NOTE(review): the extent of this `with` block was reconstructed from a
    # flattened source -- confirm which statements belong inside it.
    with tf.control_dependencies(control_inputs=(assignment,)):
        priority_indices = tf.cond(
            pred=num_priority_elements > 0,
            true_fn=sampling_fn,
            false_fn=lambda: tf.zeros(shape=(num_priority_elements,), dtype=tf.int32)
        )
        # Drop sampled indices whose terminal flag is set.
        priority_terminal = tf.gather(params=self.terminal_memory, indices=priority_indices)
        priority_indices = tf.boolean_mask(tensor=priority_indices, mask=tf.logical_not(x=priority_terminal))

        # Store how many elements we retrieved from the buffer for updating priorities.
        # Note that this is just the count, as we can reconstruct the indices from that.
        assignments = list()
        assignments.append(tf.assign(ref=self.last_batch_buffer_elems, value=num_buffer_elems))

        # Store indices used from priority memory. Note that these are the full indices
        # as they were not taken in order.
        update = tf.ones(shape=tf.shape(input=priority_indices), dtype=tf.int32)
        assignments.append(tf.scatter_update(
            ref=self.batch_indices,
            indices=priority_indices,
            updates=update
        ))
    # Fetch results.
    with tf.control_dependencies(control_inputs=assignments):
        return self.retrieve_indices(buffer_elements=num_buffer_elems, priority_indices=priority_indices)
def __init__(self,
             actions,
             actions_logp,
             actions_entropy,
             dones,
             behaviour_logits,
             target_logits,
             discount,
             rewards,
             values,
             bootstrap_value,
             valid_mask,
             vf_loss_coeff=0.5,
             entropy_coeff=-0.01,
             clip_rho_threshold=1.0,
             clip_pg_rho_threshold=1.0):
    """V-trace importance-weighted policy gradient loss.

    All time-major inputs have shape [T, B, ...] where `B` is the batch
    size; V-trace needs `B` to treat episode cut boundaries correctly.

    Args:
        actions: An int32 tensor of shape [T, B, NUM_ACTIONS].
        actions_logp: A float32 tensor of shape [T, B].
        actions_entropy: A float32 tensor of shape [T, B].
        dones: A bool tensor of shape [T, B].
        behaviour_logits: A float32 tensor of shape [T, B, NUM_ACTIONS].
        target_logits: A float32 tensor of shape [T, B, NUM_ACTIONS].
        discount: A float32 scalar.
        rewards: A float32 tensor of shape [T, B].
        values: A float32 tensor of shape [T, B].
        bootstrap_value: A float32 tensor of shape [B].
        valid_mask: A bool tensor of valid RNN input elements (#2992).
        vf_loss_coeff: Weight of the value-function loss in `total_loss`.
        entropy_coeff: Weight of the summed entropy in `total_loss`.
        clip_rho_threshold: Forwarded to `vtrace.from_logits`.
        clip_pg_rho_threshold: Forwarded to `vtrace.from_logits`.
    """

    def only_valid(tensor):
        # Keep just the elements flagged by valid_mask.
        return tf.boolean_mask(tensor, valid_mask)

    # V-trace runs on the CPU for better perf.
    with tf.device("/cpu:0"):
        # Terminal steps get a zero discount.
        step_discounts = tf.to_float(~dones) * discount
        self.vtrace_returns = vtrace.from_logits(
            behaviour_policy_logits=behaviour_logits,
            target_policy_logits=target_logits,
            actions=tf.cast(actions, tf.int32),
            discounts=step_discounts,
            rewards=rewards,
            values=values,
            bootstrap_value=bootstrap_value,
            clip_rho_threshold=tf.cast(clip_rho_threshold, tf.float32),
            clip_pg_rho_threshold=tf.cast(clip_pg_rho_threshold,
                                          tf.float32))

    # Policy gradient term.
    weighted_logp = actions_logp * self.vtrace_returns.pg_advantages
    self.pi_loss = -tf.reduce_sum(only_valid(weighted_logp))

    # Baseline (value function) term.
    value_error = only_valid(values - self.vtrace_returns.vs)
    self.vf_loss = 0.5 * tf.reduce_sum(tf.square(value_error))

    # Entropy term.
    self.entropy = tf.reduce_sum(only_valid(actions_entropy))

    # Weighted sum of the three terms.
    self.total_loss = (self.pi_loss + self.vf_loss * vf_loss_coeff +
                       self.entropy * entropy_coeff)
def to_sparse(tensor, lengths, max_length):
    """Turn a padded dense tensor into a ``tf.SparseTensor`` of its in-length entries.

    Args:
        tensor: dense tensor whose leading dimensions line up with the
            sequence mask built from ``lengths`` and ``max_length``.
        lengths: per-row number of valid entries.
        max_length: width of the sequence mask.

    Returns:
        ``tf.SparseTensor`` with int64 indices, int32 values and the dense
        shape of ``tensor``.
    """
    valid = tf.sequence_mask(lengths, max_length)
    # Coordinates of every in-length position.
    positions = tf.cast(tf.where(valid), tf.int64)
    kept_values = tf.cast(tf.boolean_mask(tensor, valid), tf.int32)
    dense_shape = tf.cast(tf.shape(tensor), tf.int64)
    return tf.SparseTensor(positions, kept_values, dense_shape)
def construct(self, args, convolution, hidden_size, state_shape, num_actions):
    """Build the value network, its loss and the training op in ``self.session``'s graph.

    Args:
        args: object providing ``padding`` (conv padding mode) and
            ``learning_rate``.
        convolution: iterable of ``(filters, kernel, stride)`` triples; a
            triple with ``filters == 0`` adds a max-pooling layer, any other
            adds a convolution followed by ReLU.
        hidden_size: width of the dense layer before the output layer.
        state_shape: list with the shape of a single state (without batch).
        num_actions: number of actions, i.e. width of the output layer.

    Sets the placeholders ``states``, ``prev_states``, ``actions``,
    ``returns`` and ``weights``, plus ``predicted_values``, ``training`` and
    ``saver``, and initialises all global variables.
    """
    with self.session.graph.as_default():
        self.states = tf.placeholder(tf.float32, [None] + state_shape)
        self.prev_states = tf.placeholder(tf.float32, [None] + state_shape)
        self.actions = tf.placeholder(tf.int32, [None])
        self.returns = tf.placeholder(tf.float32, [None])
        self.weights = tf.placeholder(tf.float32, [None])

        # Current and previous frame, both resized to 32x32 and stacked
        # along the channel axis.
        features = tf.concat([
            tf.image.resize_images(self.states, [32, 32]),
            tf.image.resize_images(self.prev_states, [32, 32])
        ], axis=3)

        # NOTE(review): the ReLU is placed in the convolution branch only;
        # the flattened source does not show whether it also followed the
        # pooling branch -- confirm.
        for filters, kernel, stride in convolution:
            if filters == 0:
                features = tf.layers.max_pooling2d(
                    inputs=features,
                    pool_size=[kernel, kernel],
                    strides=stride)
            else:
                features = tf.layers.conv2d(inputs=features,
                                            filters=filters,
                                            kernel_size=[kernel, kernel],
                                            strides=stride,
                                            padding=args.padding)
                features = tf.nn.relu(features)

        features = tf.layers.flatten(features)
        features = tf.layers.dense(features, hidden_size,
                                   activation=tf.nn.relu)
        self.predicted_values = tf.layers.dense(features, num_actions,
                                                activation=None)

        # Select, per row, the predicted value of the action actually taken.
        # tf.boolean_mask is specified for boolean masks, so the one-hot
        # encoding is converted explicitly.
        action_mask = tf.cast(tf.one_hot(self.actions, num_actions), tf.bool)
        chosen_values = tf.boolean_mask(self.predicted_values, action_mask)
        loss = tf.losses.mean_squared_error(self.returns,
                                            chosen_values,
                                            weights=self.weights)

        global_step = tf.train.create_global_step()
        self.training = tf.train.AdamOptimizer(args.learning_rate).minimize(
            loss, global_step=global_step, name="training")

        self.saver = tf.train.Saver()

        # Initialize variables
        self.session.run(tf.global_variables_initializer())
def forward(self, tensors, mode: str = None):
    """Apply a boolean mask to a tensor.

    ``tensors`` is a ``(tensor, mask)`` pair; the entries of ``tensor``
    selected by ``mask`` are returned. ``mode`` is accepted but unused.
    """
    values, keep = tensors
    selected = tf.boolean_mask(values, keep)
    return selected
def __init__(self, n_hidden_size, n_class, lr, n_enc_size, n_dec_size,
             n_enc_vocab_size, n_dec_vocab_size, n_embedding_size):
    """Build the full graph of an LSTM encoder-decoder with attention.

    The constructor creates placeholders, embedding variables, encoder and
    decoder cells, a training decoder driven by ``tf.while_loop``, the masked
    cross-entropy loss with an Adam step, an accuracy metric, and a second
    ``tf.while_loop`` that decodes greedily for inference.

    NOTE(review): this block was reconstructed from a flattened source, so
    the nesting of the ``tf.variable_scope`` blocks below is a best guess
    (chosen so that both ``'output_dense'`` layers live under the same
    ``'While'`` variable scope) -- confirm against the original layout.

    Args:
        n_hidden_size: number of LSTM units of the encoder and decoder cells.
        n_class: width of the output dense layer.
        lr: learning rate passed to the Adam optimizer.
        n_enc_size: stored on the instance; not otherwise used in this method.
        n_dec_size: number of decoder steps (loop bound and mask width).
        n_enc_vocab_size: number of rows of the encoder embedding table.
        n_dec_vocab_size: number of rows of the decoder embedding table.
        n_embedding_size: embedding dimension.
    """
    with tf.variable_scope('Input'):
        self.lr = lr
        self.n_class = n_class
        self.n_enc_size = n_enc_size
        self.n_dec_size = n_dec_size
        self.n_hidden_size = n_hidden_size
        self.n_enc_vocab_size = n_enc_vocab_size
        self.n_dec_vocab_size = n_dec_vocab_size
        self.n_embedding_size = n_embedding_size

    with tf.variable_scope('Placeholder'):
        self.enc_input = tf.placeholder(tf.int64, [None, None], name='enc_input')
        self.dec_input = tf.placeholder(tf.int64, [None, None], name='dec_input')
        self.inf_input = tf.placeholder(tf.int64, [None, None], name='inf_input')
        self.targets = tf.placeholder(tf.int64, [None, None], name='tar_input')
        self.x_seq_len = tf.placeholder(tf.int64, [None], name="x_seq_len")
        self.y_seq_len = tf.placeholder(tf.int64, [None], name="y_seq_len")
        self.dropout_keep = tf.placeholder(tf.float32, name="dropout_keep")

    with tf.variable_scope('Variable'):
        # enc_embeddings [ enc_voc_size, embedding_size ]
        # dec_embeddings [ dec_voc_size, embedding_size ]
        self.enc_embeddings = tf.Variable(tf.random_normal(
            [self.n_enc_vocab_size, self.n_embedding_size]), name='enc_embedding')
        self.dec_embeddings = tf.Variable(tf.random_normal(
            [self.n_dec_vocab_size, self.n_embedding_size]), name='dec_embedding')

    with tf.variable_scope('MakeCell'):
        self.enc_cell = tf.nn.rnn_cell.LSTMCell(
            num_units=self.n_hidden_size)
        self.dec_cell = tf.nn.rnn_cell.LSTMCell(
            num_units=self.n_hidden_size)
        # Dropout is applied to the cell outputs only.
        self.enc_cell = tf.nn.rnn_cell.DropoutWrapper(
            self.enc_cell, output_keep_prob=self.dropout_keep)
        self.dec_cell = tf.nn.rnn_cell.DropoutWrapper(
            self.dec_cell, output_keep_prob=self.dropout_keep)

    with tf.variable_scope('Embedding'):
        #enc_embed [ batch, seqlen, embedding_size ]
        self.enc_embed = tf.nn.embedding_lookup(
            self.enc_embeddings, self.enc_input, name='enc_embed')  # ( enc_voc_size, hidden )
        self.dec_embed = tf.nn.embedding_lookup(
            self.dec_embeddings, self.dec_input, name='dec_embed')  # ( dec_voc_size, hidden )

    # enc_state  [ 2, batch, hidden ] context, hidden
    # enc_outputs[ batch, seqlen, hidden ]
    with tf.variable_scope('Encoder'):
        self.enc_outputs, self.enc_state = \
            tf.nn.dynamic_rnn(
                self.enc_cell,
                self.enc_embed,
                sequence_length=self.x_seq_len,
                dtype=tf.float32
            )
        # The decoder starts from the final encoder state.
        self.dec_state = self.enc_state

    # dec_embed [ batch, seqlen, hidden ]
    # context   [ batch, hidden ]
    with tf.variable_scope('Decoder'):
        self.context = self.bahdanau_attention(self.enc_state, self.enc_outputs)
        # Time-major copy so that one step can be gathered by index.
        self.t_dec_embed = tf.transpose(self.dec_embed, [1, 0, 2])

        dec_idx = tf.constant(0)
        dec_output_tensor = tf.TensorArray(tf.float32, size=self.n_dec_size)

        def dec_cond(idx, p_state, enc_outputs, outupt_tensor, max_dec_size):
            # Loop until max_dec_size steps have been produced.
            return tf.less(idx, max_dec_size)

        def dec_body(idx, p_state, enc_outputs, dec_output_tensor, max_dec_size):
            # One teacher-forced decoder step: concatenate the context with
            # the embedding of step `idx` and run the cell for one step.
            i_dec_embed = tf.gather_nd(self.t_dec_embed, [[idx]])
            i_dec_embed = tf.transpose(i_dec_embed, [1, 0, 2])  # [batch, 1, hidden]
            # NOTE(review): `self.context` is read as a Python attribute, not
            # passed as a loop variable; presumably every iteration therefore
            # sees the tensor bound before the loop -- confirm whether the
            # context is meant to be carried across steps.
            context_expand = tf.expand_dims(self.context, 1)  # [batch, 1, hidden]
            i_dec_embed_concat = tf.concat(
                [context_expand, i_dec_embed], axis=-1)  # [ batch, 1, hidden*2 ]

            i_dec_outputs, i_dec_state = tf.nn.dynamic_rnn(
                self.dec_cell, i_dec_embed_concat,
                initial_state=p_state, dtype=tf.float32)
            self.context = self.bahdanau_attention(i_dec_state, self.enc_outputs)
            i_dec_outputs = tf.reshape(i_dec_outputs, [-1, self.n_hidden_size])
            dec_output_tensor = dec_output_tensor.write(idx, i_dec_outputs)
            return idx + 1, i_dec_state, enc_outputs, dec_output_tensor, max_dec_size

        # Initial decoder state: attention context as c, encoder h as h.
        self.n_dec_state = tf.nn.rnn_cell.LSTMStateTuple(c=self.context, h=self.dec_state.h)

    with tf.variable_scope('While'):
        _, _, _, dec_output_tensor, _ = \
            tf.while_loop(
                cond = dec_cond,
                body = dec_body,
                loop_vars=[
                    dec_idx,
                    self.n_dec_state,
                    self.enc_outputs,
                    dec_output_tensor,
                    self.n_dec_size
                ]
            )

        # Stack the per-step outputs and go back to batch-major.
        self.dec_outputs = dec_output_tensor.stack()
        self.dec_outputs = tf.transpose(self.dec_outputs, [1, 0, 2])
        self.logits = tf.layers.dense(self.dec_outputs, self.n_class,
                                      activation=None, reuse=tf.AUTO_REUSE,
                                      name='output_dense')
        self.mask = tf.sequence_mask(self.y_seq_len, n_dec_size)

    with tf.variable_scope('Loss'):
        # targets [ batch, dec_voc_size ]
        self.losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.logits, labels=self.targets)  # losses = [1, 32, 13]
        # Only positions inside each target length contribute to the loss.
        self.t_loss = tf.boolean_mask(self.losses, self.mask)
        self.loss = tf.reduce_mean(tf.boolean_mask(self.losses, self.mask))
        self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(
            self.loss)

    with tf.variable_scope('Accuracy'):
        self.prediction = tf.argmax(self.logits, 2, name='prediction',
                                    output_type=tf.int64)
        # Predictions outside the target length are zeroed before comparing.
        # NOTE(review): the mean runs over all positions, so padded positions
        # count as correct whenever the padded target id is 0 -- confirm this
        # is intended.
        prediction_mask = self.prediction * tf.to_int64(self.mask)
        correct_pred = tf.equal(prediction_mask, self.targets)
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, "float"),
                                       name="accuracy")

    with tf.variable_scope('While'):
        def inf_cond(inf_idx, dec_input_idx, prev_state, output_tensor, max_dec_size):
            # Loop until max_dec_size tokens have been produced.
            return tf.less(inf_idx, max_dec_size)

        def inf_body(inf_idx, dec_input_idx, prev_state, output_tensor, max_dec_size):
            # One greedy decoding step: the argmax of this step is fed back
            # as the next step's input ids.
            dec_input_embeddings = tf.nn.embedding_lookup(
                self.dec_embeddings, dec_input_idx)  # [ batch, 1, embedding ]
            # NOTE(review): same attribute-based context handling as in
            # dec_body above -- confirm.
            context_expand = tf.expand_dims(self.context, 1)  # [ batch, 1, hidden ]
            dec_input_embeddings = tf.concat(
                [context_expand, dec_input_embeddings], axis=-1)
            dec_outputs, dec_state = tf.nn.dynamic_rnn(
                self.dec_cell, dec_input_embeddings,
                sequence_length=[1],
                initial_state=prev_state, dtype=tf.float32)
            self.context = self.bahdanau_attention(dec_state, self.enc_outputs)
            logits = tf.layers.dense(dec_outputs, self.n_class,
                                     activation=None, reuse=tf.AUTO_REUSE,
                                     name='output_dense')
            idx_prediction = tf.argmax(logits, 2, output_type=tf.int64,
                                       name='idx_prediction')
            output_tensor = output_tensor.write(inf_idx, idx_prediction)
            return inf_idx + 1, idx_prediction, dec_state, output_tensor, max_dec_size

        inf_idx = tf.constant(0)
        inf_output_tensor = tf.TensorArray(tf.int64, size=self.n_dec_size,
                                           name='inf_output_tensor')
        # Re-derive the initial context and decoder state from the encoder,
        # since self.context was rebound while building the training loop.
        self.context = self.bahdanau_attention(self.enc_state, self.enc_outputs)
        self.n_dec_state = tf.nn.rnn_cell.LSTMStateTuple(
            c=self.context, h=self.dec_state.h)
        _, _, _, inf_output_tensor, _ = \
            tf.while_loop(
                cond = inf_cond,
                body = inf_body,
                loop_vars=[
                    inf_idx,
                    self.inf_input,
                    self.n_dec_state,
                    inf_output_tensor,
                    self.n_dec_size
                ])
        # Flatten all predicted ids into a single vector.
        self.inf_result = inf_output_tensor.stack()
        self.inf_result = tf.reshape(self.inf_result, [-1], 'inf_result')
def model_fn(mode, inputs, params, reuse=False):
    """Assemble the model graph together with its loss, metrics and summaries.

    Args:
        mode: (string) can be 'train' or 'eval'
        inputs: (dict) graph inputs; must contain 'images' and 'labels'.
            The dict is extended in place and returned as the model spec.
        params: (Params) hyperparameters; `image_size`, `learning_rate` and
            `use_batch_norm` are read here
        reuse: (bool) whether to reuse the weights

    Returns:
        model_spec: (dict) the `inputs` dict plus the graph nodes needed for
            training / evaluation
    """
    is_training = (mode == 'train')
    labels = inputs['labels']
    images = inputs['images']

    # -----------------------------------------------------------
    # MODEL
    with tf.variable_scope('model', reuse=reuse):
        predictions = build_model(is_training, inputs, params)

    loss = tf.losses.mean_squared_error(labels=labels,
                                        predictions=predictions)

    flat_size = params.image_size * params.image_size

    def unit_rows(batch):
        # Flatten every image to one row and scale it to unit L2 norm.
        return tf.nn.l2_normalize(tf.reshape(batch, [-1, flat_size]), [1])

    def row_dot(left, right):
        # Row-wise dot product (cosine similarity for unit-norm rows).
        return tf.reduce_sum(tf.multiply(left, right), axis=1)

    predictions_reshape = unit_rows(predictions)
    labels_reshape = unit_rows(labels)
    images_reshape = unit_rows(images)

    orig_similarity = row_dot(images_reshape, labels_reshape)
    new_similarity = row_dot(predictions_reshape, labels_reshape)
    similarity_progress = tf.reduce_mean(new_similarity - orig_similarity)
    similarity = tf.reduce_mean(new_similarity)

    # Adam training step; with batch norm it additionally waits for the
    # moving mean / variance update ops.
    if is_training:
        optimizer = tf.train.AdamOptimizer(params.learning_rate)
        global_step = tf.train.get_or_create_global_step()
        if params.use_batch_norm:
            pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        else:
            pending_updates = []
        with tf.control_dependencies(pending_updates):
            train_op = optimizer.minimize(loss, global_step=global_step)

    # -----------------------------------------------------------
    # METRICS AND SUMMARIES
    # tf.metrics averages over the whole dataset.
    with tf.variable_scope("metrics"):
        metrics = {
            'similarity': tf.metrics.mean(similarity),
            'loss': tf.metrics.mean(loss),
            'similarity_progress': tf.metrics.mean(similarity_progress)
        }

    # One op that runs every metric's update op.
    update_metrics_op = tf.group(
        *[update for _, update in metrics.values()])

    # Op that resets the local variables backing tf.metrics.
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                         scope="metrics")
    metrics_init_op = tf.variables_initializer(metric_variables)

    for tag, scalar in (('loss', loss),
                        ('similarity', similarity),
                        ('similarity_progress', similarity_progress)):
        tf.summary.scalar(tag, scalar)

    # Image summaries restricted to samples whose prediction/label
    # similarity is below 0.5.
    similarity_arr = row_dot(predictions_reshape, labels_reshape)
    mask = tf.greater(0.5, similarity_arr)
    for tag, batch in (('incorrectly_train', inputs['images']),
                       ('incorrectly_predict', predictions),
                       ('incorrect_label', labels)):
        tf.summary.image(tag, tf.boolean_mask(batch, mask))

    # -----------------------------------------------------------
    # MODEL SPECIFICATION
    model_spec = inputs
    model_spec['variable_init_op'] = tf.global_variables_initializer()
    model_spec["predictions"] = predictions
    model_spec['loss'] = loss
    model_spec['similarity'] = similarity
    model_spec['similarity_progress'] = similarity_progress
    model_spec['metrics_init_op'] = metrics_init_op
    model_spec['metrics'] = metrics
    model_spec['update_metrics'] = update_metrics_op
    model_spec['summary_op'] = tf.summary.merge_all()

    if is_training:
        model_spec['train_op'] = train_op

    return model_spec
def preprocess(image,
               bbox,
               input_shape=(416, 416),
               random=False,
               jitter=.3,
               hue=.1,
               sat=1.5,
               bri=.1):
    """Resize an image to ``input_shape`` and transform its boxes accordingly.

    With ``random=False`` the image is resized with preserved aspect ratio
    and padded with the value 128 to ``input_shape``. With ``random=True`` it
    is resized with a random aspect ratio and scale, padded/cropped to
    ``input_shape``, randomly flipped left-right and colour-distorted.

    Args:
        image: rank-3 image whose last dimension is 3.
        bbox: box coordinates; the arithmetic below treats the last axis as
            4 values scaled by (height, width, height, width) -- presumably
            normalised (ymin, xmin, ymax, xmax); verify against the caller.
        input_shape: target (height, width).
        random: whether to apply the random augmentation branch.
        jitter: half-width of the aspect-ratio jitter range around 1.
        hue: max delta passed to ``tf.image.random_hue``.
        sat: saturation factor range is ``[1 / sat, sat]``.
        bri: max delta passed to ``tf.image.random_brightness``.

    Returns:
        ``(image_data, bbox)``. In the non-random branch ``image_data`` keeps
        the added batch dimension; in the random branch it is squeezed.
    """
    # NOTE: input_shape is given in (input height, input width) order
    assert 3 == len(image.shape) and 3 == image.shape[-1]
    assert 0 < jitter < 1
    assert -1 < hue < 1
    assert 0 < sat
    assert 0 < bri < 1
    # add batch dimension
    image = tf.expand_dims(image, axis=0)
    img_shape = image.shape[1:3]  #(height, width)

    if False == random:
        # scale the input image to make the wider edge fit the input shape
        # NOTE: I don't use resize_with_pad because it can only stuff zeros, but I want 128
        resize_image = tf.image.resize(image,
                                       input_shape,
                                       method=tf.image.ResizeMethod.BICUBIC,
                                       preserve_aspect_ratio=True)
        resize_shape = resize_image.shape[1:3]  #(height, width)
        # Split the leftover space evenly between both sides.
        top_pad = (input_shape[0] - resize_shape[0]) // 2
        bottom_pad = input_shape[0] - resize_shape[0] - top_pad
        left_pad = (input_shape[1] - resize_shape[1]) // 2
        right_pad = input_shape[1] - resize_shape[1] - left_pad
        resize_image = tf.pad(
            resize_image,
            [[0, 0], [top_pad, bottom_pad], [left_pad, right_pad], [0, 0]],
            constant_values=128)
        # cast to float32
        image_data = tf.cast(resize_image, tf.float32) / 255.
        # correct boxes
        # (scale to resized pixels, shift by the padding, renormalise)
        bbox = bbox * tf.convert_to_tensor([
            resize_shape[0], resize_shape[1], resize_shape[0], resize_shape[1]
        ],
                                           dtype=tf.float32)
        bbox = bbox + tf.convert_to_tensor(
            [top_pad, left_pad, top_pad, left_pad], dtype=tf.float32)
        bbox = bbox / tf.convert_to_tensor(
            [input_shape[0], input_shape[1], input_shape[0], input_shape[1]],
            dtype=tf.float32)
        # return
        return image_data, bbox
    else:
        # randomly sample aspect ratio to input shape
        # resize image to the randomly sampled input shape
        aspect_ratio_jitter = tf.random.uniform(shape=[2],
                                                minval=1 - jitter,
                                                maxval=1 + jitter,
                                                dtype=tf.float32)
        resize_input_shape = tf.convert_to_tensor(
            input_shape, dtype=tf.float32) * aspect_ratio_jitter
        scale = tf.random.uniform(shape=[1],
                                  minval=.8,
                                  maxval=1.2,
                                  dtype=tf.float32)
        resize_shape = tf.cond(tf.greater(resize_input_shape[0],
                                          resize_input_shape[1]),
                               true_fn=lambda: scale * resize_input_shape /
                               aspect_ratio_jitter[0],
                               false_fn=lambda: scale * resize_input_shape /
                               aspect_ratio_jitter[1])
        resize_shape = tf.cast(resize_shape, dtype=tf.int32)
        resize_image = tf.image.resize(image,
                                       resize_shape,
                                       method=tf.image.ResizeMethod.BICUBIC)
        # NOTE(review): these Python `if`s branch on tensor values, which
        # presumably requires eager execution -- confirm how this function
        # is called.
        if input_shape[0] > resize_shape[0]:
            # Too short: pad `pad` rows on both the top and the bottom.
            pad = input_shape[0] - resize_shape[0]
            resize_image = tf.pad(resize_image,
                                  [[0, 0], [pad, pad], [0, 0], [0, 0]],
                                  constant_values=128)
            # sample crop offset_height
            offset_height = tf.random.uniform(maxval=pad + 1,
                                              dtype=tf.int32,
                                              shape=())
            # correct boxes
            bbox = bbox * tf.convert_to_tensor([
                resize_shape[0], resize_shape[1], resize_shape[0],
                resize_shape[1]
            ],
                                               dtype=tf.float32)
            bbox = bbox + tf.convert_to_tensor([pad, 0, pad, 0],
                                               dtype=tf.float32)
            resize_shape = resize_shape + tf.convert_to_tensor([2 * pad, 0],
                                                               dtype=tf.int32)
            bbox = bbox / tf.convert_to_tensor([
                resize_shape[0], resize_shape[1], resize_shape[0],
                resize_shape[1]
            ],
                                               dtype=tf.float32)
        else:
            crop = resize_shape[0] - input_shape[0]
            # sample crop offset_height
            offset_height = tf.random.uniform(maxval=crop + 1,
                                              dtype=tf.int32,
                                              shape=())
        if input_shape[1] > resize_shape[1]:
            # Too narrow: pad `pad` columns on both the left and the right.
            pad = input_shape[1] - resize_shape[1]
            resize_image = tf.pad(resize_image,
                                  [[0, 0], [0, 0], [pad, pad], [0, 0]],
                                  constant_values=128)
            # sample crop offset_width
            offset_width = tf.random.uniform(maxval=pad + 1,
                                             dtype=tf.int32,
                                             shape=())
            # correct boxes
            bbox = bbox * tf.convert_to_tensor([
                resize_shape[0], resize_shape[1], resize_shape[0],
                resize_shape[1]
            ],
                                               dtype=tf.float32)
            bbox = bbox + tf.convert_to_tensor([0, pad, 0, pad],
                                               dtype=tf.float32)
            resize_shape = resize_shape + tf.convert_to_tensor([0, 2 * pad],
                                                               dtype=tf.int32)
            bbox = bbox / tf.convert_to_tensor([
                resize_shape[0], resize_shape[1], resize_shape[0],
                resize_shape[1]
            ],
                                               dtype=tf.float32)
        else:
            crop = resize_shape[1] - input_shape[1]
            # sample crop offset_width
            offset_width = tf.random.uniform(maxval=crop + 1,
                                             dtype=tf.int32,
                                             shape=())
        # crop
        resize_image = tf.image.crop_to_bounding_box(resize_image,
                                                     offset_height,
                                                     offset_width,
                                                     input_shape[0],
                                                     input_shape[1])
        # correct boxes
        bbox = bbox * tf.convert_to_tensor([
            resize_shape[0], resize_shape[1], resize_shape[0], resize_shape[1]
        ],
                                           dtype=tf.float32)
        bbox = bbox + tf.convert_to_tensor(
            [-offset_height, -offset_width, -offset_height, -offset_width],
            dtype=tf.float32)
        bbox = bbox / tf.convert_to_tensor(
            [input_shape[0], input_shape[1], input_shape[0], input_shape[1]],
            dtype=tf.float32)
        # randomly flip image
        # (the coin flip uses NumPy's RNG, not TensorFlow's)
        if np.random.rand() < .5:
            resize_image = tf.image.flip_left_right(resize_image)
            # correct boxes(y remains while x = 1 - x)
            # NOTE(review): both x coordinates are mirrored in place without
            # being swapped, so the former min ends up larger than the former
            # max -- confirm downstream code expects that.
            bbox = tf.convert_to_tensor(
                [0, 1, 0, 1], dtype=tf.float32) + tf.convert_to_tensor(
                    [1, -1, 1, -1], dtype=tf.float32) * bbox
        # distort image in HSV color space
        image_data = tf.cast(resize_image, tf.float32) / 255.
        image_data = tf.image.random_hue(image_data, hue)
        image_data = tf.image.random_saturation(image_data,
                                                lower=1. / sat,
                                                upper=sat)
        image_data = tf.image.random_brightness(image_data, bri)
        # discard invalid boxes (small box or box having negative width or height)
        bbox = tf.clip_by_value(bbox, 0,
                                1)  # restrict the min and max coordinates
        bbox_hw = bbox[..., 2:4] - bbox[...,
                                        0:2]  # bbox_hw.shape = (bbox_num,2)
        bbox_hw = bbox_hw * tf.convert_to_tensor(input_shape, dtype=tf.float32)
        valid = tf.math.logical_and(bbox_hw[..., 0] > 1,
                                    bbox_hw[..., 1] >
                                    1)  # valid.shape = (bbox_num)
        valid_bbox = tf.boolean_mask(bbox,
                                     valid)  # valid_bbox.shape = (valid box num, 4)
        assert (valid_bbox.shape[1] != 0)
        # return
        # NOTE(review): the clipped but unfiltered `bbox` is returned, not
        # `valid_bbox` -- confirm whether callers rely on the box count
        # staying unchanged before switching.
        return tf.squeeze(image_data), bbox
# Visual check of the parsed training data: for the first three samples, draw
# the boxes taken from label[0] and the matching anchors wherever the
# confidence channel label[1][..., 0] equals 1.
# NOTE(review): this fragment ends with a `for` header whose body is not part
# of this excerpt.
parse = l_datasets.Parse(l_config.train_image_dir, anchors,
                         l_config.grid_sizes, l_config.image_target_size)
train_ds = tf.data.TextLineDataset(l_config.train_label_file)
train_ds = train_ds.map(parse)
# Flatten every anchor tensor to rows of 4 values and stack them.
flat_anchors = [tf.reshape(item, (-1, 4)) for item in anchors]
flat_anchors = tf.concat(flat_anchors, 0)
for index, (image, label) in enumerate(train_ds.take(3)):
    # Map the image from [-1, 1] to [0, 255] -- assumes the parser emits
    # images in [-1, 1]; TODO confirm.
    image = (image + 1.0) * 127.5
    images = [image]
    layer_conf = label[1]
    mask = layer_conf[..., 0] == 1
    mask_boxes = tf.boolean_mask(label[0], mask)
    mask_anchors = tf.boolean_mask(flat_anchors, mask)
    mask_cates = tf.boolean_mask(label[2][..., 0], mask)
    # NOTE(review): `cates` repeats `mask_cates` and is not used in the
    # visible part of this fragment.
    cates = tf.boolean_mask(label[2][..., 0], mask)
    images = tf.image.draw_bounding_boxes(images, [mask_boxes], [[0, 255, 0]])
    images = tf.image.draw_bounding_boxes(images, [mask_anchors],
                                          [[255, 0, 0]])
    image = images[0].numpy().astype(np.int32)
    # Scale the first two box coordinates by the target size.
    cv2_loca = mask_boxes.numpy()[..., :2] * np.array(
        l_config.image_target_size)
    cv2_loca = cv2_loca.astype(np.int32)
    cv2_loca[..., 0] += 10
    cv2_cate = mask_cates.numpy().astype(np.int32)
    # NOTE(review): this inner loop reuses the outer loop's name `index`.
    for index, (loca, cate) in enumerate(zip(cv2_loca, cv2_cate)):
def _streaming_tp_fp_array(num_gt_boxes, tp, fp, scores, class_name,
                           remove_zero_scores=True,
                           metrics_collections=None,
                           updates_collections=None,
                           name=None):
    """Accumulate TP/FP indicators, scores and groundtruth counts over batches.

    Each run of the returned update op appends the current batch's flattened
    `tp`, `fp` and `scores` to growing local variables and adds
    `num_gt_boxes` to a running counter.

    Args:
        num_gt_boxes: Number of groundtruth objects; cast to int64.
        tp: True-positive indicators; cast to bool and flattened.
        fp: False-positive indicators; cast to bool and flattened.
        scores: Detection scores; cast to float and flattened.
        class_name: Value formatted into the default scope name
            `streaming_tp_fp_<class_name>`.
        remove_zero_scores: When true, only entries that are flagged as TP
            or FP and whose score is greater than 1e-4 are kept.
        metrics_collections: Optional collection name(s) the tuple of value
            variables is added to.
        updates_collections: Optional collection name(s) the tuple of update
            ops is added to.
        name: Optional variable scope name; the default name is used when
            it is None.

    Returns:
        A pair `(values, update_op)` where `values` is the tuple
        `(tp_value, fp_value, scores_value, num_gt_boxes_value)` of local
        variables and `update_op` groups the four per-variable update ops.
    """
    scope_default = 'streaming_tp_fp_{}'.format(class_name)
    with variable_scope.variable_scope(
            name, scope_default, [num_gt_boxes, tp, fp, scores]):
        # Bring every input to the dtype the accumulators expect.
        tp_flags = tf.cast(tp, tf.bool)
        fp_flags = tf.cast(fp, tf.bool)
        score_vals = tf.to_float(scores)
        gt_count = tf.to_int64(num_gt_boxes)

        # Flatten to rank 1 so batches can be concatenated along axis 0.
        tp_flags, fp_flags, score_vals = [
            tf.reshape(tensor, [-1])
            for tensor in (tp_flags, fp_flags, score_vals)
        ]

        if remove_zero_scores:
            # Keep an entry only if it is a TP or an FP and its score is
            # above the small threshold.
            score_floor = 1e-4
            keep = tf.logical_and(tf.logical_or(tp_flags, fp_flags),
                                  tf.greater(score_vals, score_floor))
            tp_flags, fp_flags, score_vals = [
                tf.boolean_mask(tensor, keep)
                for tensor in (tp_flags, fp_flags, score_vals)
            ]

        def _growing_local(var_name, **kwargs):
            # Starts empty and is later re-assigned with longer tensors,
            # hence shape validation is switched off.
            return metrics_impl._create_local(
                var_name, shape=[0], validate_shape=False, **kwargs)

        def _append(variable, batch):
            # Overwrite the variable with its old content plus the batch.
            return tf.assign(variable,
                             tf.concat([variable, batch], axis=0),
                             validate_shape=False)

        # Local variables holding everything seen so far.
        tp_value = _growing_local('tp_value', dtype=tf.bool)
        fp_value = _growing_local('fp_value', dtype=tf.bool)
        scores_value = _growing_local('scores_value')
        num_gt_boxes_value = metrics_impl._create_local(
            'num_gt_boxes_value', shape=[], dtype=tf.int64)

        values = (tp_value, fp_value, scores_value, num_gt_boxes_value)
        update_ops = (
            _append(tp_value, tp_flags),
            _append(fp_value, fp_flags),
            _append(scores_value, score_vals),
            tf.assign_add(num_gt_boxes_value, gt_count),
        )

        if metrics_collections:
            ops.add_to_collections(metrics_collections, values)
        if updates_collections:
            ops.add_to_collections(updates_collections, update_ops)

        return values, tf.group(*update_ops)