def loss(y_true, y_pred):
    """Sum a per-sample weighted combination of a normal and a Cauchy term.

    Parameters
    ----------
    y_true : keras tensor
        True values to predict. The last column is read as the index of
        each data sample in the training set; the remaining columns are
        reshaped to match ``y_pred``.
    y_pred : keras tensor
        Prediction made by the model.

    Returns
    -------
    keras tensor
        Scalar: the sum over all elements of the weighted terms.
    """
    targets = K.reshape(y_true[:, :-1], K.shape(y_pred))
    squared_error = K.square(targets - y_pred)
    if nout > 1:
        # Several outputs: reduce the squared error over the last axis.
        squared_error = K.sum(squared_error, axis=-1)

    normal_part = (squared_error / (2. * sigmaSQ)
                   + 0.5 * K.log(sigmaSQ)
                   + 0.5 * K.log(2. * np.pi)
                   - K.log(a))
    cauchy_part = (K.log(1. + squared_error / gammaSQ)
                   + 0.5 * K.log(piSQ * gammaSQ)
                   - K.log(1. - a))

    # The trailing column of y_true selects each sample's row of T_k.
    sample_index = K.cast(y_true[:, -1], 'int64')
    weight_normal = K.gather(T_k[:, 0], sample_index)
    weight_cauchy = K.gather(T_k[:, 1], sample_index)

    return K.sum(weight_normal * normal_part + weight_cauchy * cauchy_part)
def basic_accuracy(self, y_true, y_pred, go_backwards=False):
    """Frame-level accuracy displayed during training, ignoring masked steps.

    ``y_true`` must contain integer labels (not one-hot).
    """
    # Derive the mask from y_pred and convert it to the float type.
    mask = K.cast(K.all(K.greater(y_pred, -1e6), axis=2), K.floatx())
    # Re-establish the shape and dtype of y_true.
    y_true = K.cast(K.reshape(y_true, K.shape(y_pred)[:-1]), 'int32')
    # Optionally reverse the sequences.
    if go_backwards:
        y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
    if self.hidden_dim is None:
        trans = K.transpose(self.trans) if go_backwards else self.trans
        history = K.gather(trans, y_true)
    else:
        # When running backwards the two factor matrices swap roles.
        if go_backwards:
            left, right = self.r_trans, self.l_trans
        else:
            left, right = self.l_trans, self.r_trans
        history = tf.einsum('bnd,kd->bnk', K.gather(left, y_true), right)
    # Per-label accuracy: shift the transition scores one step to the right;
    # the first step reuses y_pred itself.
    history = K.concatenate([y_pred[:, :1], history[:, :-1]], 1)
    combined = (y_pred + history) / 2
    predicted = K.cast(K.argmax(combined, 2), 'int32')
    hits = K.cast(K.equal(y_true, predicted), K.floatx())
    return K.sum(hits * mask) / K.sum(mask)
def yolo_eval(yolo_outputs, anchors, num_classes, image_shape, max_boxes=20,
              score_threshold=.6, iou_threshold=.5, eager=False):
    """Decode YOLO feature layers and run per-class non-max suppression.

    When ``eager`` is true the last element of ``yolo_outputs`` is used as the
    image shape (overriding ``image_shape``) and is not treated as a feature
    layer. Returns ``(boxes, scores, classes)`` concatenated over all classes.
    """
    if eager:
        image_shape = K.reshape(yolo_outputs[-1], [-1])
        num_layers = len(yolo_outputs) - 1
    else:
        # Number of feature layers.
        num_layers = len(yolo_outputs)
    # Feature layer 0 uses anchors 3, 4, 5; feature layer 1 uses anchors 1, 2, 3.
    anchor_mask = [[3, 4, 5], [1, 2, 3]]
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    # Decode every feature layer, then stack the results.
    layer_boxes = []
    layer_scores = []
    for layer in range(num_layers):
        decoded_boxes, decoded_scores = yolo_boxes_and_scores(
            yolo_outputs[layer], anchors[anchor_mask[layer]], num_classes,
            input_shape, image_shape)
        layer_boxes.append(decoded_boxes)
        layer_scores.append(decoded_scores)
    boxes = K.concatenate(layer_boxes, axis=0)
    box_scores = K.concatenate(layer_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    kept_boxes = []
    kept_scores = []
    kept_classes = []
    for class_id in range(num_classes):
        class_mask = mask[:, class_id]
        # Keep only boxes whose score for this class reaches the threshold.
        candidates = tf.boolean_mask(boxes, class_mask)
        candidate_scores = tf.boolean_mask(box_scores[:, class_id], class_mask)
        # Non-max suppression removes heavily overlapping boxes.
        nms_index = tf.image.non_max_suppression(
            candidates, candidate_scores, max_boxes_tensor,
            iou_threshold=iou_threshold)
        # Box positions, scores and class ids that survived suppression.
        candidates = K.gather(candidates, nms_index)
        candidate_scores = K.gather(candidate_scores, nms_index)
        kept_boxes.append(candidates)
        kept_scores.append(candidate_scores)
        kept_classes.append(K.ones_like(candidate_scores, 'int32') * class_id)

    boxes_ = K.concatenate(kept_boxes, axis=0)
    scores_ = K.concatenate(kept_scores, axis=0)
    classes_ = K.concatenate(kept_classes, axis=0)
    return boxes_, scores_, classes_
def call(self, inputs):
    """Merge position embeddings into the inputs.

    If ``custom_position_ids`` is set, the second input holds the custom
    position ids.
    """
    if self.custom_position_ids:
        inputs, position_ids = inputs
        if 'int' not in K.dtype(position_ids):
            position_ids = K.cast(position_ids, 'int32')
    else:
        input_shape = K.shape(inputs)
        batch_size, seq_len = input_shape[0], input_shape[1]
        position_ids = K.arange(0, seq_len, dtype='int32')[None]

    if self.hierarchical:
        # ``True`` selects the default mixing weight of 0.4.
        alpha = 0.4 if self.hierarchical is True else self.hierarchical
        base = (self.embeddings - alpha * self.embeddings[:1]) / (1 - alpha)
        coarse = K.gather(base, position_ids // self.input_dim)
        fine = K.gather(base, position_ids % self.input_dim)
        embeddings = alpha * coarse + (1 - alpha) * fine
    elif self.custom_position_ids:
        embeddings = K.gather(self.embeddings, position_ids)
    else:
        embeddings = self.embeddings[None, :seq_len]

    if self.merge_mode == 'add':
        return inputs + embeddings
    if self.merge_mode == 'mul':
        return inputs * embeddings
    if not self.custom_position_ids:
        # The embeddings have a leading axis of 1 here; repeat per sample.
        embeddings = K.tile(embeddings, [batch_size, 1, 1])
    return K.concatenate([inputs, embeddings])
def yolo_eval(yolo_outputs, image_shape, max_boxes=10, score_threshold=.6, iou_threshold=.5):
    """Evaluate YOLO model on given input batch and return filtered boxes.

    Args:
        yolo_outputs: 4-tuple ``(box_confidence, box_xy, box_wh,
            box_class_probs)``.
        image_shape: indexable whose first two entries are the image height
            and width; used to scale the boxes.
        max_boxes: maximum number of boxes kept by non-max suppression.
        score_threshold: forwarded to ``yolo_filter_boxes`` as ``threshold``.
        iou_threshold: IoU threshold for non-max suppression.

    Returns:
        ``(boxes, scores, classes)`` for the boxes kept by suppression.
    """
    box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs
    boxes = yolo_boxes_to_corners(box_xy, box_wh)
    boxes, scores, classes = yolo_filter_boxes(
        box_confidence, boxes, box_class_probs, threshold=score_threshold)

    # Scale boxes back to original image shape.
    height = image_shape[0]
    width = image_shape[1]
    image_dims = K.stack([height, width, height, width])
    image_dims = K.reshape(image_dims, [1, 4])
    boxes = boxes * image_dims

    # A constant needs no session-side initialisation, unlike the variable
    # that was previously created and initialised through K.get_session().
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    nms_index = tf.image.non_max_suppression(
        boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold)
    boxes = K.gather(boxes, nms_index)
    scores = K.gather(scores, nms_index)
    classes = K.gather(classes, nms_index)
    return boxes, scores, classes
def basic_loss(self, y_true, y_pred, go_backwards=False):
    """Masked sparse cross-entropy loss; ``y_true`` must be integer labels
    (not one-hot).
    """
    # Derive the mask from y_pred and convert it to the float type.
    valid = K.all(K.greater(y_pred, -1e6), axis=2)
    mask = K.cast(valid, K.floatx())
    # Re-establish the shape and dtype of y_true.
    label_shape = K.shape(y_pred)[:-1]
    y_true = K.cast(K.reshape(y_true, label_shape), 'int32')
    # Optionally reverse the sequences.
    if go_backwards:
        y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
    if self.hidden_dim is None:
        if go_backwards:
            trans = K.transpose(self.trans)
        else:
            trans = self.trans
        history = K.gather(trans, y_true)
    else:
        # When running backwards the two factor matrices swap roles.
        first, second = self.l_trans, self.r_trans
        if go_backwards:
            first, second = second, first
        history = K.gather(first, y_true)
        history = tf.einsum('bnd,kd->bnk', history, second)
    # Compute the loss: shift the transition scores one step to the right,
    # reusing y_pred for the first step, then average with y_pred.
    shifted = K.concatenate([y_pred[:, :1], history[:, :-1]], 1)
    logits = (y_pred + shifted) / 2
    per_step = K.sparse_categorical_crossentropy(
        y_true, logits, from_logits=True
    )
    return K.sum(per_step * mask) / K.sum(mask)
def yolo_non_max_suppression(scores, boxes, classes, max_boxes = 10, iou_threshold = 0.5):
    """
    Applies Non-max suppression (NMS) to set of boxes

    Arguments:
    scores -- tensor of shape (None,), output of yolo_filter_boxes()
    boxes -- tensor of shape (None, 4), output of yolo_filter_boxes() that have been scaled to the image size
    classes -- tensor of shape (None,), output of yolo_filter_boxes()
    max_boxes -- integer, maximum number of predicted boxes you'd like
    iou_threshold -- real value, "intersection over union" threshold used for NMS filtering

    Returns:
    scores -- the entries of `scores` selected by NMS
    boxes -- the rows of `boxes` selected by NMS
    classes -- the entries of `classes` selected by NMS

    Note: at most max_boxes entries are returned. The outputs are gathered
    along the first axis of the inputs; nothing is transposed.
    """
    # A constant scalar is enough for tf.image.non_max_suppression and, unlike
    # a variable, does not have to be initialised through a TF1 session.
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    # Indices of the boxes to keep.
    nms_indices = tf.image.non_max_suppression(
        boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold)

    # Select only nms_indices from scores, boxes and classes.
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)
    return scores, boxes, classes
def yolo_eval(yolo_outputs, anchors, num_classes, image_shape, max_boxes=100,
              score_threshold=.5, iou_threshold=.4):
    """Decode the YOLO feature layers and apply per-class non-max suppression.

    Returns ``(boxes, scores, classes)`` concatenated over all classes.
    """
    # score threshold was 0.6
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    decoded = [
        yolo_boxes_and_scores(layer_output, anchors[anchor_mask[layer]],
                              num_classes, input_shape, image_shape)
        for layer, layer_output in enumerate(yolo_outputs)
    ]
    boxes = K.concatenate([pair[0] for pair in decoded], axis=0)
    box_scores = K.concatenate([pair[1] for pair in decoded], axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    selected_boxes, selected_scores, selected_classes = [], [], []
    for class_id in range(num_classes):
        keep = mask[:, class_id]
        candidates = tf.boolean_mask(boxes, keep)
        candidate_scores = tf.boolean_mask(box_scores[:, class_id], keep)
        nms_index = tf.image.non_max_suppression(
            candidates, candidate_scores, max_boxes_tensor,
            iou_threshold=iou_threshold)
        candidates = K.gather(candidates, nms_index)
        candidate_scores = K.gather(candidate_scores, nms_index)
        selected_boxes.append(candidates)
        selected_scores.append(candidate_scores)
        selected_classes.append(K.ones_like(candidate_scores, 'int32') * class_id)

    boxes_ = K.concatenate(selected_boxes, axis=0)
    scores_ = K.concatenate(selected_scores, axis=0)
    classes_ = K.concatenate(selected_classes, axis=0)
    return boxes_, scores_, classes_
def accuracy_mod(y_true, y_pred):
    """Per-element match indicator between labels and predictions, skipping
    labels below zero.

    Returns a float tensor with 1.0 where the predicted class equals the
    label and 0.0 otherwise, for the positions whose label is >= 0.
    """
    # Drop the trailing axis of y_true when it has as many axes as y_pred.
    if K.ndim(y_true) == K.ndim(y_pred):
        y_true = K.squeeze(y_true, -1)

    # Normalise the predictions, then take the position of the maximum as
    # the predicted label.
    predicted = K.argmax(K.softmax(y_pred, axis=-1), axis=-1)

    # Labels may contain negative entries that must not count; keep only the
    # positions whose label is greater than or equal to zero.
    zero = K.constant([0], dtype=tf.float32)
    valid_positions = tf.where(K.greater_equal(y_true, zero))

    kept_true = K.gather(y_true, K.reshape(valid_positions, [-1]))
    kept_pred = K.gather(predicted, K.reshape(valid_positions, [-1]))

    kept_true = K.cast(kept_true, K.floatx())
    kept_pred = K.cast(kept_pred, K.floatx())
    return K.cast(K.equal(kept_true, kept_pred), K.floatx())
def single_image_nms(b, batch_boxes, batch_scores, batch_classes):
    """Run per-class non-max suppression for image ``b`` and write the result
    into the three batch accumulators.

    Returns ``b + 1`` together with the updated accumulators.
    """
    image_boxes, image_scores, image_classes = [], [], []
    for class_id in range(num_classes):
        # TODO: use keras backend instead of tf.
        keep = mask[b, :, class_id]
        candidates = tf.boolean_mask(boxes[b], keep)
        candidate_scores = tf.boolean_mask(box_scores[b, :, class_id], keep)
        nms_index = tf.image.non_max_suppression(
            candidates, candidate_scores, max_boxes_tensor,
            iou_threshold=iou_threshold)
        candidates = K.gather(candidates, nms_index)
        candidate_scores = K.gather(candidate_scores, nms_index)
        image_boxes.append(candidates)
        image_scores.append(candidate_scores)
        image_classes.append(K.ones_like(candidate_scores, 'int32') * class_id)

    merged_boxes = K.concatenate(image_boxes, axis=0)
    merged_scores = K.concatenate(image_scores, axis=0)
    merged_classes = K.concatenate(image_classes, axis=0)

    batch_boxes = batch_boxes.write(b, merged_boxes)
    batch_scores = batch_scores.write(b, merged_scores)
    batch_classes = batch_classes.write(b, merged_classes)
    return b + 1, batch_boxes, batch_scores, batch_classes
def _process_sample(args):
    """Turn one sample's top-k heatmap peaks into detection rows.

    ``args`` is ``(_hm, _reg, _wh)``. Each returned row is
    ``[x1, y1, x2, y2, score, class]``.
    """
    _hm, _reg, _wh = args
    top_scores, flat_inds = tf.math.top_k(_hm, k=k, sorted=True)

    # Split the flat index into a class id and a spatial index.
    class_ids = K.cast(flat_inds % cat, 'float32')
    spatial_inds = K.cast(flat_inds / cat, 'int32')
    # Split the spatial index into column and row.
    center_x = K.cast(spatial_inds % width, 'float32')
    center_y = K.cast(K.cast(spatial_inds / width, 'int32'), 'float32')

    sizes = K.gather(_wh, spatial_inds)
    offsets = K.gather(_reg, spatial_inds)
    center_x = center_x + offsets[..., 0]
    center_y = center_y + offsets[..., 1]

    left = center_x - sizes[..., 0] / 2
    top = center_y - sizes[..., 1] / 2
    right = center_x + sizes[..., 0] / 2
    bottom = center_y + sizes[..., 1] / 2

    # rescale to image coordinates
    left = output_stride * left
    top = output_stride * top
    right = output_stride * right
    bottom = output_stride * bottom

    return K.stack([left, top, right, bottom, top_scores, class_ids], -1)
def _process_channel(args):
    """Replace keypoints with the nearest heatmap peak where that peak passes
    all acceptance checks; otherwise keep the original keypoint.

    ``args`` is ``(keypoints, heatmap)`` for one channel.
    """
    keypoints, heatmap = args
    thresh = 0.1

    peak_scores, peak_inds = tf.math.top_k(heatmap, k=k, sorted=True)
    peak_x = K.cast(peak_inds % width, 'float32')
    peak_y = K.cast(K.cast(peak_inds / width, 'int32'), 'float32')
    peak_offset = K.gather(_hp_offset, peak_inds)
    peak_x = peak_x + peak_offset[..., 0]
    peak_y = peak_y + peak_offset[..., 1]

    # Peaks at or below the threshold get sentinel scores and coordinates.
    strong = K.cast(peak_scores > thresh, 'float32')
    peak_scores = (1. - strong) * -1. + strong * peak_scores
    peak_x = (1. - strong) * -10000. + strong * peak_x
    peak_y = (1. - strong) * -10000. + strong * peak_y

    peak_points = K.stack([peak_x, peak_y], -1)  # k x 2
    peaks_expanded = K.expand_dims(peak_points, 1)  # k x 1 x 2
    keypoints_expanded = K.expand_dims(keypoints, 0)  # 1 x k x 2
    dist = K.sqrt(
        K.sum(K.pow(keypoints_expanded - peaks_expanded, 2), 2))  # k, k
    min_dist = K.min(dist, 0)
    nearest = K.argmin(dist, 0)
    peak_scores = K.gather(peak_scores, nearest)
    peak_points = K.gather(peak_points, nearest)

    # Count violated checks: outside the box, weak score, or too far away.
    rejected = (K.cast(peak_points[..., 0] < _x1, 'float32') +
                K.cast(peak_points[..., 0] > _x2, 'float32') +
                K.cast(peak_points[..., 1] < _y1, 'float32') +
                K.cast(peak_points[..., 1] > _y2, 'float32') +
                K.cast(peak_scores < thresh, 'float32') +
                K.cast(
                    min_dist > 0.3 * (K.maximum(_wh[..., 0], _wh[..., 1])),
                    'float32'))
    rejected = K.expand_dims(rejected, -1)
    rejected = K.cast(rejected > 0, 'float32')
    return (1. - rejected) * peak_points + rejected * keypoints
def yolo_non_max_suppression(scores, boxes, classes, max_boxes=10, iou_threshold=0.5):
    """Apply non-max suppression and keep at most ``max_boxes`` boxes.

    Args:
        scores: scores of the candidate boxes.
        boxes: candidate boxes, passed to ``tf.image.non_max_suppression``.
        classes: classes of the candidate boxes.
        max_boxes: maximum number of boxes to keep.
        iou_threshold: IoU threshold used for suppression.

    Returns:
        ``(scores, boxes, classes)`` restricted to the kept indices.
    """
    # Scalar tensor handed to tf.image.non_max_suppression(). A constant
    # needs no session-side initialisation.
    max_boxes_tensor = tf.keras.backend.constant(max_boxes, dtype='int32')

    # Indices of the boxes to keep.
    nms_indices = tf.image.non_max_suppression(
        boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold)

    # Select only nms_indices from scores, boxes and classes. The backend is
    # reached through tf.keras.backend, the only spelling this block uses.
    scores = tf.keras.backend.gather(scores, nms_indices)
    boxes = tf.keras.backend.gather(boxes, nms_indices)
    classes = tf.keras.backend.gather(classes, nms_indices)
    return scores, boxes, classes
def filter_detections(args):
    """Filter one image's detections by score and optional NMS, keep the top
    ``self.max_detections`` and pad every output to that length with -1.

    ``args[0]`` holds the boxes and ``args[1]`` the classification scores.
    Returns ``[boxes, scores, labels]``.
    """
    boxes_ = args[0]
    classification_ = args[1]

    def filter_detection(scores_, labels_):
        # Positions whose score exceeds the threshold.
        kept = tf.where(backend.greater(scores_, self.score_threshold))
        if self.nms:
            candidate_boxes = tf.gather_nd(boxes_, kept)
            candidate_scores = backend.gather(scores_, kept)[:, 0]
            nms_indices = tf.image.non_max_suppression(
                candidate_boxes, candidate_scores, self.max_detections, 0.1)
            kept = backend.gather(kept, nms_indices)
        kept_labels = tf.gather_nd(labels_, kept)
        # Pair each box index with its label.
        return backend.stack([kept[:, 0], kept_labels], axis=1)

    if self.class_specific_filter:
        per_class = []
        for c in range(int(classification_.shape[1])):
            class_scores = classification_[:, c]
            class_labels = c * tf.ones(
                (backend.shape(class_scores)[0], ), dtype='int64')
            per_class.append(filter_detection(class_scores, class_labels))
        indices = backend.concatenate(per_class, axis=0)
    else:
        best_scores = backend.max(classification_, axis=1)
        best_labels = backend.argmax(classification_, axis=1)
        indices = filter_detection(best_scores, best_labels)

    scores = tf.gather_nd(classification_, indices)
    labels = indices[:, 1]
    top_count = backend.minimum(self.max_detections, backend.shape(scores)[0])
    scores, top_indices = tf.nn.top_k(scores, k=top_count)

    box_indices = backend.gather(indices[:, 0], top_indices)
    boxes_ = backend.gather(boxes_, box_indices)
    labels = backend.gather(labels, top_indices)

    # Pad up to max_detections entries with -1.
    pad_size = backend.maximum(
        0, self.max_detections - backend.shape(scores)[0])
    boxes_ = tf.pad(boxes_, [[0, pad_size], [0, 0]], constant_values=-1)
    scores = tf.pad(scores, [[0, pad_size]], constant_values=-1)
    labels = tf.pad(labels, [[0, pad_size]], constant_values=-1)
    labels = backend.cast(labels, 'int32')

    boxes_.set_shape([self.max_detections, 4])
    scores.set_shape([self.max_detections])
    labels.set_shape([self.max_detections])
    return [boxes_, scores, labels]
def yolo5_postprocess(args, anchors, num_classes, max_boxes=100, confidence=0.1, iou_threshold=0.4, elim_grid_sense=True):
    """Postprocess for YOLOv5 model on given input and return filtered boxes.

    ``args`` holds the feature-layer outputs followed by the image shape.
    ``elim_grid_sense`` is accepted but not read; the scale factors below are
    fixed.
    """
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    image_shape = args[num_layers]

    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
        # YOLOv5 enable "elim_grid_sense" by default
        scale_x_y = [2.0, 2.0, 2.0]
    else:
        anchor_mask = [[3, 4, 5], [0, 1, 2]]
        scale_x_y = [1.05, 1.05]

    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    layer_boxes, layer_scores = [], []
    for layer in range(num_layers):
        decoded_boxes, decoded_scores = yolo5_boxes_and_scores(
            yolo_outputs[layer], anchors[anchor_mask[layer]], num_classes,
            input_shape, image_shape, scale_x_y=scale_x_y[layer])
        layer_boxes.append(decoded_boxes)
        layer_scores.append(decoded_scores)
    boxes = K.concatenate(layer_boxes, axis=0)
    box_scores = K.concatenate(layer_scores, axis=0)

    mask = box_scores >= confidence
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    kept_boxes, kept_scores, kept_classes = [], [], []
    for class_id in range(num_classes):
        # TODO: use keras backend instead of tf.
        keep = mask[:, class_id]
        candidates = tf.boolean_mask(boxes, keep)
        candidate_scores = tf.boolean_mask(box_scores[:, class_id], keep)
        nms_index = tf.image.non_max_suppression(
            candidates, candidate_scores, max_boxes_tensor,
            iou_threshold=iou_threshold)
        candidates = K.gather(candidates, nms_index)
        candidate_scores = K.gather(candidate_scores, nms_index)
        kept_boxes.append(candidates)
        kept_scores.append(candidate_scores)
        kept_classes.append(K.ones_like(candidate_scores, 'int32') * class_id)

    boxes_ = K.concatenate(kept_boxes, axis=0)
    scores_ = K.concatenate(kept_scores, axis=0)
    classes_ = K.concatenate(kept_classes, axis=0)
    return boxes_, scores_, classes_
def _gather_channels(x, indexes):
    """Slice tensor along channels axis by given indexes"""
    channels_last = backend.image_data_format() == 'channels_last'
    # Move the channel axis to the front, gather, then move it back.
    to_front = (3, 0, 1, 2) if channels_last else (1, 0, 2, 3)
    to_back = (1, 2, 3, 0) if channels_last else (1, 0, 2, 3)
    channels_first = backend.permute_dimensions(x, to_front)
    selected = backend.gather(channels_first, indexes)
    return backend.permute_dimensions(selected, to_back)
def yolo_eval(
    yolo_outputs,
    anchors,
    num_classes,
    image_shape,
    max_boxes=20,
    score_threshold=0.6,
    iou_threshold=0.5,
):
    """Evaluate YOLO model on given input and return filtered boxes."""
    num_layers = len(yolo_outputs)
    # default setting
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    layer_boxes, layer_scores = [], []
    for layer in range(num_layers):
        decoded_boxes, decoded_scores = yolo_boxes_and_scores(
            yolo_outputs[layer],
            anchors[anchor_mask[layer]],
            num_classes,
            input_shape,
            image_shape,
        )
        layer_boxes.append(decoded_boxes)
        layer_scores.append(decoded_scores)
    boxes = K.concatenate(layer_boxes, axis=0)
    box_scores = K.concatenate(layer_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype="int32")

    kept_boxes, kept_scores, kept_classes = [], [], []
    for class_id in range(num_classes):
        # TODO: use keras backend instead of tf.
        keep = mask[:, class_id]
        candidates = tf.boolean_mask(tensor=boxes, mask=keep)
        candidate_scores = tf.boolean_mask(tensor=box_scores[:, class_id], mask=keep)
        nms_index = tf.image.non_max_suppression(
            candidates, candidate_scores, max_boxes_tensor,
            iou_threshold=iou_threshold)
        candidates = K.gather(candidates, nms_index)
        candidate_scores = K.gather(candidate_scores, nms_index)
        kept_boxes.append(candidates)
        kept_scores.append(candidate_scores)
        kept_classes.append(K.ones_like(candidate_scores, "int32") * class_id)

    boxes_ = K.concatenate(kept_boxes, axis=0)
    scores_ = K.concatenate(kept_scores, axis=0)
    classes_ = K.concatenate(kept_classes, axis=0)
    return boxes_, scores_, classes_
def _interpolate(image, sampled_grids, output_size):
    """Bilinearly sample ``image`` at the coordinates in ``sampled_grids``.

    Args:
        image: 4-D tensor indexed as (batch, height, width, channels).
        sampled_grids: tensor whose second axis holds x at index 0 and y at
            index 1. NOTE(review): coordinates are presumably normalised to
            [-1, 1] -- confirm against the grid generator.
        output_size: pair whose product is the number of sampled points per
            image.

    Returns:
        Tensor of interpolated pixel values, one row per sampled point.
    """
    batch_size = K.shape(image)[0]
    height = K.shape(image)[1]
    width = K.shape(image)[2]
    num_channels = K.shape(image)[3]

    x = K.cast(K.flatten(sampled_grids[:, 0:1, :]), dtype='float32')
    y = K.cast(K.flatten(sampled_grids[:, 1:2, :]), dtype='float32')

    # x is the column index (added directly to the flat row offset below), so
    # it spans the width; y is the row index (multiplied by width), so it
    # spans the height.
    x = .5 * (x + 1.0) * K.cast(width - 1, dtype='float32')
    y = .5 * (y + 1.0) * K.cast(height - 1, dtype='float32')

    x0 = K.cast(x, 'int32')
    x1 = x0 + 1
    y0 = K.cast(y, 'int32')
    y1 = y0 + 1

    # Clip columns to the width axis and rows to the height axis so the flat
    # indices stay inside the image for non-square inputs as well.
    max_x = int(K.int_shape(image)[2] - 1)
    max_y = int(K.int_shape(image)[1] - 1)

    x0 = K.clip(x0, 0, max_x)
    x1 = K.clip(x1, 0, max_x)
    y0 = K.clip(y0, 0, max_y)
    y1 = K.clip(y1, 0, max_y)

    # Offset of each image's first pixel in the flattened batch.
    pixels_batch = K.arange(0, batch_size) * (height * width)
    pixels_batch = K.expand_dims(pixels_batch, axis=-1)
    flat_output_size = output_size[0] * output_size[1]
    base = K.repeat_elements(pixels_batch, flat_output_size, axis=1)
    base = K.flatten(base)

    base_y0 = y0 * width
    base_y0 = base + base_y0
    base_y1 = y1 * width
    base_y1 = base_y1 + base

    # Flat indices of the four neighbouring pixels.
    indices_a = base_y0 + x0
    indices_b = base_y1 + x0
    indices_c = base_y0 + x1
    indices_d = base_y1 + x1

    flat_image = K.reshape(image, shape=(-1, num_channels))
    flat_image = K.cast(flat_image, dtype='float32')
    pixel_values_a = K.gather(flat_image, indices_a)
    pixel_values_b = K.gather(flat_image, indices_b)
    pixel_values_c = K.gather(flat_image, indices_c)
    pixel_values_d = K.gather(flat_image, indices_d)

    x0 = K.cast(x0, 'float32')
    x1 = K.cast(x1, 'float32')
    y0 = K.cast(y0, 'float32')
    y1 = K.cast(y1, 'float32')

    # Bilinear weights: each neighbour is weighted by the opposite area.
    area_a = K.expand_dims(((x1 - x) * (y1 - y)), 1)
    area_b = K.expand_dims(((x1 - x) * (y - y0)), 1)
    area_c = K.expand_dims(((x - x0) * (y1 - y)), 1)
    area_d = K.expand_dims(((x - x0) * (y - y0)), 1)

    values_a = area_a * pixel_values_a
    values_b = area_b * pixel_values_b
    values_c = area_c * pixel_values_c
    values_d = area_d * pixel_values_d
    return values_a + values_b + values_c + values_d
def _center_loss_func(labels, features, alpha, num_classes, centers, feature_dim):
    """Update the class centres for this batch and return the mean squared
    distance between the features and their updated centres.
    """
    assert feature_dim == features.get_shape()[1]
    class_ids = tf.to_int32(K.reshape(labels, [-1]))

    # Move each referenced centre towards its features by a (1 - alpha) step.
    current_centers = K.gather(centers, class_ids)
    step = (1 - alpha) * (current_centers - features)
    centers = tf.scatter_sub(centers, class_ids, step)

    updated_centers = K.gather(centers, class_ids)
    return K.mean(K.square(features - updated_centers))
def _filter_detections(scores, labels):
    """Return ``[box_index, label]`` pairs for detections that exceed the
    score threshold and, when enabled, survive non-max suppression.
    """
    kept = tf.where(K.greater(scores, score_threshold))

    if nms:
        candidate_boxes = tf.gather_nd(boxes, kept)
        candidate_scores = K.gather(scores, kept)[:, 0]
        nms_indices = tf.image.non_max_suppression(
            candidate_boxes,
            candidate_scores,
            max_output_size=max_detections,
            iou_threshold=nms_threshold,
        )
        kept = K.gather(kept, nms_indices)

    kept_labels = tf.gather_nd(labels, kept)
    return K.stack([kept[:, 0], kept_labels], axis=1)
def filter_detections(boxes, classification, other=None, class_specific_filter=True,
                      nms=True, score_threshold=0.5, max_detections=300,
                      nms_threshold=0.5):
    """Filter detections by score and optional NMS, keep the best
    ``max_detections`` and pad every output to that length with -1.

    Args:
        boxes: tensor of candidate boxes; the result is declared as
            ``(max_detections, 4)``.
        classification: 2-D tensor of per-class scores for every box.
        other: optional list of extra tensors, indexed like ``boxes``, that
            are filtered and padded the same way.
        class_specific_filter: filter each class separately when true,
            otherwise filter on each box's best class only.
        nms: whether to apply non-max suppression.
        score_threshold: minimum score for a detection to be kept.
        max_detections: maximum number of detections to return.
        nms_threshold: IoU threshold for non-max suppression.

    Returns:
        ``[boxes, scores, labels] + other_`` where each entry has
        ``max_detections`` rows.
    """
    other = [] if other is None else list(other)

    def _filter_detections(scores, labels):
        indices = tf.where(K.greater(scores, score_threshold))
        if nms:
            filtered_boxes = tf.gather_nd(boxes, indices)
            filtered_scores = K.gather(scores, indices)[:, 0]
            nms_indices = tf.image.non_max_suppression(
                filtered_boxes, filtered_scores,
                max_output_size=max_detections,
                iou_threshold=nms_threshold)
            indices = K.gather(indices, nms_indices)
        labels = tf.gather_nd(labels, indices)
        # Pair each box index with its label.
        return K.stack([indices[:, 0], labels], axis=1)

    if class_specific_filter:
        all_indices = []
        for c in range(int(classification.shape[1])):
            scores = classification[:, c]
            labels = c * tf.ones((K.shape(scores)[0], ), dtype='int64')
            all_indices.append(_filter_detections(scores, labels))
        indices = K.concatenate(all_indices, axis=0)
    else:
        scores = K.max(classification, axis=1)
        labels = K.argmax(classification, axis=1)
        indices = _filter_detections(scores, labels)

    # Keep the highest-scoring detections.
    scores = tf.gather_nd(classification, indices)
    labels = indices[:, 1]
    scores, top_indices = tf.nn.top_k(
        scores, k=K.minimum(max_detections, K.shape(scores)[0]))

    indices = K.gather(indices[:, 0], top_indices)
    boxes = K.gather(boxes, indices)
    labels = K.gather(labels, top_indices)
    other_ = [K.gather(o, indices) for o in other]

    # Number of missing rows. This must be maximum(0, ...): minimum(0, ...)
    # is never positive, so nothing would be padded and the static shapes
    # set below could not hold.
    pad_size = K.maximum(0, max_detections - K.shape(scores)[0])
    boxes = tf.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1)
    scores = tf.pad(scores, [[0, pad_size]], constant_values=-1)
    labels = tf.pad(labels, [[0, pad_size]], constant_values=-1)
    # Pad only the first axis of each extra tensor, with -1 like the other
    # outputs; every remaining axis gets a [0, 0] entry.
    other_ = [
        tf.pad(o, [[0, pad_size]] + [[0, 0] for _ in range(1, len(o.shape))],
               constant_values=-1)
        for o in other_
    ]

    # Declare the static shapes, which are now always max_detections long.
    boxes.set_shape([max_detections, 4])
    scores.set_shape([max_detections])
    labels.set_shape([max_detections])
    for o, s in zip(other_, [list(K.int_shape(o)) for o in other]):
        o.set_shape([max_detections] + s[1:])

    return [boxes, scores, labels] + other_
def compute_nms(args):
    """Per-class score filtering and NMS for one image, returning the top
    ``config.max_boxes`` detections padded with -1.

    ``args`` is ``(boxes, classification)``; returns ``[boxes, scores, labels]``.
    """
    boxes, classification = args

    def nms_fn(score, label):
        kept = tf.where(backend.greater(score, config.score_threshold))
        candidate_boxes = tf.gather_nd(boxes, kept)
        candidate_scores = backend.gather(score, kept)[:, 0]
        nms_indices = tf.image.non_max_suppression(
            candidate_boxes, candidate_scores, config.max_boxes)
        kept = backend.gather(kept, nms_indices)
        kept_labels = tf.gather_nd(label, kept)
        # Pair each box index with its label.
        return backend.stack([kept[:, 0], kept_labels], axis=1)

    per_class = []
    for c in range(int(classification.shape[1])):
        class_scores = classification[:, c]
        class_labels = c * tf.ones((backend.shape(class_scores)[0], ), dtype='int64')
        per_class.append(nms_fn(class_scores, class_labels))
    indices = backend.concatenate(per_class, axis=0)

    scores = tf.gather_nd(classification, indices)
    labels = indices[:, 1]
    top_count = backend.minimum(config.max_boxes, backend.shape(scores)[0])
    scores, top_indices = tf.nn.top_k(scores, k=top_count)

    box_indices = backend.gather(indices[:, 0], top_indices)
    boxes = backend.gather(boxes, box_indices)
    labels = backend.gather(labels, top_indices)

    # Pad up to config.max_boxes entries with -1.
    pad_size = backend.maximum(0, config.max_boxes - backend.shape(scores)[0])
    boxes = tf.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1)
    scores = tf.pad(scores, [[0, pad_size]], constant_values=-1)
    labels = tf.pad(labels, [[0, pad_size]], constant_values=-1)
    labels = backend.cast(labels, 'int32')

    boxes.set_shape([config.max_boxes, 4])
    scores.set_shape([config.max_boxes])
    labels.set_shape([config.max_boxes])
    return [boxes, scores, labels]
def eval_img(self, y_pred, image_shape, score_threshold, iou_threshold, max_boxes=200):
    '''
    Decode the model outputs and apply per-class non-max suppression.

    :param y_pred: list of feature-layer outputs
    :param image_shape: image shape, wrapped in a constant tensor
    :param score_threshold: minimum class score for a box to be considered
    :param iou_threshold: IoU threshold for non-max suppression
    :param max_boxes: todo this preset count limits how many boxes are
        recognised (it is the per-class cap passed to suppression)
    :return: boxes, scores, classes
    '''
    image_shape = tf.constant(image_shape)
    num_classes = len(self.class_names)
    input_shape = K.shape(y_pred[0])[1:3] * 32

    # anchor_mask is not defined in this method; it is read from an
    # enclosing scope.
    layer_boxes, layer_scores = [], []
    for layer in range(len(y_pred)):
        decoded_boxes, decoded_scores = self.boxes_and_scores(
            y_pred[layer], self.anchors[anchor_mask[layer]], num_classes,
            input_shape, image_shape)
        layer_boxes.append(decoded_boxes)
        layer_scores.append(decoded_scores)
    boxes = K.concatenate(layer_boxes, axis=0)
    box_scores = K.concatenate(layer_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    kept_boxes, kept_scores, kept_classes = [], [], []
    for class_id in range(num_classes):
        keep = mask[:, class_id]
        candidates = tf.boolean_mask(boxes, keep)
        candidate_scores = tf.boolean_mask(box_scores[:, class_id], keep)
        nms_index = tf.image.non_max_suppression(
            candidates, candidate_scores, max_boxes_tensor,
            iou_threshold=iou_threshold)
        candidates = K.gather(candidates, nms_index)
        candidate_scores = K.gather(candidate_scores, nms_index)
        kept_boxes.append(candidates)
        kept_scores.append(candidate_scores)
        kept_classes.append(K.ones_like(candidate_scores, 'int32') * class_id)

    boxes_ = K.concatenate(kept_boxes, axis=0)
    scores_ = K.concatenate(kept_scores, axis=0)
    classes_ = K.concatenate(kept_classes, axis=0)
    return boxes_, scores_, classes_
def nms_fn(score, label):
    """Return ``[box_index, label]`` pairs for boxes above ``config.threshold``
    that survive non-max suppression (IoU threshold 0.1).
    """
    kept = tf.where(backend.greater(score, config.threshold))

    candidate_boxes = tf.gather_nd(boxes, kept)
    candidate_scores = backend.gather(score, kept)[:, 0]
    nms_indices = tf.image.non_max_suppression(
        candidate_boxes, candidate_scores, config.max_boxes, 0.1)

    kept = backend.gather(kept, nms_indices)
    kept_labels = tf.gather_nd(label, kept)
    return backend.stack([kept[:, 0], kept_labels], axis=1)
def multilabel_dice_coefficient_fixed(y_true, y_pred):
    """Negated overlap score accumulated over every label except label 0.

    Intersections and unions are summed over labels 1..N-1 before the ratio
    is taken; the result is smoothed with ``smoothing_factor`` and negated.
    """
    y_dims = K.int_shape(y_pred)
    number_of_labels = y_dims[len(y_dims) - 1]

    # Bring the label axis to the front so one label can be gathered at a time.
    if dimensionality == 2:
        # 2-D image
        label_first = (3, 0, 1, 2)
    elif dimensionality == 3:
        # 3-D image
        label_first = (4, 0, 1, 2, 3)
    else:
        raise ValueError("Specified dimensionality not implemented.")
    y_true_permuted = K.permute_dimensions(y_true, pattern=label_first)
    y_pred_permuted = K.permute_dimensions(y_pred, pattern=label_first)

    def _label_sums(label):
        # Summed intersection and union for a single label.
        true_flat = K.flatten(K.gather(y_true_permuted, indices=(label)))
        pred_flat = K.flatten(K.gather(y_pred_permuted, indices=(label)))
        intersection = true_flat * pred_flat
        union = true_flat + pred_flat - intersection
        return K.sum(intersection), K.sum(union)

    numerator, denominator = _label_sums(1)
    if number_of_labels > 2:
        for j in range(2, number_of_labels):
            label_numerator, label_denominator = _label_sums(j)
            numerator = numerator + label_numerator
            denominator = denominator + label_denominator

    unionOverlap = numerator / denominator
    return (-1.0 * (2.0 * unionOverlap + smoothing_factor) /
            (1.0 + unionOverlap + smoothing_factor))
def call(self, inputs):
    """Look up the embedding rows for ``inputs`` and, when ``self._scale`` is
    set, multiply them by the square root of the model dimension.
    """
    token_ids = inputs if K.dtype(inputs) == 'int32' else K.cast(inputs, 'int32')
    looked_up = K.gather(self.embeddings, token_ids)
    if self._scale:
        # Scale
        looked_up = looked_up * self._model_dim**0.5
    return looked_up
def path_energy0(y, x, U, mask=None):
    '''Path energy without boundary potential handling.'''
    n_classes = K.shape(x)[2]

    # Tag path energy: pick the score of each chosen tag, then sum over steps.
    tag_scores = K.sum(x * K.one_hot(y, n_classes), 2)
    tag_energy = K.sum(tag_scores, 1)

    # Transition energy between consecutive tags.
    prev_tags, next_tags = y[:, :-1], y[:, 1:]
    # Convert 2-dim indices (prev, next) of U to 1-dim indices of flattened U.
    pair_index = prev_tags * n_classes + next_tags
    pair_energy = K.gather(K.reshape(U, [-1]), pair_index)

    if mask is not None:
        mask = K.cast(mask, K.floatx())
        # A transition counts only when both of its endpoints are unmasked.
        pair_energy = pair_energy * (mask[:, :-1] * mask[:, 1:])

    return tag_energy + K.sum(pair_energy, axis=1)
def call(self, inputs, **kwargs):
    """Look up the embedding rows for ``inputs`` and scale them by the square
    root of ``self.model_dim``.
    """
    token_ids = inputs
    if K.dtype(token_ids) != 'int32':
        token_ids = K.cast(token_ids, 'int32')
    # gather replaces every element of the inputs with the embedding row
    # stored at that element's value.
    looked_up = K.gather(self.embeddings, token_ids)
    return looked_up * self.model_dim ** 0.5
def encoder(self, inputs):
    """Run the encoder stack over integer inputs.

    Returns ``(encodings, masks)`` where ``masks`` marks the positions whose
    input id equals 0.
    """
    if K.dtype(inputs) != 'int32':
        inputs = K.cast(inputs, 'int32')
    masks = K.equal(inputs, 0)

    # Embeddings, scaled by the square root of the model dimension.
    embeddings = K.gather(self.embeddings, inputs) * self._model_dim ** 0.5
    # Embeddings + position encodings, followed by dropout.
    encodings = embeddings + self.EncoderPositionEncoding(embeddings)
    encodings = K.dropout(encodings, self._dropout_rate)

    for layer in range(self._encoder_stack):
        # Multi-head attention, then add & norm.
        attend = self.EncoderMultiHeadAttetions[layer]
        attended = attend([encodings, encodings, encodings, masks])
        attended = self.EncoderLayerNorms0[layer](attended + encodings)
        # Feed-forward, then add & norm.
        feed_forward = self.EncoderPositionWiseFeedForwards[layer]
        transformed = feed_forward(attended)
        encodings = self.EncoderLayerNorms1[layer](transformed + attended)

    return encodings, masks
def aucMetric(true, pred):
    """Batch-wise area-under-curve style metric computed from ranked
    predictions, normalised by positives times negatives.
    """
    # Min-max normalise both tensors, then flatten: strictly 1-D arrays are
    # required (a (batch, 1) shape, for instance, is not acceptable).
    true_low, true_high = K.min(true), K.max(true)
    pred_low, pred_high = K.min(pred), K.max(pred)
    true = K.flatten((true - true_low) / (true_high - true_low))
    pred = K.flatten((pred - pred_low) / (pred_high - pred_low))

    # Total number of elements in this batch.
    totalCount = K.shape(true)[0]

    # Sort the predictions in descending order and reorder the ground truth
    # to match.
    _, order = tf.nn.top_k(pred, k=totalCount)
    sortedTrue = K.gather(true, order)

    # Ground-truth negatives, in the same order.
    negatives = 1 - sortedTrue
    # True-positive count per threshold.
    TPCurve = K.cumsum(sortedTrue)
    # Area under the curve.
    auc = K.sum(TPCurve * negatives)

    # Normalise the result between 0 and 1.
    positiveCount = K.sum(true)
    negativeCount = K.cast(totalCount, K.floatx()) - positiveCount
    return auc / (positiveCount * negativeCount)