def process_train_dataset(model, train_p, batch_size):
    """Chain the training pipeline stages onto ``train_p``.

    The stages are applied in this order through ``train_p``'s own
    ``dataset_*`` methods: example parsing, augmentation, shuffling,
    batching and finally target transformation.  Nothing is returned.

    Args:
        model: Object supplying ``num_classes``, ``channels``,
            ``grid_factor`` and ``anchors``.
        train_p: Dataset wrapper exposing ``dataset_map``,
            ``dataset_shuffle``, ``dataset_batch`` and the parsing /
            augmentation / target helpers used below.
        batch_size: Fallback passed to ``get_flag("batch_size", ...)``.
    """
    example_parser = train_p.parse_detection_example(
        train_p.image_feature_map, len(model.num_classes), model.channels)
    train_p.dataset_map(example_parser)
    train_p.dataset_map(train_p.tf_augment_data)

    # The same flag-resolved value is used both as the shuffle argument and
    # as the batch size.
    train_p.dataset_shuffle(get_flag("batch_size", batch_size))
    train_p.dataset_batch(get_flag("batch_size", batch_size))

    def _to_model_targets(x, y):
        # Images pass through untouched; only the labels are transformed.
        targets = train_p.transform_targets(
            y, model.grid_factor, model.anchors,
            get_anchor_masks(model.anchors))
        return x, targets

    train_p.dataset_map(_to_model_targets)
def nms(preds,
        score_threshold=get_flag("yolo_score_threshold", 0.2),
        iou_threshold=get_flag("yolo_iou_threshold", 0.45),
        max_output=get_flag("yolo_max_boxes", 10),
        test_image_shape=None):
    """Run non-max suppression independently on every sample of a batch.

    The last axis of ``preds`` is split into 4 box values, 1 combined score
    and the remaining per-class scores.  Boxes are passed through
    ``rescale_pred_wh`` before suppression.

    NOTE: the ``get_flag`` defaults in the signature are evaluated once,
    when this ``def`` statement executes, not on every call.

    Args:
        preds: Prediction tensor whose last axis is laid out as
            ``(4 box values, 1 combined score, ...scores)``.
        score_threshold: Candidates with a combined score below this value
            are discarded before suppression.
        iou_threshold: Forwarded to ``non_max_suppression``.
        max_output: Maximum number of indexes kept per sample.
        test_image_shape: Forwarded to ``rescale_pred_wh``.

    Returns:
        A pair ``(selected_boxes, selected_scores)``: two lists with one
        array per sample.  A sample without surviving candidates
        contributes arrays with zero rows.
    """
    boxes, scores_combined, scores = tf.split(preds, (4, 1, -1), axis=-1)
    boxes = rescale_pred_wh(boxes, test_image_shape)

    @jit(nopython=True)
    def _nms(boxes, scores_combined, scores):
        selected_boxes = []
        selected_scores = []
        for n in range(boxes.shape[0]):
            box, score_combined, score = boxes[n], scores_combined[n], scores[n]

            # Drop low-confidence candidates before suppression.
            mask = (score_combined >= score_threshold).ravel()
            box = box[mask]
            score_combined = score_combined[mask]
            score = score[mask]

            nms_indexes = non_max_suppression(
                box, score_combined, iou_threshold)[:max_output]
            if len(nms_indexes) != 0:
                selected_boxes.append(box[nms_indexes])
                selected_scores.append(score[nms_indexes])
            else:
                # Empty placeholders reuse the dtype of the arrays they
                # stand in for, so each list stays homogeneous (the boxes
                # placeholder used to be int32).
                selected_boxes.append(
                    np.empty((0, box.shape[-1]), box.dtype))
                selected_scores.append(
                    np.empty((0, score.shape[-1]), score.dtype))
        return selected_boxes, selected_scores

    # The statements that used to follow this return were unreachable and
    # unpacked three values from a helper that returns two; they are removed.
    return _nms(np.array(boxes), np.array(scores_combined), np.array(scores))
def body(i, boxes, scores, nums):
    """Loop step: run NMS on sample ``i`` and append its results.

    Reads ``batch_boxes``, ``batch_scores_combined``, ``batch_scores`` and
    ``max_box`` from the enclosing scope.

    Args:
        i: Index of the sample to process.
        boxes: Boxes accumulated so far; selected boxes are concatenated
            along axis 0.
        scores: Scores accumulated so far; selected scores are
            concatenated along axis 0.
        nums: Per-sample selection counts accumulated so far.

    Returns:
        ``(i + 1, boxes, scores, nums)`` with this sample's results added.
    """
    sample_boxes = batch_boxes[i]
    # The scores returned by the NMS op are not used; the selection is
    # gathered from batch_scores instead.
    keep, _ = tf.image.non_max_suppression_with_scores(
        tf.reshape(sample_boxes, (-1, 4)),
        tf.reshape(batch_scores_combined[i], (-1, )),
        max_output_size=get_flag("yolo_max_boxes", max_box),
        iou_threshold=get_flag("yolo_iou_threshold", 0.45),
        score_threshold=get_flag("yolo_score_threshold", 0.2),
        soft_nms_sigma=get_flag("yolo_soft_nms_sigma", 0.0))

    kept_boxes = tf.gather(sample_boxes, keep)
    kept_scores = tf.gather(batch_scores[i], keep)
    kept_count = tf.shape(keep)[0]

    return (
        tf.add(i, 1),
        tf.concat([boxes, kept_boxes], axis=0),
        tf.concat([scores, kept_scores], axis=0),
        tf.concat([nums, [kept_count]], axis=0),
    )
def yolo_boxes(pred, anchors, classes):
    """Decode one raw prediction tensor into boxes and probabilities.

    Args:
        pred: Raw output, documented by the original author as
            ``(batch_size, grid, grid, anchors, (x, y, w, h, obj, ...classes))``.
        anchors: Anchor sizes multiplied with the exponentiated width/height.
        classes: Iterable of class counts; their sum is the number of
            class channels split off the last axis.

    Returns:
        ``(bbox, objectness, class_probs, pred_box)`` where ``bbox`` holds
        the two box corners concatenated on the last axis and ``pred_box``
        holds the scaled-sigmoid xy with the raw wh (kept for the loss).
    """
    grid_size = tf.shape(pred)[1]
    raw_xy, raw_wh, raw_obj, raw_cls = tf.split(
        pred, (2, 2, 1, sum(classes)), axis=-1)

    box_xy = tf.sigmoid(raw_xy) * get_flag("yolo_scale_xy", 1.1)
    objectness = tf.sigmoid(raw_obj)
    class_probs = tf.sigmoid(raw_cls)
    # xywh before grid offsets and anchors are applied, used by the loss.
    pred_box = tf.concat((box_xy, raw_wh), axis=-1)

    # !!! grid[x][y] == (y, x)
    cell_index = tf.range(grid_size)
    grid = tf.stack(tf.meshgrid(cell_index, cell_index), axis=-1)
    grid = tf.expand_dims(grid, axis=2)  # [gx, gy, 1, 2]

    # Add each cell's offset and normalise by the grid size.
    centers = (box_xy + tf.cast(grid, tf.float32)) / tf.cast(
        grid_size, tf.float32)
    half_extent = tf.exp(raw_wh) * anchors / 2

    bbox = tf.concat(
        [centers - half_extent, centers + half_extent], axis=-1)
    return bbox, objectness, class_probs, pred_box
def body(i, boxes, scores, nums):
    """Loop step: run NMS on sample ``i`` and store its results at slot ``i``.

    Reads ``batch_boxes``, ``batch_scores_combined`` and ``batch_scores``
    from the enclosing scope.

    Args:
        i: Index of the sample to process.
        boxes: Array-like accumulator with a ``write(index, value)`` method
            that receives the selected boxes.
            NOTE(review): assumed to be a ``tf.TensorArray`` - confirm.
        scores: Same kind of accumulator for the selected scores.
        nums: Same kind of accumulator for the number of selections.

    Returns:
        ``(i + 1, boxes, scores, nums)`` where the three accumulators are
        the objects returned by their ``write`` calls.
    """
    # The scores returned by the NMS op are not used; the selection is
    # gathered from batch_scores instead.
    selected_indices, _ = tf.image.non_max_suppression_with_scores(
        tf.reshape(batch_boxes[i], (-1, 4)),
        tf.reshape(batch_scores_combined[i], (-1, )),
        max_output_size=get_flag("yolo_max_boxes", 10),
        iou_threshold=get_flag("yolo_iou_threshold", 0.45),
        score_threshold=get_flag("yolo_score_threshold", 0.2))
    selected_boxes = tf.gather(batch_boxes[i], selected_indices)
    selected_scores = tf.gather(batch_scores[i], selected_indices)

    # TensorArray.write returns the array that must be used for subsequent
    # operations; discarding it loses the write in graph mode.
    boxes = boxes.write(i, selected_boxes)
    scores = scores.write(i, selected_scores)
    nums = nums.write(i, tf.shape(selected_indices)[0])
    return tf.add(i, 1), boxes, scores, nums
def body(boxes, scores_combined, scores):
    """Run NMS on one sample and zero-pad the selection to a fixed row count.

    Reads ``max_box`` and ``batch_scores`` from the enclosing scope.

    Args:
        boxes: Boxes of one sample; reshaped to ``(-1, 4)`` for NMS.
        scores_combined: Combined scores of one sample; flattened for NMS.
        scores: Scores of one sample, gathered at the selected indices.

    Returns:
        ``(padded_boxes, padded_scores, selected_count)``.  Both padded
        tensors have ``get_flag("yolo_max_boxes", max_box)`` rows, the
        rows after ``selected_count`` being zeros.
    """
    # Resolve the limit once so the NMS cap and the padding target agree.
    # Padding against the raw ``max_box`` gives a negative pad size whenever
    # the flag allows more selections than ``max_box``.
    max_output = get_flag("yolo_max_boxes", max_box)

    # The scores returned by the NMS op are not used; the selection is
    # gathered from ``scores`` instead.
    selected_indices, _ = tf.image.non_max_suppression_with_scores(
        tf.reshape(boxes, (-1, 4)),
        tf.reshape(scores_combined, (-1, )),
        max_output_size=max_output,
        iou_threshold=get_flag("yolo_iou_threshold", 0.45),
        score_threshold=get_flag("yolo_score_threshold", 0.2),
        soft_nms_sigma=get_flag("yolo_soft_nms_sigma", 0.0))
    selected_count = tf.shape(selected_indices)[0]
    selected_boxes = tf.gather(boxes, selected_indices)
    selected_scores = tf.gather(scores, selected_indices)

    pad_num = max_output - selected_count
    padded_boxes = tf.concat(
        [selected_boxes, tf.zeros((pad_num, 4), dtype=tf.float32)], axis=0)
    padded_scores = tf.concat(
        [selected_scores,
         tf.zeros((pad_num, tf.shape(batch_scores)[-1]), dtype=tf.float32)],
        axis=0)
    return padded_boxes, padded_scores, selected_count
def tf_augment_data(self, x_train, y_train):
    """Wrap ``self.augment_data`` with ``tf.numpy_function``.

    The static shapes are set again on the two outputs after the call.

    Args:
        x_train: Image tensor passed to ``self.augment_data``.
        y_train: Label tensor passed to ``self.augment_data``.

    Returns:
        ``(x_train, y_train)`` as float32 tensors with static shapes
        ``(size, size, num_channel)`` and
        ``(yolo_max_boxes, 4 + sum_num_classes)``.
    """
    x_train, y_train = tf.numpy_function(
        self.augment_data, [x_train, y_train], [tf.float32, tf.float32])

    image_shape = (self.size, self.size, self.num_channel)
    x_train.set_shape(image_shape)

    label_shape = (get_flag("yolo_max_boxes", self.max_box),
                   4 + self.sum_num_classes)
    y_train.set_shape(label_shape)
    return x_train, y_train
def get_test_dataset(model, voc_set, batch_size=24):
    """Build the test dataset: load, parse, augment, then batch.

    Args:
        model: Object supplying ``input_size``, ``channels``, ``img_aug``,
            ``num_classes``, ``max_box`` and ``to_index``.
        voc_set: Object whose ``testing`` attribute is handed to
            ``load_voc_dataset``.
        batch_size: Fallback passed to ``get_flag("batch_size", ...)``.

    Returns:
        The ``dataset`` attribute of the configured ``TfDataset``.
    """
    test_p = TfDataset(model.input_size[0], model.channels, model.img_aug,
                       model.num_classes, model.max_box)
    test_p.dataset = test_p.load_voc_dataset(voc_set.testing, model.to_index)

    example_parser = test_p.parse_detection_example(
        test_p.image_feature_map, len(model.num_classes), model.channels)
    for stage in (example_parser, test_p.tf_augment_data):
        test_p.dataset_map(stage)

    test_p.dataset_batch(get_flag("batch_size", batch_size))
    return test_p.dataset
def augment_data(self, x_train, y_train):
    """Augment one image with its boxes and rebuild the label array.

    Each row of ``y_train`` is read as ``x1, y1, x2, y2`` followed by the
    label values.  After ``self.img_aug.aug`` runs, boxes are filtered and
    clipped with imgaug's ``remove_out_of_image`` / ``clip_out_of_image``
    and written into a zero-initialised array.  Coordinates are divided by
    ``self.size`` and each label group is one-hot encoded.

    Args:
        x_train: Image to augment.
        y_train: Iterable of box rows ``(x1, y1, x2, y2, *labels)``.

    Returns:
        ``(image, labels)`` as float32 tensors; ``labels`` has
        ``get_flag("yolo_max_boxes", self.max_box)`` rows and
        ``4 + self.sum_num_classes`` columns.
    """
    source_boxes = [
        ia.BoundingBox(x1=row[0], y1=row[1], x2=row[2], y2=row[3],
                       label=row[4:])
        for row in y_train
    ]
    boxes_on_image = ia.BoundingBoxesOnImage(source_boxes,
                                             shape=x_train.shape)
    images, bboxes = self.img_aug.aug([x_train], [boxes_on_image])

    labels = np.zeros(
        (get_flag("yolo_max_boxes", self.max_box),
         4 + self.sum_num_classes),
        dtype=np.float32)
    surviving = bboxes[0].remove_out_of_image().clip_out_of_image()
    size = self.size

    for row_idx, bbox in enumerate(surviving.bounding_boxes):
        # Keep only boxes whose centre lies strictly inside the image; a
        # skipped box leaves its row as zeros.
        if not (0 < bbox.center_x < size and 0 < bbox.center_y < size):
            continue
        labels[row_idx, 0:4] = (bbox.x1 / size, bbox.y1 / size,
                                bbox.x2 / size, bbox.y2 / size)

        # One-hot encode every label group into its own column range.
        col_start = 4
        for group_idx, group_size in enumerate(self.num_classes):
            col_end = col_start + group_size
            one_hot = np.eye(group_size)[int(bbox.label[group_idx])]
            labels[row_idx, col_start:col_end] = one_hot
            col_start = col_end

    return (tf.convert_to_tensor(images[0], dtype=tf.float32),
            tf.convert_to_tensor(labels, dtype=tf.float32))
def _calculate_loss(i, j):
    """Return the masked binary cross-entropy over class channels ``i:j``.

    Reads ``obj_mask``, ``true_class`` and ``pred_class`` from the
    enclosing scope; label smoothing comes from the
    ``yolo_label_smoothing`` flag (fallback ``0.0``).
    """
    true_slice = true_class[..., i:j]
    pred_slice = pred_class[..., i:j]
    class_bce = binary_crossentropy(
        true_slice,
        pred_slice,
        label_smoothing=get_flag("yolo_label_smoothing", 0.0))
    return obj_mask * class_bce