def batch_gpu_nms(dets, thresh):
    """
    Greedy non-maximum suppression over one set of scored boxes.

    Boxes are visited in descending score order. A box whose score has not
    been zeroed is kept, and every box whose ``iou`` with it exceeds
    ``thresh`` gets its score zeroed so it is skipped later. Boxes whose
    score is exactly 0 on input are therefore never kept.

    :param dets: 2-D tensor; columns 0-3 are read as the box, column 4 as the
        score. NOTE(review): an earlier docstring described a
        (Batch, N, 5) input, but the indexing below treats axis 0 as the box
        axis -- confirm against callers.
    :param thresh: IoU value above which a box is suppressed
    :return: keep_idx: list of kept indices (0-dim tensors taken from the
                 sort order)
             keep: 0-1 mask with one entry per row of ``dets``
    """
    scores = dets[:, 4]
    boxs = dets[:, :4]
    # Entries of torch.Size are plain Python ints, so the previous
    # ``dets.shape[0].item()`` raised AttributeError.
    num_boxes = dets.shape[0]
    _, order = scores.sort(0, descending=True)
    keep_idx = []
    keep = torch.zeros(num_boxes)
    for idx in order:
        if scores[idx] != 0:
            # Repeat the selected box so it can be compared row-wise with
            # every box in one ``iou`` call.
            box_idx = torch.stack([boxs[idx]] * num_boxes, dim=0)
            _iou = iou(box_idx, boxs)
            # zeros_like already matches the device and dtype of ``scores``;
            # forcing ``.cuda()`` here broke CPU inputs.
            scores = torch.where(_iou > thresh, torch.zeros_like(scores), scores)
            keep_idx.append(idx)
            keep[idx] = 1
    return keep_idx, keep
def gpu_nms(dets, thresh, confidence=0.4):
    """
    Greedy non-maximum suppression restricted to confident boxes.

    Candidates are the boxes whose score is above ``confidence``, ordered by
    descending score and then passed through ``delect_box``. The first
    remaining candidate is kept, and only candidates whose ``iou`` with it is
    below ``thresh`` stay in the candidate set; this repeats until no
    candidates remain.

    :param dets: (N, 5) bbox and score; columns 0-3 are the box, column 4 the
        score
    :param thresh: (1) the NMS algorithm thresh
    :param confidence: boxes with a score not above this value are never
        candidates
    :return: keep_idx: list of kept indices (0-dim long tensors)
             keep: (N) 0-1 mask
    """
    scores = dets[:, 4]
    boxs = dets[:, :4]
    sorted_scores, sorted_idx = scores.sort(0, descending=True)
    keep_idx = []
    keep = torch.zeros(scores.shape)
    # Column 0 holds the score, column 1 the original row index (stored as
    # float so both fit in one tensor and are filtered together).
    scores_keep = torch.stack([sorted_scores, sorted_idx.float()], dim=1)
    scores_keep = scores_keep[scores_keep[:, 0] > confidence, :]
    boxs_keep = boxs[scores_keep[:, 1].long(), :]
    boxs_keep, scores_keep = delect_box(boxs_keep, scores_keep)
    while scores_keep.shape[0] > 0:
        idx = scores_keep[0, 1].long()
        box = boxs[idx]
        _iou = iou(torch.stack([box] * scores_keep.shape[0], dim=0), boxs_keep)
        survivors = _iou < thresh
        # The selected box is consumed in this iteration. Dropping it
        # explicitly guarantees progress: previously, if its own IoU entry
        # was below ``thresh`` (e.g. thresh > 1), row 0 survived the filter
        # and the loop never terminated.
        survivors[0] = False
        scores_keep = scores_keep[survivors, :]
        boxs_keep = boxs_keep[survivors, :]
        keep_idx.append(idx)
        keep[idx] = 1
    return keep_idx, keep
def create_models(backbone_retinanet, num_classes, weights, num_gpus=0, freeze_backbone=False, lr=1e-5, config=None):
    """ Build the base, training and prediction models and compile the training model.

    Args
        backbone_retinanet : Callable that creates a retinanet model for a given backbone.
        num_classes        : Number of classes passed to the backbone builder.
        weights            : Weights handed to model_with_weights.
        num_gpus           : When greater than 1, the model is wrapped with multi_gpu_model.
        freeze_backbone    : When True, freeze_model is passed as the backbone modifier.
        lr                 : Learning rate given to the Adam optimizer.
        config             : Accepted for interface compatibility; not read by this function.

    Returns
        model            : The base model.
        training_model   : The compiled model used for training; the same object as model unless num_gpus > 1.
        prediction_model : The base model wrapped by retinanet_bbox.
    """
    modifier = None
    if freeze_backbone:
        modifier = freeze_model

    def _build_base_model():
        # backbone network with the requested weights loaded (mismatching layers are skipped)
        network = backbone_retinanet(num_classes, modifier=modifier)
        return model_with_weights(network, weights=weights, skip_mismatch=True)

    if num_gpus > 1:
        # Keras recommends initialising a multi-gpu model on the CPU to ease
        # weight sharing, and to prevent OOM errors.
        from keras.utils import multi_gpu_model
        with tf.device('/cpu:0'):
            model = _build_base_model()
        training_model = multi_gpu_model(model, gpus=num_gpus)
    else:
        model = _build_base_model()
        training_model = model

    # inference-time wrapper around the base model
    prediction_model = retinanet_bbox(model=model)

    # one loss per named model output
    loss_by_output = {
        'regression': losses.iou(),
        'classification': losses.focal(),
        'centerness': losses.bce(),
    }
    optimizer = keras.optimizers.adam(lr=lr)
    training_model.compile(loss=loss_by_output, optimizer=optimizer)

    return model, training_model, prediction_model
def __init__(self, backbone):
    """Register the custom objects used by the models, store the backbone and validate it."""
    # (name, object) pairs mapping custom layer / initializer / loss names
    # to the classes or loss functions that implement them
    custom_entries = [
        ('UpsampleLike', layers.UpsampleLike),
        ('PriorProbability', initializers.PriorProbability),
        ('RegressBoxes', layers.RegressBoxes),
        ('FilterDetections', layers.FilterDetections),
        ('Anchors', layers.Anchors),
        ('ClipBoxes', layers.ClipBoxes),
        ('_focal', losses.focal()),
        ('bce_', losses.bce()),
        ('iou_', losses.iou()),
    ]
    self.custom_objects = dict(custom_entries)

    self.backbone = backbone
    self.validate()
def train(self, epochs, backbone_name, evaluation):
    """Compile the model, create generators and callbacks, and run training.

    Args
        epochs        : Number of epochs passed to fit_generator.
        backbone_name : Forwarded to create_callbacks.
        evaluation    : Forwarded to create_callbacks.

    Returns
        Whatever self.training_model.fit_generator returns.
    """
    # one loss per named model output
    loss_by_output = {
        'regression': losses.iou(),
        'classification': losses.focal(),
        'centerness': losses.bce(),
    }
    # NOTE(review): self.model is compiled here while self.training_model is
    # fitted below -- confirm they refer to the same model.
    self.model.compile(loss=loss_by_output, optimizer=keras.optimizers.adam(lr=1e-5))

    # data sources for training and validation
    generators = create_generators(self.config, self.dataset)
    train_generator, validation_generator = generators

    callbacks = create_callbacks(
        self.config,
        backbone_name,
        self.model,
        self.training_model,
        self.prediction_model,
        validation_generator,
        evaluation,
        self.log_dir,
    )

    fit_options = dict(
        generator=train_generator,
        initial_epoch=0,
        steps_per_epoch=self.config.STEPS_PER_EPOCH,
        epochs=epochs,
        verbose=1,
        callbacks=callbacks,
        max_queue_size=10,
        validation_data=validation_generator,
    )
    result = self.training_model.fit_generator(**fit_options)
    return result
def main(args=None):
    """Command-line entry point: parse arguments, build or load the models, and train.

    Args
        args : Argument list to parse; sys.argv[1:] is used when None.

    Returns
        Whatever training_model.fit_generator returns.
    """
    cli_args = sys.argv[1:] if args is None else args
    args = parse_args(cli_args)

    # object that stores backbone information
    backbone = models.backbone(args.backbone)

    # make sure keras is the minimum required version
    check_keras_version()

    # restrict the visible GPUs when one was requested
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    keras.backend.tensorflow_backend.set_session(get_session())

    # replace the config path by the parsed config, when given
    if args.config:
        args.config = read_config_file(args.config)

    train_generator, validation_generator = create_generators(
        args, backbone.preprocess_image)

    if args.snapshot is None:
        # fresh model: use the given weights, or fall back to imagenet weights
        weights = args.weights
        if weights is None and args.imagenet_weights:
            weights = backbone.download_imagenet()

        print('Creating model, this may take a second...')
        model, training_model, prediction_model = create_models(
            backbone_retinanet=backbone.retinanet,
            num_classes=train_generator.num_classes(),
            weights=weights,
            num_gpus=args.num_gpus,
            freeze_backbone=args.freeze_backbone,
            lr=args.lr,
            config=args.config)
    else:
        # resume from a stored snapshot
        print('Loading model, this may take a second...')
        model = models.load_model(args.snapshot, backbone_name=args.backbone)
        training_model = model

        has_anchor_config = args.config and 'anchor_parameters' in args.config
        anchor_params = parse_anchor_parameters(args.config) if has_anchor_config else None
        prediction_model = retinanet_bbox(model=model, anchor_params=anchor_params)

        # NOTE(review): this branch hard-codes lr=1e-5, whereas the
        # fresh-model branch passes args.lr -- confirm this is intended.
        loss_by_output = {
            'regression': losses.iou(),
            'classification': losses.focal(),
            'centerness': losses.bce(),
        }
        training_model.compile(
            loss=loss_by_output,
            optimizer=keras.optimizers.adam(lr=1e-5))

    # this lets the generator compute backbone layer shapes using the actual backbone model
    if any(family in args.backbone for family in ('vgg', 'densenet')):
        train_generator.compute_shapes = make_shapes_callback(model)
        if validation_generator:
            validation_generator.compute_shapes = train_generator.compute_shapes

    callbacks = create_callbacks(
        model,
        training_model,
        prediction_model,
        validation_generator,
        args,
    )

    # the callbacks above still receive the validation generator; it is only
    # withheld from fit_generator when no validation loss is requested
    if not args.compute_val_loss:
        validation_generator = None

    fit_options = dict(
        generator=train_generator,
        initial_epoch=0,
        steps_per_epoch=args.steps,
        epochs=args.epochs,
        verbose=1,
        callbacks=callbacks,
        workers=args.workers,
        use_multiprocessing=args.multiprocessing,
        max_queue_size=args.max_queue_size,
        validation_data=validation_generator,
    )
    return training_model.fit_generator(**fit_options)
def level_select(cls_pred, regr_pred, gt_boxes, feature_shapes, strides, pos_scale=0.2):
    """
    Choose, for each ground-truth box, the feature level with the smallest
    classification + regression loss.

    Args:
        cls_pred: (sum(fh * fw), num_classes)
        regr_pred: (sum(fh * fw), 4)
        gt_boxes: (MAX_NUM_GT_BOXES, 5); column 4 is read as the class label
        feature_shapes: (5, 2); row i is indexed as (fh, fw) of level i
        strides: one stride per level
        pos_scale: forwarded to prop_box_graph

    Returns:
        Level index per box kept by trim_zeros_graph, followed by -1 entries
        so that the result has MAX_NUM_GT_BOXES elements.
    """
    labels = tf.cast(gt_boxes[:, 4], tf.int32)
    boxes = gt_boxes[:, :4]
    focal_loss = focal()
    iou_loss = iou()
    # drop the rows trim_zeros_graph reports as padding, from boxes and labels alike
    boxes, non_zeros = trim_zeros_graph(boxes)
    num_gt_boxes = tf.shape(boxes)[0]
    labels = tf.boolean_mask(labels, non_zeros)

    per_level_losses = []
    for level_id, stride in enumerate(strides):
        fh = feature_shapes[level_id][0]
        fw = feature_shapes[level_id][1]
        # the predictions of all levels are concatenated; locate this level's slice
        level_areas = tf.reduce_prod(feature_shapes, axis=-1)
        first = tf.reduce_sum(level_areas[:level_id])
        last = first + fh * fw
        level_cls = tf.reshape(cls_pred[first:last], (fh, fw, tf.shape(cls_pred)[-1]))
        level_regr = tf.reshape(regr_pred[first:last], (fh, fw, tf.shape(regr_pred)[-1]))
        x1, y1, x2, y2 = prop_box_graph(boxes / stride, pos_scale, fw, fh)

        def compute_gt_box_loss(args):
            # args: region bounds on this level, the box, and its label
            bx1, by1, bx2, by2, gt_box, gt_label = args

            # classification: one-hot target at gt_label for every location in the region
            region_cls = level_cls[by1:by2, bx1:bx2, :]
            region_cls = tf.reshape(region_cls, (-1, tf.shape(region_cls)[-1]))
            cls_target = tf.zeros_like(region_cls)
            label_col = tf.ones_like(cls_target[:, 0:1])
            cls_target = tf.concat(
                [cls_target[:, :gt_label], label_col, cls_target[:, gt_label + 1:]],
                axis=-1)
            loss_cls = focal_loss(K.expand_dims(cls_target, axis=0),
                                  K.expand_dims(region_cls, axis=0))

            # regression: distances from each location centre to the box sides
            region_regr = level_regr[by1:by2, bx1:bx2, :]
            region_regr = tf.reshape(region_regr, (-1, tf.shape(region_regr)[-1]))
            centre_x = (K.arange(bx1, bx2, dtype=tf.float32) + 0.5) * stride
            centre_y = (K.arange(by1, by2, dtype=tf.float32) + 0.5) * stride
            grid_x, grid_y = tf.meshgrid(centre_x, centre_y)
            grid_x = tf.reshape(grid_x, (-1,))
            grid_y = tf.reshape(grid_y, (-1,))
            shifts = K.stack((grid_x, grid_y, grid_x, grid_y), axis=-1)
            left = tf.maximum(shifts[:, 0] - gt_box[0], 0)
            top = tf.maximum(shifts[:, 1] - gt_box[1], 0)
            right = tf.maximum(gt_box[2] - shifts[:, 2], 0)
            bottom = tf.maximum(gt_box[3] - shifts[:, 3], 0)
            regr_target = tf.stack([left, top, right, bottom], axis=-1)
            regr_target = regr_target / 4.0
            loss_regr = iou_loss(K.expand_dims(regr_target, axis=0),
                                 K.expand_dims(region_regr, axis=0))
            return loss_cls + loss_regr

        per_level_losses.append(
            tf.map_fn(
                compute_gt_box_loss,
                elems=[x1, y1, x2, y2, boxes, labels],
                dtype=tf.float32,
            )
        )

    # one column per level; the level with the smallest loss wins
    stacked_losses = tf.stack(per_level_losses, axis=-1)
    best_levels = tf.argmin(stacked_losses, axis=-1)
    # pad with -1 for the rows that were trimmed
    padding = tf.ones((MAX_NUM_GT_BOXES - num_gt_boxes), dtype=tf.int64) * -1
    return tf.concat([best_levels, padding], axis=0)
# One pass over train_loader: forward pass, loss, IoU metric, backward pass
# and optimizer step per batch, accumulating into train_loss / train_iou.
# NOTE(review): the indentation of this fragment was lost. The scheduler step
# and the restart check below are laid out as running once after the batch
# loop (i.e. per epoch) -- confirm against the original layout.
for (imgs, labels, _) in tqdm(train_loader):  # third batch element is unused
    # move the batch to the GPU when CUDA is enabled
    if args.cuda:
        imgs, labels = imgs.cuda(), labels.cuda()
    optim.zero_grad()
    out = model(imgs)
    batch_train_loss = loss(out, labels)
    # bring outputs, labels and loss back to the CPU before computing the metric
    if args.cuda:
        out = out.cpu()
        labels = labels.cpu()
        batch_train_loss = batch_train_loss.cpu()
    batch_train_iou = iou(out, labels, threshold=0.5, activation="sigmoid")
    # accumulate running totals as plain Python numbers
    train_loss += batch_train_loss.item()
    train_iou += batch_train_iou.item()
    batch_train_loss.backward()
    optim.step()
scheduler.step()
# Restart: when the epoch counter hits a multiple of Tres, push Tres out by the
# new period, scale the period Tmax by Tmult, reset the first param group's
# learning rate to lr, and create a new cosine schedule over the new period.
if epoch % Tres == 0:
    Tres = Tmax * Tmult + Tres
    Tmax = Tmax * Tmult
    optim.param_groups[0]['lr'] = lr
    scheduler = CosineAnnealingLR(optim, Tmax, eta_min=0.003)
def build_meta_select_target(cls_pred, regr_pred, gt_boxes, feature_shapes, strides, shrink_ratio=0.2):
    """
    Choose, for each ground-truth box, the feature level with the smallest
    classification + regression loss.

    Args:
        cls_pred: class predictions of all levels concatenated along axis 0
        regr_pred: regression predictions of all levels concatenated along axis 0
        gt_boxes: padded ground-truth rows; columns 0-3 are the box, column 4
            is read as the class label
        feature_shapes: row i is indexed as (fh, fw) of level i
        strides: one stride per level
        shrink_ratio: forwarded to shrink_and_project_boxes

    Returns:
        int32 level index per box kept by trim_padding_boxes, followed by -1
        entries so that the result has as many elements as gt_boxes has rows.
    """
    labels = tf.cast(gt_boxes[:, 4], tf.int32)
    boxes = gt_boxes[:, :4]
    max_gt_boxes = tf.shape(boxes)[0]
    focal_loss = focal()
    iou_loss = iou()
    # drop the rows trim_padding_boxes reports as padding, from boxes and labels alike
    boxes, non_zeros = trim_padding_boxes(boxes)
    num_gt_boxes = tf.shape(boxes)[0]
    labels = tf.boolean_mask(labels, non_zeros)

    per_level_losses = []
    for level_id, stride in enumerate(strides):
        fh = feature_shapes[level_id][0]
        fw = feature_shapes[level_id][1]
        # the predictions of all levels are concatenated; locate this level's slice
        level_areas = tf.reduce_prod(feature_shapes, axis=-1)
        first = tf.reduce_sum(level_areas[:level_id])
        last = first + fh * fw
        level_cls = tf.reshape(cls_pred[first:last], (fh, fw, tf.shape(cls_pred)[-1]))
        level_regr = tf.reshape(regr_pred[first:last], (fh, fw, tf.shape(regr_pred)[-1]))

        # (num_gt_boxes, )
        x1, y1, x2, y2 = shrink_and_project_boxes(boxes, fw, fh, stride, shrink_ratio=shrink_ratio)

        def compute_gt_box_loss(args):
            # args: region bounds on this level, the box, and its label
            bx1, by1, bx2, by2, gt_box, gt_label = args

            def do_match_pixels_in_level():
                # classification: one-hot target at gt_label for every location in the region
                region_cls = level_cls[by1:by2, bx1:bx2, :]
                region_cls = tf.reshape(region_cls, (-1, tf.shape(region_cls)[-1]))
                cls_target = tf.zeros_like(region_cls)
                label_col = tf.ones_like(cls_target[:, 0:1])
                cls_target = tf.concat(
                    [cls_target[:, :gt_label], label_col, cls_target[:, gt_label + 1:]],
                    axis=-1)
                loss_cls = focal_loss(tf.expand_dims(cls_target, axis=0),
                                      tf.expand_dims(region_cls, axis=0))

                # regression: distances from each location centre to the box sides
                region_regr = level_regr[by1:by2, bx1:bx2, :]
                region_regr = tf.reshape(region_regr, (-1, tf.shape(region_regr)[-1]))
                centre_x = (tf.cast(tf.range(bx1, bx2), dtype=tf.float32) + 0.5) * stride
                centre_y = (tf.cast(tf.range(by1, by2), dtype=tf.float32) + 0.5) * stride
                grid_x, grid_y = tf.meshgrid(centre_x, centre_y)
                grid_x = tf.reshape(grid_x, (-1, ))
                grid_y = tf.reshape(grid_y, (-1, ))
                shifts = tf.stack((grid_x, grid_y, grid_x, grid_y), axis=-1)
                left = tf.maximum(shifts[:, 0] - gt_box[0], 0)
                top = tf.maximum(shifts[:, 1] - gt_box[1], 0)
                right = tf.maximum(gt_box[2] - shifts[:, 2], 0)
                bottom = tf.maximum(gt_box[3] - shifts[:, 3], 0)
                regr_target = tf.stack([left, top, right, bottom], axis=-1)
                regr_target = regr_target / 4.0 / stride
                loss_regr = iou_loss(tf.expand_dims(regr_target, axis=0),
                                     tf.expand_dims(region_regr, axis=0))
                return loss_cls + loss_regr

            def do_not_match_pixels_in_level():
                # very large loss so a level with an empty region is not the argmin
                return tf.constant(1e7, dtype=tf.float32)

            # the projected region is empty when it has zero width or zero height
            zero_width = tf.equal(tf.cast(bx1, tf.int32), tf.cast(bx2, tf.int32))
            zero_height = tf.equal(tf.cast(by1, tf.int32), tf.cast(by2, tf.int32))
            return tf.cond(zero_width | zero_height,
                           do_not_match_pixels_in_level,
                           do_match_pixels_in_level)

        per_level_losses.append(
            tf.map_fn(compute_gt_box_loss,
                      elems=[x1, y1, x2, y2, boxes, labels],
                      dtype=tf.float32))

    # one column per level; the level with the smallest loss wins
    stacked_losses = tf.stack(per_level_losses, axis=-1)
    best_levels = tf.argmin(stacked_losses, axis=-1, output_type=tf.int32)
    # pad with -1 for the rows that were trimmed
    padding = tf.ones((max_gt_boxes - num_gt_boxes), dtype=tf.int32) * -1
    return tf.concat([best_levels, padding], axis=0)