Example #1
    def validate(self):
        self.model.eval()

        total_loss = []
        pred_label = []
        target_label = []
        with torch.no_grad():
            for step, (image, label) in enumerate(self.val_data_loader):
                batch = self.batch_to_device(image, label)

                outputs = self.model(batch['img'])
                
                loss = self.criterion(outputs, batch['label'])

                total_loss.append(loss.item())
                
                pred_label.append(outputs)
                target_label.append(label)
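                # release the local names only; outputs itself stays alive
                # inside pred_label until compute_iou runs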
                del outputs
                del loss

        total_loss = np.mean(total_loss)
        self.model.train()
        
        return total_loss, compute_iou(pred_label, target_label)
Example #2
    def eval_mesh(self, mesh, pointcloud_tgt, normals_tgt, points_iou,
                  occ_tgt):
        ''' Evaluates a mesh.

        Args:
            mesh (trimesh): mesh which should be evaluated
            pointcloud_tgt (numpy array): target point cloud
            normals_tgt (numpy array): target normals
            points_iou (numpy_array): points tensor for IoU evaluation
            occ_tgt (numpy_array): GT occupancy values for IoU points
        '''
        if len(mesh.vertices) != 0 and len(mesh.faces) != 0:
            pointcloud, idx = mesh.sample(self.n_points, return_index=True)
            pointcloud = pointcloud.astype(np.float32)
            normals = mesh.face_normals[idx]
        else:
            pointcloud = np.empty((0, 3))
            normals = np.empty((0, 3))

        out_dict = self.eval_pointcloud(pointcloud, pointcloud_tgt, normals,
                                        normals_tgt)

        if len(mesh.vertices) != 0 and len(mesh.faces) != 0:
            occ = check_mesh_contains(mesh, points_iou)
            out_dict['iou'] = compute_iou(occ, occ_tgt)
        else:
            out_dict['iou'] = 0.

        return out_dict
Example #3
def test(
    net,
    epoch,
    data_loader,
    result_file,
    config,
    device
):

  net.eval()

  if config.DEVICE.find('cuda') != -1:
    torch.cuda.empty_cache()  # release cached GPU memory

  iou = {
    "TP": {i: 0 for i in range(8)},
    "TA": {i: 0 for i in range(8)}
  }

  total_loss = 0.0

  dataprocess = tqdm(data_loader)

  for batch_item in dataprocess:

    image, mask = batch_item['image'], batch_item['mask']
    image = image.to(device)
    mask = mask.to(device)

    out = net(image)
    out = F.softmax(out, dim=1)

    loss = utils.create_loss(out, mask, config.NUM_CLASSES)
    total_loss += loss.item()

    # compute the confusion matrix for each class
    iou = utils.compute_iou(out, mask, iou)

    dataprocess.set_description_str("epoch:{}".format(epoch))
    dataprocess.set_postfix_str(
      "mask_loss:{:.4f}".format(loss.item())
    )

  result_file.write("Epoch:{} \n".format(epoch))
  for i in range(8):
    result_string = "{}: {:.4f} \n".format(
      i,
      iou["TP"][i] / iou["TA"][i]
    )
    print(result_string)
    result_file.write(result_string)

  # message
  info = "Epoch:{}, mean loss is {:.4f} \n".format(
    epoch, total_loss / len(data_loader)
  )
  result_file.write(info)
  result_file.flush()
Example #4
def encode_label(image, gt_boxes):
    target_scores = np.zeros(shape=[45, 60, 9, 2])  # 0: background, 1: foreground
    target_bboxes = np.zeros(shape=[45, 60, 9, 4])  # t_x, t_y, t_w, t_h
    target_masks = np.zeros(shape=[45, 60, 9])  # negative_samples: -1, positive_samples: 1
    for i in range(45):  # y: height
        for j in range(60):  # x: width
            for k in range(9):
                center_x = j * grid_width + grid_width * 0.5
                center_y = i * grid_height + grid_height * 0.5
                xmin = center_x - wandhG[k][0] * 0.5
                ymin = center_y - wandhG[k][1] * 0.5
                xmax = center_x + wandhG[k][0] * 0.5
                ymax = center_y + wandhG[k][1] * 0.5
                # print(xmin, ymin, xmax, ymax)
                # ignore cross-boundary anchors
                if (xmin > -5) & (ymin > -5) & (xmax < (image_width + 5)) & (
                        ymax < (image_height + 5)):
                    anchor_boxes = np.array([xmin, ymin, xmax, ymax])
                    anchor_boxes = np.expand_dims(anchor_boxes, axis=0)
                    # compute iou between this anchor and all ground-truth boxes in image.
                    ious = compute_iou(anchor_boxes, gt_boxes)
                    positive_masks = ious > pos_thresh
                    negative_masks = ious < neg_thresh

                    if np.any(positive_masks):
                        plot_boxes_on_image(image, anchor_boxes, thickness=1)
                        print("=> encode: %d, %d, %d" % (i, j, k))
                        cv2.circle(image,
                                   center=(int(0.5 * (xmin + xmax)),
                                           int(0.5 * (ymin + ymax))),
                                   radius=1,
                                   color=[255, 0, 0],
                                   thickness=4)

                        target_scores[i, j, k, 1] = 1.
                        target_masks[i, j, k] = 1  # labeled as a positive sample
                        # find out which ground-truth box matches this anchor
                        max_iou_idx = np.argmax(ious)
                        selected_gt_boxes = gt_boxes[max_iou_idx]
                        target_bboxes[i, j, k] = compute_regression(
                            selected_gt_boxes, anchor_boxes[0])

                    if np.all(negative_masks):
                        target_scores[i, j, k, 0] = 1.
                        target_masks[i, j, k] = -1  # labeled as a negative sample
                        cv2.circle(image,
                                   center=(int(0.5 * (xmin + xmax)),
                                           int(0.5 * (ymin + ymax))),
                                   radius=1,
                                   color=[0, 0, 0],
                                   thickness=4)
    Image.fromarray(image).show()
    return target_scores, target_bboxes, target_masks
Example #5
 def compute_target(self, anchors, box):
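     """Label anchors against a single ground-truth box: regression targets
     for every anchor; class 1 above config.pos_threshold, 0 below
     config.neg_threshold, and -1 (ignored) in between."""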
     regression_label = box_transform(anchors, box)
     iou = compute_iou(anchors, box).flatten()
     pos_index = np.where(iou > config.pos_threshold)[0]
     neg_index = np.where(iou < config.neg_threshold)[0]
     classification_label = np.ones_like(iou, dtype=np.float32) * -1
     classification_label[pos_index] = 1
     classification_label[neg_index] = 0
     return regression_label, classification_label
Example #6
def test(cfg, model, post_processor, criterion, device, test_loader):
    """
    Return: a validation metric between 0-1 where 1 is perfect
    """
    model.eval()
    post_processor.eval()
    test_loss = 0
    correct = 0
    # TODO: use a more consistent evaluation interface
    pixel_acc_list = []
    iou_list = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            feature = model(data)
            if cfg.task == "classification":
                output = post_processor(feature)
            elif cfg.task == "semantic_segmentation":
                ori_spatial_res = data.shape[-2:]
                output = post_processor(feature, ori_spatial_res)
            test_loss += criterion(output, target).item()  # sum up batch loss
            if cfg.task == "classification":
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()
            elif cfg.task == "semantic_segmentation":
                pred_map = output.max(dim=1)[1]
                batch_acc, _ = utils.compute_pixel_acc(
                    pred_map, target, fg_only=cfg.METRIC.SEGMENTATION.fg_only)
                pixel_acc_list.append(float(batch_acc))
                for i in range(pred_map.shape[0]):
                    iou = utils.compute_iou(
                        np.array(pred_map[i].cpu()),
                        np.array(target[i].cpu(), dtype=np.int64),
                        cfg.num_classes,
                        fg_only=cfg.METRIC.SEGMENTATION.fg_only)
                    iou_list.append(float(iou))
            else:
                raise NotImplementedError

    test_loss /= len(test_loader.dataset)

    if cfg.task == "classification":
        acc = 100. * correct / len(test_loader.dataset)
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.
              format(test_loss, correct, len(test_loader.dataset), acc))
        return acc
    elif cfg.task == "semantic_segmentation":
        m_iou = np.mean(iou_list)
        print(
            '\nTest set: Average loss: {:.4f}, Mean Pixel Accuracy: {:.4f}, Mean IoU {:.4f}'
            .format(test_loss, np.mean(pixel_acc_list), m_iou))
        return m_iou
    else:
        raise NotImplementedError
Example #7
def test_epoch(model, dataloader, device, epoch):
    model.eval()
    with torch.no_grad():
        correct, correct_cls, total = 0, 0, 0
        for X, y in dataloader:
            X, gt_cls, gt_bbox = X.to(device), y['cls'].to(
                device), y['bbox'].to(device)
            logits, bbox = model(X)
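            # a prediction counts as correct only when the class matches and the
            # predicted box overlaps its ground-truth box with IoU above iou_thr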
            correct += sum((
                torch.argmax(logits, axis=1) == gt_cls).cpu().detach().numpy()
                           &
                           (compute_iou(bbox.cpu(), gt_bbox.cpu()) > iou_thr))
            correct_cls += sum((torch.argmax(logits, axis=1) == gt_cls))
            total += len(X)
        print(f' val acc: {correct / total * 100:.2f}')
Example #8
def eval(model, criterion, optimizer, loader, batch_size, epoch_it, rep):
    model.eval()
    loss_collect = []
    metric_collect = []
    if rep == 'occ':
        sigmoid = torch.nn.Sigmoid()

    with tqdm(total=int(len(loader)), ascii=True) as pbar:
        with torch.no_grad():
            for mbatch in loader:
                img_input, points_input, values = mbatch
                img_input = Variable(img_input).cuda()

                points_input = Variable(points_input).cuda()
                values = Variable(values).cuda()

                optimizer.zero_grad()

                logits = model(points_input, img_input)

                loss = criterion(logits, values)

                loss_collect.append(loss.data.cpu().item())

                if rep == 'occ':
                    logits = sigmoid(logits)

                    iou = utils.compute_iou(logits.detach().cpu().numpy(), \
                                values.detach().cpu().numpy())
                    metric_collect.append(iou)
                elif rep == 'sdf':
                    # acc_sign is sign IoU
                    # acc_thres is accuracy within a threshold
                    # More detail explanation in utils.py
                    acc_sign, acc_thres, iou = utils.compute_acc(\
                                        logits.detach().cpu().numpy(), \
                                        values.detach().cpu().numpy())
                    metric_collect.append([acc_sign, acc_thres, iou])
                pbar.update(1)

    mean_loss = np.mean(np.array(loss_collect))
    if rep == 'occ':
        mean_metric = np.mean(np.concatenate(metric_collect))
        mean_metric = [mean_metric]
    else:
        mean_metric = np.mean(np.array(metric_collect), axis=0)

    return mean_loss, mean_metric
Example #9
    def get_abox_idx(bbox, i, j):
        '''
        Given a ground-truth bbox, return the index of the anchor box
        at grid cell (i, j) that has the highest IoU with it.
        '''
        ious = []
        for anchor in anchor_boxes:
            anchor_coord = get_anchor(anchor, i, j)
            bbox = np.array(bbox)
            anchor_coord = np.array(list(anchor_coord))

            iou = compute_iou(bbox, anchor_coord)
            ious.append(iou)
        m = max(ious)
        return ious.index(m)
Example #10
    def _match_anchor_boxes(self,
                            anchor_boxes,
                            gt_boxes,
                            match_iou=0.5,
                            ignore_iou=0.4):
        """Matches ground truth boxes to anchor boxes based on IOU.

        1. Calculates the pairwise IOU for the M `anchor_boxes` and N `gt_boxes`
          to get a `(M, N)` shaped matrix.
        2. The ground truth box with the maximum IOU in each row is assigned to
          the anchor box provided the IOU is greater than `match_iou`.
        3. If the maximum IOU in a row is less than `ignore_iou`, the anchor
          box is assigned with the background class.
        4. The remaining anchor boxes that do not have any class assigned are
          ignored during training.

        Arguments:
          anchor_boxes: A float tensor with the shape `(total_anchors, 4)`
            representing all the anchor boxes for a given input image shape,
            where each anchor box is of the format `[x, y, width, height]`.
          gt_boxes: A float tensor with shape `(num_objects, 4)` representing
            the ground truth boxes, where each box is of the format
            `[x, y, width, height]`.
          match_iou: A float value representing the minimum IOU threshold for
            determining if a ground truth box can be assigned to an anchor box.
          ignore_iou: A float value representing the IOU threshold under which
            an anchor box is assigned to the background class.

        Returns:
          matched_gt_idx: Index of the matched object
          positive_mask: A mask for anchor boxes that have been assigned ground
            truth boxes.
          ignore_mask: A mask for anchor boxes that need to be ignored during
            training
        """
        iou_matrix = compute_iou(anchor_boxes, gt_boxes)
        max_iou = tf.reduce_max(iou_matrix, axis=1)
        matched_gt_idx = tf.argmax(iou_matrix, axis=1)
        positive_mask = tf.greater_equal(max_iou, match_iou)
        negative_mask = tf.less(max_iou, ignore_iou)
        ignore_mask = tf.logical_not(
            tf.logical_or(positive_mask, negative_mask))
        return (
            matched_gt_idx,
            tf.cast(positive_mask, dtype=tf.float32),
            tf.cast(ignore_mask, dtype=tf.float32),
        )
Example #11
def process_set(sequence_name_list, output_dir, img_dir,
                anno_dir, embedding_dir, gc_dir, seeds_dir):
  result_list = []
  result_log = []
  for s in sequence_name_list:
    if output_dir:
      sequence_output = os.path.join(output_dir, s)
      if not os.path.exists(sequence_output):
        os.mkdir(sequence_output)
    else:
      sequence_output = None

    frame = 0
    length = len(glob.glob('%s/%s/*.jpg' % (img_dir, s)))
    seg_list = []
    gt_list = []

    # The last frame is not processed, allowed according to DAVIS evaluation.
    while frame < length - 1:
      if frame % 10 == 0:
        print('%d/%d' % (frame, length))
      gt = cv2.imread('%s/%s/%05d.png' % (anno_dir, s, frame),
                      flags=cv2.IMREAD_GRAYSCALE)
      gt = (gt > 0).astype(np.uint8)
      gt_list.append(gt)

      seg = vos_frame(s, frame, img_dir, embedding_dir,
                      gc_dir, seeds_dir, frame == length - 2)

      if sequence_output:
        cv2.imwrite('%s/seg_%05d.png' % (sequence_output, frame),
                      (seg * 255).astype(np.uint8))

      seg_list.append(seg)
      frame += 1

    # Evaluation
    iou = utils.compute_iou(seg_list, gt_list)
    mean_iou = np.average(iou)
    result_log.append('%s\t%.5f\n' % (s, mean_iou))
    print(s, mean_iou)
    result_list.append(mean_iou)
  result_log.append('avg\t%.5f\n' % (np.sum(np.array(result_list)) / len(result_list)))
  print('avg\t', np.sum(np.array(result_list)) / len(result_list))
  return result_log
Example #12
 def create_response_label(self, response_map, s_x, anchor_id):
     scale_x = config.instance_size / s_x
     max_x, max_y = np.unravel_index(response_map.argmax(),
                                     response_map.shape)
     max_x = max_x - 8
     max_y = max_y - 8
     bbox = [
         max_y * config.total_stride, max_x * config.total_stride,
         self.target_sz[0] * scale_x, self.target_sz[1] * scale_x
     ]
     iou = compute_iou(self.anchors, bbox).flatten()
     pos_index = np.where(iou > config.pos_threshold)[0]
     neg_index = np.where(iou <= config.neg_threshold)[0]
     classification_label = np.tile(response_map.flatten(),
                                    config.anchor_num)
     # classification_label = np.ones_like(iou, dtype=np.float32)*-1
     classification_label[pos_index] = 1
     classification_label[neg_index] = 0
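     # each anchor owns a 17x17 (= 289) score map; slice out this anchor's block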
     return classification_label[anchor_id * 289:anchor_id * 289 +
                                 289].reshape(17, 17)
Example #13
def encode_label(gt_boxes):
    target_scores = np.zeros(shape=[wnum, hnum, 9, 2])  # 0: background, 1: foreground
    target_bboxes = np.zeros(shape=[wnum, hnum, 9, 4])  # t_x, t_y, t_w, t_h
    target_masks = np.zeros(shape=[wnum, hnum, 9])  # negative_samples: -1, positive_samples: 1
    for i in range(wnum):  # y: height
        for j in range(hnum):  # x: width
            for k in range(9):
                center_x = j * grid_width + grid_width * 0.5
                center_y = i * grid_height + grid_height * 0.5
                xmin = center_x - wandhG[k][0] * 0.5
                ymin = center_y - wandhG[k][1] * 0.5
                xmax = center_x + wandhG[k][0] * 0.5
                ymax = center_y + wandhG[k][1] * 0.5
                # print(xmin, ymin, xmax, ymax)
                # ignore cross-boundary anchors
                if (xmin > -5) & (ymin > -5) & (xmax < (image_width + 5)) & (
                        ymax < (image_height + 5)):
                    anchor_boxes = np.array([xmin, ymin, xmax, ymax])
                    anchor_boxes = np.expand_dims(anchor_boxes, axis=0)
                    # compute iou between this anchor and all ground-truth boxes in image.
                    ious = compute_iou(anchor_boxes, gt_boxes)
                    positive_masks = ious >= pos_thresh
                    negative_masks = ious <= neg_thresh

                    if np.any(positive_masks):
                        target_scores[i, j, k, 1] = 1.
                        target_masks[i, j, k] = 1  # labeled as a positive sample
                        # find out which ground-truth box matches this anchor
                        max_iou_idx = np.argmax(ious)
                        selected_gt_boxes = gt_boxes[max_iou_idx]
                        target_bboxes[i, j, k] = compute_regression(
                            selected_gt_boxes, anchor_boxes[0])

                    if np.all(negative_masks):
                        target_scores[i, j, k, 0] = 1.
                        target_masks[i, j, k] = -1  # labeled as a negative sample
    return target_scores, target_bboxes, target_masks
Example #14
 def evaluate(self, train_step):
     self.sess.run(tf.local_variables_initializer())
     all_y = np.zeros(
         (0, self.conf.height, self.conf.width, self.conf.depth))
     all_y_pred = np.zeros(
         (0, self.conf.height, self.conf.width, self.conf.depth))
     for step in range(self.num_val_batch):
         start = step * self.conf.val_batch_size
         end = (step + 1) * self.conf.val_batch_size
         x_val, y_val = self.data_reader.next_batch(start,
                                                    end,
                                                    mode='valid')
         feed_dict = {self.x: x_val, self.y: y_val, self.keep_prob: 1}
         self.sess.run([self.mean_loss_op, self.mean_accuracy_op],
                       feed_dict=feed_dict)
         y, y_pred = self.sess.run([self.y, self.y_pred],
                                   feed_dict=feed_dict)
         all_y = np.concatenate((all_y, y), axis=0)
         all_y_pred = np.concatenate((all_y_pred, y_pred), axis=0)
     IOU = compute_iou(all_y_pred, all_y, num_cls=self.conf.num_cls)
     mean_IOU = np.mean(IOU)
     summary_valid = self.sess.run(self.merged_summary, feed_dict=feed_dict)
     valid_loss, valid_acc = self.sess.run(
         [self.mean_loss, self.mean_accuracy])
     self.save_summary(summary_valid, train_step + self.conf.reload_step)
     if valid_acc > self.best_validation_accuracy:
         self.best_validation_accuracy = valid_acc
         improved_str = '(improved)'
         self.save(train_step + self.conf.reload_step)
     else:
         improved_str = ''
     print('-' * 25 + 'Validation' + '-' * 25)
     print(
         'After {0} training step: val_loss= {1:.4f}, val_acc={2:.01%}{3}'.
         format(train_step, valid_loss, valid_acc, improved_str))
     print(
         'BackGround={0:.01%}, Neuron={1:.01%}, Vessel={2:.01%}, Average={3:.01%}'
         .format(IOU[0], IOU[1], IOU[2], mean_IOU))
     print('-' * 60)
Example #15
def train_epoch(model, dataloader, criterion: dict, optimizer, scheduler,
                epoch, device):
    model.train()
    bar = tqdm(dataloader)
    bar.set_description(f'epoch {epoch:2}')
    correct, total = 0, 0
    for X, y in bar:
        X, gt_cls, gt_bbox = X.to(device), y['cls'].to(device), y['bbox'].to(
            device)
        logits, bbox = model(X)
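        # joint objective: classification loss plus the box-regression loss weighted 10x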
        loss = criterion['cls'](logits,
                                gt_cls) + 10 * criterion['box'](bbox, gt_bbox)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        correct += sum(
            (torch.argmax(logits, axis=1) == gt_cls).cpu().detach().numpy()
            & (compute_iou(bbox.cpu(), gt_bbox.cpu()) > iou_thr))
        total += len(X)
        bar.set_postfix_str(
            f'lr={scheduler.get_last_lr()[0]:.4f} acc={correct / total * 100:.2f} loss={loss.item():.2f}'
        )
    scheduler.step()
Example #16
 def test(self, step_num):
     self.sess.run(tf.local_variables_initializer())
     self.reload(step_num)
     self.data_reader = DataLoader(self.conf)
     self.numTest = self.data_reader.count_num_samples(mode='test')
     self.num_test_batch = int(self.numTest / self.conf.val_batch_size)
     self.is_train = False
     self.sess.run(tf.local_variables_initializer())
     all_y = np.zeros(
         (0, self.conf.height, self.conf.width, self.conf.depth))
     all_y_pred = np.zeros(
         (0, self.conf.height, self.conf.width, self.conf.depth))
     for step in range(self.num_test_batch):
         start = step * self.conf.val_batch_size
         end = (step + 1) * self.conf.val_batch_size
         x_test, y_test = self.data_reader.next_batch(start,
                                                      end,
                                                      mode='test')
         feed_dict = {self.x: x_test, self.y: y_test, self.keep_prob: 1}
         self.sess.run([self.mean_loss_op, self.mean_accuracy_op],
                       feed_dict=feed_dict)
         y, y_pred = self.sess.run([self.y, self.y_pred],
                                   feed_dict=feed_dict)
         all_y = np.concatenate((all_y, y), axis=0)
         all_y_pred = np.concatenate((all_y_pred, y_pred), axis=0)
     IOU = compute_iou(all_y_pred, all_y, num_cls=self.conf.num_cls)
     mean_IOU = np.mean(IOU)
     test_loss, test_acc = self.sess.run(
         [self.mean_loss, self.mean_accuracy])
     print('-' * 18 + 'Test Completed' + '-' * 18)
     print('test_loss= {0:.4f}, test_acc={1:.01%}'.format(
         test_loss, test_acc))
     print(
         'BackGround={0:.01%}, Neuron={1:.01%}, Vessel={2:.01%}, Average={3:.01%}'
         .format(IOU[0], IOU[1], IOU[2], mean_IOU))
     print('-' * 50)
Example #17
     #                                       num_workers=opt.num_workers,
     #                                       val_ratio=0.1, pin_memory=opt.pin_memory)
     if opt.n_gpu > 1:
         model = nn.DataParallel(model)
     if opt.is_cuda:
         model = model.cuda()
     optimizer = optim.Adam(model.parameters(),
                            lr=opt.learning_rate,
                            weight_decay=opt.weight_decay)
     criterion = nn.BCELoss().cuda()
     # start to run a training
     run_train(model, train_loader, val_loader, opt, criterion)
     # make prediction on validation set
     predictions, img_ids = run_test(model, val_loader, opt)
     # compute IOU between prediction and ground truth masks
     compute_iou(predictions, img_ids, val_loader)
     # SAVE model
     if opt.save_model:
         torch.save(model.state_dict(),
                    os.path.join(opt.checkpoint_dir, 'model-01.pt'))
 else:
     # load testing data for making predictions
     test_loader = get_test_loader(opt.test_dir,
                                   batch_size=opt.batch_size,
                                   shuffle=opt.shuffle,
                                   num_workers=opt.num_workers,
                                   pin_memory=opt.pin_memory)
     # load the model and run test
     model.load_state_dict(
         torch.load(os.path.join(opt.checkpoint_dir, 'model-01.pt')))
     if opt.n_gpu > 1:
Example #18
def build_detection_targets(rpn_rois, gt_class_ids, gt_boxes, gt_masks,
                            config):
    """Generate targets for training Stage 2 classifier and mask heads.
    This is not used in normal training. It's useful for debugging or to train
    the Mask RCNN heads without using the RPN head.

    Inputs:
    rpn_rois: [N, (y1, x1, y2, x2)] proposal boxes.
    gt_class_ids: [instance count] Integer class IDs
    gt_boxes: [instance count, (y1, x1, y2, x2)]
    gt_masks: [height, width, instance count] Ground truth masks. Can be full
              size or mini-masks.

    Returns:
    rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)]
    class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs.
    bboxes: [TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, (y, x, log(h), log(w))]. Class-specific
            bbox refinements.
    masks: [TRAIN_ROIS_PER_IMAGE, height, width, NUM_CLASSES). Class specific masks cropped
           to bbox boundaries and resized to neural network output size.
    """
    assert rpn_rois.shape[0] > 0
    assert gt_class_ids.dtype == np.int32, "Expected int but got {}".format(
        gt_class_ids.dtype)
    assert gt_boxes.dtype == np.int32, "Expected int but got {}".format(
        gt_boxes.dtype)
    assert gt_masks.dtype == np.bool_, "Expected bool but got {}".format(
        gt_masks.dtype)

    # It's common to add GT Boxes to ROIs but we don't do that here because
    # according to XinLei Chen's paper, it doesn't help.

    # Trim empty padding in gt_boxes and gt_masks parts
    instance_ids = np.where(gt_class_ids > 0)[0]
    assert instance_ids.shape[0] > 0, "Image must contain instances."
    gt_class_ids = gt_class_ids[instance_ids]
    gt_boxes = gt_boxes[instance_ids]
    gt_masks = gt_masks[:, :, instance_ids]

    # Compute areas of ROIs and ground truth boxes.
    rpn_roi_area = (rpn_rois[:, 2] - rpn_rois[:, 0]) * \
        (rpn_rois[:, 3] - rpn_rois[:, 1])
    gt_box_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * \
        (gt_boxes[:, 3] - gt_boxes[:, 1])

    # Compute overlaps [rpn_rois, gt_boxes]
    overlaps = np.zeros((rpn_rois.shape[0], gt_boxes.shape[0]))
    for i in range(overlaps.shape[1]):
        gt = gt_boxes[i]
        overlaps[:, i] = utils.compute_iou(gt, rpn_rois, gt_box_area[i],
                                           rpn_roi_area)

    # Assign ROIs to GT boxes
    rpn_roi_iou_argmax = np.argmax(overlaps, axis=1)
    rpn_roi_iou_max = overlaps[np.arange(overlaps.shape[0]),
                               rpn_roi_iou_argmax]
    # GT box assigned to each ROI
    rpn_roi_gt_boxes = gt_boxes[rpn_roi_iou_argmax]
    rpn_roi_gt_class_ids = gt_class_ids[rpn_roi_iou_argmax]

    # Positive ROIs are those with >= 0.5 IoU with a GT box.
    fg_ids = np.where(rpn_roi_iou_max > 0.5)[0]

    # Negative ROIs are those with max IoU 0.1-0.5 (hard example mining)
    # TODO: To hard example mine or not to hard example mine, that's the question
    #     bg_ids = np.where((rpn_roi_iou_max >= 0.1) & (rpn_roi_iou_max < 0.5))[0]
    bg_ids = np.where(rpn_roi_iou_max < 0.5)[0]

    # Subsample ROIs. Aim for 33% foreground.
    # FG
    fg_roi_count = int(config.TRAIN_ROIS_PER_IMAGE * config.ROI_POSITIVE_RATIO)
    if fg_ids.shape[0] > fg_roi_count:
        keep_fg_ids = np.random.choice(fg_ids, fg_roi_count, replace=False)
    else:
        keep_fg_ids = fg_ids
    # BG
    remaining = config.TRAIN_ROIS_PER_IMAGE - keep_fg_ids.shape[0]
    if bg_ids.shape[0] > remaining:
        keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False)
    else:
        keep_bg_ids = bg_ids
    # Combine indices of ROIs to keep
    keep = np.concatenate([keep_fg_ids, keep_bg_ids])
    # Need more?
    remaining = config.TRAIN_ROIS_PER_IMAGE - keep.shape[0]
    if remaining > 0:
        # Looks like we don't have enough samples to maintain the desired
        # balance. Reduce requirements and fill in the rest. This is
        # likely different from the Mask RCNN paper.

        # There is a small chance we have neither fg nor bg samples.
        if keep.shape[0] == 0:
            # Pick bg regions with easier IoU threshold
            bg_ids = np.where(rpn_roi_iou_max < 0.5)[0]
            assert bg_ids.shape[0] >= remaining
            keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False)
            assert keep_bg_ids.shape[0] == remaining
            keep = np.concatenate([keep, keep_bg_ids])
        else:
            # Fill the rest with repeated bg rois.
            keep_extra_ids = np.random.choice(keep_bg_ids,
                                              remaining,
                                              replace=True)
            keep = np.concatenate([keep, keep_extra_ids])
    assert keep.shape[0] == config.TRAIN_ROIS_PER_IMAGE, \
        "keep doesn't match ROI batch size {}, {}".format(
            keep.shape[0], config.TRAIN_ROIS_PER_IMAGE)

    # Reset the gt boxes assigned to BG ROIs.
    rpn_roi_gt_boxes[keep_bg_ids, :] = 0
    rpn_roi_gt_class_ids[keep_bg_ids] = 0

    # For each kept ROI, assign a class_id, and for FG ROIs also add bbox refinement.
    rois = rpn_rois[keep]
    roi_gt_boxes = rpn_roi_gt_boxes[keep]
    roi_gt_class_ids = rpn_roi_gt_class_ids[keep]
    roi_gt_assignment = rpn_roi_iou_argmax[keep]

    # Class-aware bbox deltas. [y, x, log(h), log(w)]
    bboxes = np.zeros((config.TRAIN_ROIS_PER_IMAGE, config.NUM_CLASSES, 4),
                      dtype=np.float32)
    pos_ids = np.where(roi_gt_class_ids > 0)[0]
    bboxes[pos_ids, roi_gt_class_ids[pos_ids]] = utils.box_refinement(
        rois[pos_ids], roi_gt_boxes[pos_ids, :4])
    # Normalize bbox refinements
    bboxes /= config.BBOX_STD_DEV

    # Generate class-specific target masks.
    masks = np.zeros((config.TRAIN_ROIS_PER_IMAGE, config.MASK_SHAPE[0],
                      config.MASK_SHAPE[1], config.NUM_CLASSES),
                     dtype=np.float32)
    for i in pos_ids:
        class_id = roi_gt_class_ids[i]
        assert class_id > 0, "class id must be greater than 0"
        gt_id = roi_gt_assignment[i]
        class_mask = gt_masks[:, :, gt_id]

        if config.USE_MINI_MASK:
            # Create a mask placeholder, the size of the image
            placeholder = np.zeros(config.IMAGE_SHAPE[:2], dtype=bool)
            # GT box
            gt_y1, gt_x1, gt_y2, gt_x2 = gt_boxes[gt_id]
            gt_w = gt_x2 - gt_x1
            gt_h = gt_y2 - gt_y1
            # Resize mini mask to size of GT box
            placeholder[gt_y1:gt_y2, gt_x1:gt_x2] = \
                np.round(scipy.misc.imresize(class_mask.astype(float), (gt_h, gt_w),
                                             interp='nearest') / 255.0).astype(bool)
            # Place the mini batch in the placeholder
            class_mask = placeholder

        # Pick part of the mask and resize it
        y1, x1, y2, x2 = rois[i].astype(np.int32)
        m = class_mask[y1:y2, x1:x2]
        mask = scipy.misc.imresize(
            m.astype(float), config.MASK_SHAPE, interp='nearest') / 255.0
        masks[i, :, :, class_id] = mask

    return rois, roi_gt_class_ids, bboxes, masks
Example #19
def build_detection_targets(rpn_rois, gt_class_ids, gt_boxes, gt_masks):
    """Generate targets for training Stage 2 classifier and mask heads.
    This is not used in normal training. It's useful for debugging or to train
    the Mask RCNN heads without using the RPN head.

    Inputs:
    rpn_rois: [N, (y1, x1, y2, x2)] proposal boxes.
    gt_class_ids: [instance count] Integer class IDs
    gt_boxes: [instance count, (y1, x1, y2, x2)]
    gt_masks: [height, width, instance count] Ground truth masks. Can be full
              size or mini-masks.

    Returns:
    rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)]
    class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs.
    bboxes: [TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, (y, x, log(h), log(w))]. Class-specific
            bbox refinements.
    masks: [TRAIN_ROIS_PER_IMAGE, height, width, NUM_CLASSES). Class specific masks cropped
           to bbox boundaries and resized to neural network output size.
    """
    assert rpn_rois.shape[0] > 0
    assert gt_class_ids.dtype == np.int32, "Expected int but got {}".format(
        gt_class_ids.dtype)
    assert gt_boxes.dtype == np.int32, "Expected int but got {}".format(
        gt_boxes.dtype)
    assert gt_masks.dtype == np.bool_, "Expected bool but got {}".format(
        gt_masks.dtype)

    instance_ids = np.where(gt_class_ids > 0)[0]
    assert instance_ids.shape[0] > 0, "Image must contain instances."
    gt_class_ids = gt_class_ids[instance_ids]
    gt_boxes = gt_boxes[instance_ids]
    gt_masks = gt_masks[:, :, instance_ids]

    rpn_rois_area = (rpn_rois[:, 2] - rpn_rois[:, 0]) * (rpn_rois[:, 3] -
                                                         rpn_rois[:, 1])
    gt_boxes_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (gt_boxes[:, 3] -
                                                         gt_boxes[:, 1])

    overlaps = np.zeros([rpn_rois.shape[0], gt_boxes.shape[0]])
    for i in range(gt_boxes.shape[0]):
        box = gt_boxes[i]
        overlaps[:, i] = utils.compute_iou(box, rpn_rois, gt_boxes_area[i],
                                           rpn_rois_area)

    rpn_rois_iou_argmax = np.argmax(overlaps, axis=1)
    rpn_rois_iou_max = overlaps[np.arange(overlaps.shape[0]),
                                rpn_rois_iou_argmax]

    rpn_roi_gt_boxes = gt_boxes[rpn_rois_iou_argmax]
    rpn_roi_gt_class_ids = gt_class_ids[rpn_rois_iou_argmax]

    fg_ids = np.where(rpn_rois_iou_max > 0.5)[0]
    bg_ids = np.where(rpn_rois_iou_max < 0.5)[0]

    fg_count = int(hyper_parameters.FLAGS.ROI_POSITIVE_RATIO *
                   hyper_parameters.FLAGS.TRAIN_ROIS_PER_IMAGE)
    if fg_ids.shape[0] > fg_count:
        keep_fg_ids = np.random.choice(fg_ids, fg_count, replace=False)
    else:
        keep_fg_ids = fg_ids

    remaining = hyper_parameters.FLAGS.TRAIN_ROIS_PER_IMAGE - keep_fg_ids.shape[
        0]
    if bg_ids.shape[0] > remaining:
        keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False)
    else:
        keep_bg_ids = bg_ids

    keep = np.concatenate([keep_fg_ids, keep_bg_ids])
    remaining = hyper_parameters.FLAGS.TRAIN_ROIS_PER_IMAGE - keep.shape[0]

    if remaining > 0:
        keep_extra_ids = np.random.choice(keep_bg_ids, remaining, replace=True)
        keep = np.concatenate([keep, keep_extra_ids])

    assert keep.shape[0] == hyper_parameters.FLAGS.TRAIN_ROIS_PER_IMAGE, \
        "keep doesn't match ROI batch size {}, {}".format(
            keep.shape[0], hyper_parameters.FLAGS.TRAIN_ROIS_PER_IMAGE)

    rpn_roi_gt_boxes[keep_bg_ids, :] = 0
    rpn_roi_gt_class_ids[keep_bg_ids] = 0

    rois = rpn_rois[keep]
    roi_gt_boxes = rpn_roi_gt_boxes[keep]
    roi_gt_class_ids = rpn_roi_gt_class_ids[keep]
    roi_gt_assignment = rpn_rois_iou_argmax[keep]

    bboxes = np.zeros([
        hyper_parameters.FLAGS.TRAIN_ROIS_PER_IMAGE,
        hyper_parameters.FLAGS.NUM_CLASSES, 4
    ],
                      dtype=np.float32)
    pos_ids = np.where(roi_gt_class_ids > 0)[0]
    bboxes[pos_ids, roi_gt_class_ids[pos_ids]] = utils.box_refinement(
        rois[pos_ids], roi_gt_boxes[pos_ids, :4])

    bboxes /= hyper_parameters.FLAGS.BBOX_STD_DEV

    masks = np.zeros((hyper_parameters.FLAGS.TRAIN_ROIS_PER_IMAGE,
                      hyper_parameters.FLAGS.MASK_SHAPE[0],
                      hyper_parameters.FLAGS.MASK_SHAPE[1],
                      hyper_parameters.FLAGS.NUM_CLASSES),
                     dtype=np.float32)

    for i in pos_ids:
        class_id = roi_gt_class_ids[i]
        assert class_id > 0, "class id must be greater than 0"
        assert np.issubdtype(type(i), np.integer)
        gt_id = roi_gt_assignment[i]
        class_mask = gt_masks[:, :, gt_id]

        if hyper_parameters.FLAGS.USE_MINI_MASK:
            # Create a mask placeholder, the size of the image
            placeholder = np.zeros(hyper_parameters.FLAGS.IMAGE_SHAPE[:2],
                                   dtype=bool)
            # GT box
            gt_y1, gt_x1, gt_y2, gt_x2 = gt_boxes[gt_id]
            gt_w = gt_x2 - gt_x1
            gt_h = gt_y2 - gt_y1
            # Resize mini mask to size of GT box
            placeholder[gt_y1:gt_y2, gt_x1:gt_x2] = \
                np.round(utils.resize(class_mask, (gt_h, gt_w))).astype(bool)
            # Place the mini batch in the placeholder
            class_mask = placeholder

        # Pick part of the mask and resize it
        y1, x1, y2, x2 = rois[i].astype(np.int32)
        m = class_mask[y1:y2, x1:x2]
        mask = utils.resize(m, hyper_parameters.FLAGS.MASK_SHAPE)
        masks[i, :, :, class_id] = mask

    return rois, roi_gt_class_ids, bboxes, masks
Example #20
def get_anomalies_sequential(video_reader,
                             reid_model_path,
                             fbf_results_dict,
                             static_results_dict,
                             ignore_matrix_gen=None,
                             reid_model_name="resnet50",
                             start_frame=1,
                             frame_interval=20,
                             abnormal_duration_thresh=60,
                             detect_thresh=5,
                             undetect_thresh=8,
                             score_thresh=0.3,
                             light_thresh=0.8,
                             anomaly_score_thresh=0.7,
                             similarity_thresh=0.95,
                             suspicious_time_thresh=18,
                             verbose=False,
                             anomaly_nms_thresh=0.8):
    """
    Performs the anomaly detection. Sequential version

    video_reader: VideoReader object for raw video
    reid_model_path: path to re-ID model checkpoint
    fbf_results_dict: ResultsDict object for frame-by-frame/raw video detection results
    static_results_dict: ResultsDict object for static/background detection results
    ignore_matrix_gen: generator yielding ignore matrix, must have the same interval as frame_interval.
        Or single numpy array, or path to .npy file.
    reid_model_name: backbone used for reid model
    start_frame: video frame to start from
    frame_interval: interval between frames to do calculations on
    abnormal_duration_thresh: duration (in seconds) to consider an object abnormal
    detect_thresh: duration (in frames) to consider an object for tracking
    undetect_thresh: duration (in frames) to stop considering an object for tracking
    score_thresh: detection score threshold for bounding boxes
    light_thresh: brightness threshold (not sure what it does)
    anomaly_score_thresh: threshold to consider an object an anomaly
    similarity_thresh: threshold for object re-ID
    suspicious_time_thresh: duration (in seconds) for an object to be considered suspicious
    verbose: verbose printing
    anomaly_nms_thresh: IoU threshold for anomaly NMS.


    """
    def get_ignore_gen(ign_matrix):
        """
        Handles different inputs for ignore matrix

        :param ign_matrix:
        :return:
        """

        if isinstance(ign_matrix, types.GeneratorType):
            return ign_matrix

        # load/create matrix
        if ign_matrix is None:
            matrix = np.ones((h, w), dtype=bool)  # don't ignore anything

        elif isinstance(ign_matrix, str):  # filename
            matrix = np.load(ign_matrix).astype(bool)

        elif isinstance(ign_matrix, np.ndarray):  # a single fixed matrix
            matrix = ign_matrix.astype(bool)

        else:
            raise TypeError("Invalid ignore matrix type:", type(ign_matrix))

        return (matrix for _ in iter(int, 1))  # infinite generator

    # Get video data
    num_frames, framerate, image_shape = video_reader.nframes, video_reader.framerate, video_reader.img_shape

    # load model
    reid_model = ReidExtractor(reid_model_name, reid_model_path)

    # Set up information matrices
    h, w, _ = image_shape

    ignore_matrix_gen = get_ignore_gen(ignore_matrix_gen)

    detect_count_matrix = np.zeros((h, w))
    undetect_count_matrix = np.zeros((h, w))
    start_time_matrix = np.zeros((h, w))
    end_time_matrix = np.zeros((h, w))
    score_matrix = np.zeros((h, w))
    state_matrix = np.zeros(
        (h, w), dtype=bool
    )  # State matrix, 0/1 distinguishes suspicious candidate states

    if verbose:
        print(
            f"total frames: {num_frames}, framerate: {framerate}, height: {h}, width: {w}"
        )
        print("-------------------------")

    ### Main loop
    start = False
    tmp_start = False
    all_results = []
    anomaly_now = {}
    for frame in range(start_frame, num_frames, frame_interval):
        try:
            ignore_matrix = next(ignore_matrix_gen)

            # if frame % (10*30) == 0:
            #     plt.imshow(ignore_matrix)
            #     plt.show()
        except StopIteration:
            pass  # keep same ignore matrix

        # Comment out if not using crop boxes, not needed
        # if fbf_results_dict.max_frame < static_results_dict.max_frame:
        #     fbf_results_dict.gen_next()

        # create tmp_score, tmp_detect
        static_results = static_results_dict[frame]
        if static_results is not None:
            boxes = static_results.loc[
                static_results["score"] > score_thresh,
                ["x1", "y1", "x2", "y2", "score"]].values
        else:
            boxes = []

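        # add_boxes (defined elsewhere) appears to rasterize the boxes into per-pixel
        # score sums and a boolean detection mask, restricted by ignore_matrix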
        tmp_score, tmp_detect = add_boxes(boxes, ignore_matrix)

        ### plotting
        # img = video_reader.get_frame(frame)
        # cmap = plt.get_cmap("viridis")
        # for x1, y1, x2, y2, score in boxes:
        #     x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
        #     col = tuple(int(c * 255) for c in cmap(score)[:3])
        #     cv.rectangle(img, (x1, y1), (x2, y2), col, thickness=2)
        #
        # if frame % 12 == 0:
        #     plt.imshow(img)
        #     plt.show()
        ###

        if verbose:
            print(f"frame: {frame}")

            if len(boxes) > 0:
                print("\tboxes:", len(boxes))

        score_matrix += tmp_score  # add running totals
        detect_count_matrix += tmp_detect

        # Update detection matrices
        undetect_count_matrix += ~tmp_detect
        undetect_count_matrix[tmp_detect] = 0

        # Update time matrices
        start_time_matrix[
            detect_count_matrix ==
            1] = -600 if frame == 1 else frame  # why -600 for frame 1?
        end_time_matrix[detect_count_matrix > 0] = frame

        # Update state matrices
        state_matrix[detect_count_matrix > detect_thresh] = True

        # Detect anomaly
        time_delay = utils.mask(end_time_matrix - start_time_matrix,
                                state_matrix)
        delay_max_idx = np.unravel_index(time_delay.argmax(), time_delay.shape)

        #         print(f"\tmax delay: {time_delay.max()}, start: {start_time_matrix[delay_max_idx]}, end: {end_time_matrix[delay_max_idx]}, state: {state_matrix[delay_max_idx]}")
        # (could also require score_matrix[delay_max_idx] / detect_count_matrix[delay_max_idx] > 0.8)
        if not start and time_delay.max() / framerate > abnormal_duration_thresh:

            delay_max_idx = np.unravel_index(time_delay.argmax(),
                                             time_delay.shape)

            # backtrack the start time
            time_frame = int(start_time_matrix[delay_max_idx] /
                             5) * 5  # + 1  # why 5s and 1?

            G = np.where(
                detect_count_matrix < detect_count_matrix[delay_max_idx] - 2,
                0, 1)  # What does G represent?, why -2?
            region = utils.search_region(G, delay_max_idx)

            # vehicle reid
            if 'start_time' in anomaly_now and (
                    time_frame / framerate -
                    anomaly_now['end_time']) < 30:  # why 30?
                f1_frame_num = max(1, anomaly_now['start_time'] * framerate)
                f2_frame_num = max(1, time_frame)

                similarity = reid_model.similarity(
                    video_reader.get_frame(f1_frame_num),
                    video_reader.get_frame(f2_frame_num),
                    anomaly_now["region"], region)

                if similarity > similarity_thresh:
                    time_frame = int(anomaly_now['start_time'] * framerate /
                                     5) * 5  # + 1  # why 5s and 1?
                else:
                    anomaly_now['region'] = region

            else:
                anomaly_now['region'] = region

            # IoU stuff
            max_iou = 1
            count = 1
            start_time = time_frame
            tmp_len = 1
            ratio = 1
            while (max_iou > 0.1 or tmp_len < 40
                   or ratio > 0.6) and time_frame > 1:  # why 0.1, 40, 0.6?
                ratio = count / tmp_len

                print("time frame:", time_frame)
                fbf_results = fbf_results_dict[time_frame]
                if fbf_results is not None:
                    bboxes = fbf_results[["x1", "y1", "x2", "y2",
                                          "score"]].values
                    max_iou = utils.compute_iou(anomaly_now['region'], bboxes)

                else:
                    max_iou = 0

                time_frame -= 5  # why 5?
                if max_iou > 0.3:  # why 0.3?
                    count += 1
                    if max_iou > 0.5:  # why 0.5?  # they mention 0.5 IoU in the paper for NMS, might not be this
                        start_time = time_frame

                tmp_len += 1

            # back track start_time, until brightness at that spot falls below a threshold
            for time_frame in range(start_time, 1, -5):
                #                 print(f"\ttimeframe: {time_frame}")
                tmp_im = video_reader.get_frame(time_frame)
                if utils.compute_brightness(
                        tmp_im[region[1]:region[3],
                               region[0]:region[2]]) <= light_thresh:
                    break

                start_time = time_frame

            anomaly_now['start_time'] = max(0, start_time / framerate)
            anomaly_now['end_time'] = max(
                0, end_time_matrix[delay_max_idx] / framerate)
            start = True

        elif not tmp_start and time_delay.max() > suspicious_time_thresh * framerate:
            time_frame = start_time_matrix[delay_max_idx]

            G = np.where(
                detect_count_matrix < detect_count_matrix[delay_max_idx] - 2,
                0, 1)  # what does G represent?
            region = utils.search_region(G, delay_max_idx)

            # vehicle reid
            if 'start_time' in anomaly_now and (
                    time_frame / framerate -
                    anomaly_now['end_time']) < 30:  # why 30?
                f1_frame_num = max(1, anomaly_now['start_time'] * framerate)
                f2_frame_num = max(1, time_frame)

                similarity = reid_model.similarity(
                    video_reader.get_frame(f1_frame_num),
                    video_reader.get_frame(f2_frame_num),
                    anomaly_now["region"], region)

                if similarity > similarity_thresh:
                    time_frame = int(
                        anomaly_now['start_time'] * framerate / 5) * 5 + 1
                    region = anomaly_now['region']

            anomaly_now['region'] = region
            anomaly_now['start_time'] = max(0, time_frame / framerate)
            anomaly_now['end_time'] = max(
                0, end_time_matrix[delay_max_idx] / framerate)

            tmp_start = True

        if start and time_delay.max() / framerate > abnormal_duration_thresh:

            delay_max_idx = np.unravel_index(time_delay.argmax(),
                                             time_delay.shape)

            if undetect_count_matrix[delay_max_idx] > undetect_thresh:
                anomaly_score = score_matrix[
                    delay_max_idx] / detect_count_matrix[delay_max_idx]

                print("\t", anomaly_now, anomaly_score)
                if anomaly_score > anomaly_score_thresh:
                    anomaly_now['end_time'] = end_time_matrix[
                        delay_max_idx] / framerate
                    anomaly_now['score'] = anomaly_score

                    all_results.append(anomaly_now)
                    anomaly_now = {}

                start = False

        elif tmp_start and time_delay.max() > suspicious_time_thresh * framerate:
            if undetect_count_matrix[delay_max_idx] > undetect_thresh:

                anomaly_score = score_matrix[
                    delay_max_idx] / detect_count_matrix[delay_max_idx]
                if anomaly_score > anomaly_score_thresh:
                    anomaly_now['end_time'] = end_time_matrix[
                        delay_max_idx] / framerate
                    anomaly_now['score'] = anomaly_score

                tmp_start = False

        # undetect matrix change state_matrix
        state_matrix[undetect_count_matrix > undetect_thresh] = False
        undetect_count_matrix[undetect_count_matrix > undetect_thresh] = 0

        # update matrix
        tmp_detect |= state_matrix
        detect_count_matrix = utils.mask(detect_count_matrix, tmp_detect)
        score_matrix = utils.mask(score_matrix, tmp_detect)

    # Add all anomalies to the results list
    print("---", start, time_delay.max(), score_matrix[delay_max_idx],
          detect_count_matrix[delay_max_idx])
    if start and time_delay.max() > abnormal_duration_thresh * framerate:
        anomaly_score = score_matrix[delay_max_idx] / detect_count_matrix[
            delay_max_idx]
        if anomaly_score > anomaly_score_thresh:
            anomaly_now[
                'end_time'] = end_time_matrix[delay_max_idx] / framerate
            anomaly_now['score'] = anomaly_score

            all_results.append(anomaly_now)
            anomaly_now = {}
            start = False

    # Apply Non-Maximal Suppression to the results
    if all_results:
        nms_out = utils.anomaly_nms(all_results, anomaly_nms_thresh)

        #         final_result = {'start_time': 892, 'score': 0} # why 892?
        #         for nms_start_time, nms_end_time in nms_out[:, 5:7]:
        #             if nms_start_time < final_result["start_time"]:
        #                 final_result["start_time"] = max(0, int(nms_start_time - 1))
        #                 final_result["score"] = 1
        #                 final_result["end_time"] = nms_end_time

        final_results = pd.DataFrame(nms_out,
                                     columns=[
                                         "x1", "y1", "x2", "y2", "score",
                                         "start_time", "end_time"
                                     ])

        return final_results

    return None
Example #21
def get_overall_IOU(boundingBoxes, sort_inds, g_fnames, q_fnames):
    """
        boudningBoxes: boundingBoxes class which is the list all the bounding boxes in the images
        g_fnames = gallyer filenames. without the extension (no .png) eg. '24889' 
    """
    n_query = len(q_fnames)
    avgIouArray = np.zeros((n_query, 5))
    weightedIouArray = np.zeros((n_query, 5))
    allClasses = boundingBoxes.getClasses()
    classIou = dict([(key, []) for key in allClasses])

    for i in range(sort_inds.shape[0]):  # iterate over all the query images
        ts = time.time()
        qImageName = q_fnames[i]
        qBBoxes = boundingBoxes.getBoundingBoxesByImageName(qImageName)

        for j in range(5):  # Iterate over top-5 retrieved images
            rImageName = g_fnames[sort_inds[i][j]]
            rBBoxes = boundingBoxes.getBoundingBoxesByImageName(rImageName)

            iouTemp = []
            weights = []

            # iterate over each element (bounding box)
            for bb in qBBoxes:  # qbbs query bounding boxes
                bb_cordinates = bb.getBoundingBox()
                bb_class = bb.classId

                # get the bounding boxes in the retrieved image with the same class
                rbbs = [d for d in rBBoxes if d.classId == bb_class]

                iouMax = 0  # sys.float_info.min
                for rbb in rbbs:
                    assert (rbb.classId == bb_class)
                    rbb_cordinates = rbb.getBoundingBox()
                    iou = compute_iou(bb_cordinates, rbb_cordinates)
                    if iou > iouMax:
                        iouMax = iou
                    if iou < 0:
                        print('Warning!!: Negative iou found ', 'ImageName:',
                              rbb.getImageName(), '  bounding box',
                              rbb.getBoundingBox())
                    assert (iouMax >= 0)
                #Store iou with best matched component label
                iouTemp.append(iouMax)
                weights.append(bb_cordinates[2] * bb_cordinates[3])
                # record the IoU under the corresponding class
                classIou[bb_class].append(iouMax)

            avgIouArray[i][j] = np.mean(
                iouTemp)  # Average Iou between a query and a retrieved image

            weightTotal = np.sum(weights)
            weights = np.divide(weights, weightTotal)
            weightedIou = sum(iouTemp * weights)
            weightedIouArray[i][j] = weightedIou

        #print('Computing IoU metric: {}/{}'.format(i,50))
        #print('Time for query{} = {}'.format(i, time.time()-ts))
        print('Computing  IoU: {}/{}  in time {}'.format(
            i, n_query,
            time.time() - ts))
        ts = time.time()

    meanAvgIou = np.mean(avgIouArray, axis=1)
    overallMeanIou = np.mean(meanAvgIou)

    meanWeightedIou = np.mean(weightedIouArray, axis=1)
    overallMeanWeightedIou = np.mean(meanWeightedIou)

    print('Completed computing IoU metric: {}/{}'.format(i + 1, n_query))

    return overallMeanIou, overallMeanWeightedIou, classIou
Example #22
    netD = netD()
    criterion = torch.nn.BCELoss()
    # netD.apply(weights_init)
    if Opt.ngpu > 1:
        netG = nn.DataParallel(netG)
        netD = nn.DataParallel(netD)
    if Opt.is_cuda:
        netG = netG.cuda()
        netD = netD.cuda()
        criterion = criterion.cuda()

    # Optimizers
    optimizerG = torch.optim.Adam(netG.parameters(),
                                  lr=Opt.lr,
                                  betas=Opt.betas,
                                  weight_decay=Opt.weight_decay)
    optimizerD = torch.optim.Adam(netD.parameters(),
                                  lr=Opt.lr,
                                  betas=Opt.betas,
                                  weight_decay=Opt.weight_decay)

    if Opt.is_train:
        # predictions, img_ids = test(netG, val_loader)
        # compute_iou(predictions, img_ids, 'UNet_IOU')
        train(train_loader, netD, netG, criterion, optimizerG, optimizerD)
        # predictions, img_ids = test(netG, val_loader)
        # compute_iou(predictions, img_ids, 'GAN_IOU')
    else:
        predictions, img_ids = test(netG, val_loader)
        compute_iou(predictions, img_ids, 'UNet_IOU')
Example #23
                pred_score.append(score[i, j, k, 1])

pred_boxes = np.array(pred_boxes)
pred_score = np.array(pred_score)

# selected_boxes = pred_boxes
selected_boxes = []
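# greedy NMS: repeatedly keep the highest-scoring box and drop all remaining
# boxes whose IoU with it exceeds 0.1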
while len(pred_boxes) > 0:
    max_idx = np.argmax(pred_score)
    selected_box = pred_boxes[max_idx]
    selected_boxes.append(selected_box)
    pred_boxes = np.concatenate(
        [pred_boxes[:max_idx], pred_boxes[max_idx + 1:]])
    pred_score = np.concatenate(
        [pred_score[:max_idx], pred_score[max_idx + 1:]])
    ious = compute_iou(selected_box, pred_boxes)
    iou_mask = ious <= 0.1
    pred_boxes = pred_boxes[iou_mask]
    pred_score = pred_score[iou_mask]

selected_boxes = np.array(selected_boxes)
plot_boxes_on_image(raw_image, selected_boxes)
Image.fromarray(np.uint8(raw_image)).show()
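The while-loop above is greedy non-maximum suppression: repeatedly keep the highest-scoring box and discard every remaining box whose IoU with it exceeds the threshold. A self-contained sketch of the same procedure (the inlined IoU math is an assumption about what compute_iou does for one box against an (N, 4) array of [xmin, ymin, xmax, ymax] boxes):

import numpy as np

def nms(boxes, scores, iou_thresh=0.1):
    """Greedy NMS sketch; boxes is an (N, 4) array of [xmin, ymin, xmax, ymax]."""
    keep = []
    order = np.argsort(scores)[::-1]  # indices sorted by descending score
    while order.size > 0:
        best = order[0]
        keep.append(best)
        rest = order[1:]
        # IoU of the best box against every remaining box.
        xx1 = np.maximum(boxes[best, 0], boxes[rest, 0])
        yy1 = np.maximum(boxes[best, 1], boxes[rest, 1])
        xx2 = np.minimum(boxes[best, 2], boxes[rest, 2])
        yy2 = np.minimum(boxes[best, 3], boxes[rest, 3])
        inter = np.clip(xx2 - xx1, 0, None) * np.clip(yy2 - yy1, 0, None)
        area_best = (boxes[best, 2] - boxes[best, 0]) * (boxes[best, 3] - boxes[best, 1])
        area_rest = (boxes[rest, 2] - boxes[rest, 0]) * (boxes[rest, 3] - boxes[rest, 1])
        iou = inter / (area_best + area_rest - inter)
        order = rest[iou <= iou_thresh]  # suppress overlapping boxes
    return boxes[np.array(keep, dtype=int)]

selected = nms(pred_boxes, pred_score, iou_thresh=0.1)  # equivalent to the loop above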

grid_size = [45, 60]

grid_x = tf.range(grid_size[0], dtype=tf.int32)
grid_y = tf.range(grid_size[1], dtype=tf.int32)
a, b = tf.meshgrid(grid_x, grid_y)
x_offset = tf.reshape(a, (-1, 1))
y_offset = tf.reshape(b, (-1, 1))
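These offsets are the per-cell grid indices that get added to predicted box centers when decoding. The continuation is not shown in the snippet, so the following is a hedged sketch of the usual next step:

x_y_offset = tf.concat([x_offset, y_offset], axis=-1)  # (H*W, 2) cell indices
x_y_offset = tf.reshape(x_y_offset, [grid_size[0], grid_size[1], 1, 2])
x_y_offset = tf.cast(x_y_offset, tf.float32)  # ready to add to predicted centers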
Пример #24
0
# Assume two "ground-truth" target boxes in the image
bbox = np.asarray([[20, 30, 400, 500], [300, 400, 500, 600]],
                  dtype=np.float32)  # [y1, x1, y2, x2] format
# Assume the labels corresponding to the two ground-truth boxes
labels = np.asarray([6, 8], dtype=np.int8)  # 0 represents background

img_tensor = torch.zeros((1, 3, 800, 800)).float()
img_var = torch.autograd.Variable(img_tensor)

# --------------------- step_1: obtain each anchor's target confidence (anchor_conf) and translation/scale coefficients (anchor_locations)
# Initialize all anchors, then find the valid anchors and their indices
# anchors: (22500, 4)  valid_anchor_boxes: (8940, 4)  valid_anchor_index: 8940
anchors, valid_anchor_boxes, valid_anchor_index = utils.init_anchor()
# Compute the IoU between the valid anchors and all ground-truth boxes
# ious: (8940, 2), the IoU of each valid anchor box with each ground-truth box
ious = utils.compute_iou(valid_anchor_boxes, bbox)
valid_anchor_len = len(valid_anchor_boxes)
# Sample a fixed ratio of positives and negatives among the valid anchors
label, argmax_ious = utils.get_pos_neg_sample(ious,
                                              valid_anchor_len,
                                              pos_iou_threshold=0.7,
                                              neg_iou_threshold=0.3,
                                              pos_ratio=0.5,
                                              n_sample=256)
# print(np.sum(label == 1))  # 18 positives
# print(np.sum(label == 0))  # 256 - 18 = 238 negatives

# Now assign a location target to each anchor box, using the ground-truth object with the largest IoU.
# Note that anchor locs are assigned to all valid anchor boxes regardless of their labels;
# later, when computing the loss, a simple filter removes the ones that are not needed.
# The ground-truth box matched to each valid anchor
max_iou_bbox = bbox[argmax_ious]  # matched ground-truth box coordinates  (8940, 4)
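max_iou_bbox gives each valid anchor a matched ground-truth box; the anchor locations mentioned in the comments are conventionally the Faster R-CNN (dy, dx, dh, dw) parameterization. A hedged sketch of that encoding for the [y1, x1, y2, x2] format used above (the real utils helper may differ):

anc_h = valid_anchor_boxes[:, 2] - valid_anchor_boxes[:, 0]
anc_w = valid_anchor_boxes[:, 3] - valid_anchor_boxes[:, 1]
anc_cy = valid_anchor_boxes[:, 0] + 0.5 * anc_h
anc_cx = valid_anchor_boxes[:, 1] + 0.5 * anc_w

gt_h = max_iou_bbox[:, 2] - max_iou_bbox[:, 0]
gt_w = max_iou_bbox[:, 3] - max_iou_bbox[:, 1]
gt_cy = max_iou_bbox[:, 0] + 0.5 * gt_h
gt_cx = max_iou_bbox[:, 1] + 0.5 * gt_w

anchor_locs = np.vstack([(gt_cy - anc_cy) / anc_h,   # dy
                         (gt_cx - anc_cx) / anc_w,   # dx
                         np.log(gt_h / anc_h),       # dh
                         np.log(gt_w / anc_w)]).transpose()  # (8940, 4)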
Пример #25
0
            center_x = j * grid_width + grid_width * 0.5
            center_y = i * grid_height + grid_height * 0.5
            xmin = center_x - wandhG[k][0] * 0.5
            ymin = center_y - wandhG[k][1] * 0.5
            xmax = center_x + wandhG[k][0] * 0.5
            ymax = center_y + wandhG[k][1] * 0.5
            # ignore cross-boundary anchors
            if (xmin > -5) & (ymin > -5) & \
                    (xmax < image_width + 5) & (ymax < image_height + 5):
                anchor_boxes = np.array([xmin, ymin, xmax, ymax])
                anchor_boxes = np.expand_dims(anchor_boxes, axis=0)
                # compute iou between this anchor and all ground-truth boxes in image.
                ious = compute_iou(anchor_boxes, gt_boxes)
                positive_masks = ious > pos_thresh
                negative_masks = ious < neg_thresh

                if np.any(positive_masks):
                    plot_boxes_on_image(encoded_image,
                                        anchor_boxes,
                                        thickness=1)
                    print("=> Encoding positive sample: %d, %d, %d" %
                          (i, j, k))
                    cv2.circle(encoded_image,
                               center=(int(0.5 * (xmin + xmax)),
                                       int(0.5 * (ymin + ymax))),
                               radius=1,
                               color=[255, 0, 0],
                               thickness=4)
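What the positive and negative masks feed into is not shown here; in a typical RPN encoder each anchor ends up with a three-way label (1 = positive, 0 = negative, -1 = ignored between the thresholds). A sketch of that bookkeeping, where label_grid is an illustrative name and the other names come from the snippet:

# Inside the same (i, j, k) loop, after computing `ious`:
if np.any(positive_masks):
    label_grid[i, j, k] = 1   # anchor overlaps some GT box above pos_thresh
elif np.all(negative_masks):
    label_grid[i, j, k] = 0   # anchor is background relative to every GT box
else:
    label_grid[i, j, k] = -1  # ambiguous overlap; excluded from the loss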
Пример #26
0
def test(cfg, model, post_processor, criterion, device, test_loader, visfreq):
    model.eval()
    post_processor.eval()
    test_loss = 0
    correct = 0
    pixel_acc_list = []
    iou_list = []
    with torch.no_grad():
        for idx, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)
            feature = model(data)
            output = post_processor(feature)
            test_loss += criterion(output, target).item()  # sum up batch loss
            if cfg.task == "classification":
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()
                # TODO: save classified images with raw image as content and
                # human readable label as filenames
            elif cfg.task == "semantic_segmentation":
                pred_map = output.max(dim=1)[1]
                batch_acc, _ = utils.compute_pixel_acc(
                    pred_map, target, fg_only=cfg.METRIC.SEGMENTATION.fg_only)
                pixel_acc_list.append(float(batch_acc))
                for i in range(pred_map.shape[0]):
                    pred_np = np.array(pred_map[i].cpu())
                    target_np = np.array(target[i].cpu(), dtype=np.int64)
                    iou = utils.compute_iou(
                        pred_np,
                        target_np,
                        cfg.num_classes,
                        fg_only=cfg.METRIC.SEGMENTATION.fg_only)
                    iou_list.append(float(iou))
                    if (i + 1) % visfreq == 0:
                        cv2.imwrite("{}_{}_pred.png".format(idx, i), pred_np)
                        cv2.imwrite("{}_{}_label.png".format(idx, i),
                                    target_np)
                        # Visualize RGB image as well
                        ori_rgb_np = np.array(data[i].permute((1, 2, 0)).cpu())
                        if 'normalize' in cfg.DATASET.TRANSFORM.TEST.transforms:
                            rgb_mean = cfg.DATASET.TRANSFORM.TEST.TRANSFORMS_DETAILS.NORMALIZE.mean
                            rgb_sd = cfg.DATASET.TRANSFORM.TEST.TRANSFORMS_DETAILS.NORMALIZE.sd
                            ori_rgb_np = (ori_rgb_np * rgb_sd) + rgb_mean
                        assert ori_rgb_np.max() <= 1.1, "Max is {}".format(
                            ori_rgb_np.max())
                        ori_rgb_np[ori_rgb_np >= 1] = 1
                        ori_rgb_np = (ori_rgb_np * 255).astype(np.uint8)
                        # Convert to OpenCV BGR
                        ori_rgb_np = cv2.cvtColor(ori_rgb_np,
                                                  cv2.COLOR_RGB2BGR)
                        cv2.imwrite("{}_{}_ori.jpg".format(idx, i), ori_rgb_np)
            else:
                raise NotImplementedError

    test_loss /= len(test_loader.dataset)

    if cfg.task == "classification":
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.
              format(test_loss, correct, len(test_loader.dataset),
                     100. * correct / len(test_loader.dataset)))
    elif cfg.task == "semantic_segmentation":
        print(
            '\nTest set: Average loss: {:.4f}, Mean Pixel Accuracy: {:.4f}, Mean IoU {:.4f}\n'
            .format(test_loss, np.mean(pixel_acc_list), np.mean(iou_list)))
    else:
        raise NotImplementedError
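Unlike the box IoU of the earlier examples, utils.compute_iou here receives dense prediction and label maps plus a class count, so it is presumably a per-class (confusion-matrix style) mean IoU. A minimal sketch under that assumption; the real helper and its fg_only semantics may differ:

import numpy as np

def mean_iou(pred_map, target_map, num_classes, fg_only=False):
    """Hypothetical per-class mean IoU over dense label maps."""
    start = 1 if fg_only else 0  # optionally skip the background class
    ious = []
    for c in range(start, num_classes):
        inter = np.logical_and(pred_map == c, target_map == c).sum()
        union = np.logical_or(pred_map == c, target_map == c).sum()
        if union > 0:  # ignore classes absent from both maps
            ious.append(inter / union)
    return float(np.mean(ious)) if ious else 0.0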
Пример #27
0
def get_overall_IOU_ndcg(boundingBoxes, sort_inds, g_fnames, q_fnames):
    allClasses = boundingBoxes.getClasses()
    classIou = dict([(key, []) for key in allClasses])

    aNdcg = np.empty((1, 0), float)
    wNdcg = np.empty((1, 0), float)

    for i in range(sort_inds.shape[0]):  # Iterate over all the query images
        
        qImageName = q_fnames[i]
        qBBoxes = boundingBoxes.getBoundingBoxesByImageName(qImageName) 
        
        iouList = []
        weightedIouList = []
        
        time_s = time.time()
        # Use range(len(g_fnames)) to iterate over all gallery images instead of only the top 5
        for j in range(5):
            rImageName = g_fnames[sort_inds[i][j]]
            rBBoxes = boundingBoxes.getBoundingBoxesByImageName(rImageName)
            
            iouTemp = []
            weights = []
            
            # Iterate over each element (bounding box) of the query image
            for bb in qBBoxes:  # qBBoxes: query bounding boxes
                bb_cordinates = bb.getBoundingBox()
                bb_class = bb.classId

                # get the bounding box in the retrieved image that has the same class
                rbbs = [d for d in rBBoxes if d.classId == bb_class]
                
                iouMax = 0
                for rbb in rbbs:
                    assert rbb.classId == bb_class
                    rbb_cordinates = rbb.getBoundingBox()
                    iou = compute_iou(bb_cordinates, rbb_cordinates)
                    if iou > iouMax:
                        iouMax = iou
                    if iou < 0:
                        print('Warning!!: Negative iou found ', 'ImageName:',
                              rbb.getImageName(), '  bounding box',
                              rbb.getBoundingBox())
                    assert iouMax >= 0

                # Store IoU with the best-matched component label
                iouTemp.append(iouMax)
                weights.append(bb_cordinates[2] * bb_cordinates[3])
                # Update IoU in the corresponding classIou entry
                classIou[bb_class].append(iouMax)
                
            current_iou = np.mean(iouTemp)  # average IoU between a query and a retrieved image
            weightTotal = np.sum(weights)
            weights = np.divide(weights, weightTotal)
            current_weightedIou = sum(iouTemp * weights)

            weightedIouList.append(current_weightedIou)
            iouList.append(current_iou)
        
        aGain = ndcg_at_k(iouList, 5)
        wGain = ndcg_at_k(weightedIouList, 5)

        aNdcg = np.append(aNdcg, aGain)
        wNdcg = np.append(wNdcg, wGain)
        time_e = (time.time() - time_s) / 3600
        print('Elapsed time for one query: {:.3f} h'.format(time_e))
    
    avg_aNdcg = np.mean(aNdcg)
    avg_wNdcg = np.mean(wNdcg)    

    return avg_aNdcg, avg_wNdcg
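ndcg_at_k is not defined in this listing. One common formulation treats the per-rank (weighted) IoU values as graded relevances and normalizes the discounted cumulative gain by that of the ideal ordering; a sketch under that assumption:

import numpy as np

def dcg_at_k(rels, k):
    """Discounted cumulative gain over the first k relevance values."""
    rels = np.asarray(rels, dtype=float)[:k]
    if rels.size == 0:
        return 0.0
    discounts = np.log2(np.arange(2, rels.size + 2))  # log2(2), log2(3), ...
    return float(np.sum(rels / discounts))

def ndcg_at_k(rels, k):
    """DCG normalized by the DCG of the ideally sorted relevances."""
    ideal = dcg_at_k(sorted(rels, reverse=True), k)
    return dcg_at_k(rels, k) / ideal if ideal > 0 else 0.0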