Example #1
def loop_body(b, ignore_mask):
    true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                               object_mask_bool[b, ..., 0])
    iou = box_iou(pred_box[b], true_box)
    best_iou = tf.keras.backend.max(iou, axis=-1)
    ignore_mask = ignore_mask.write(
        b,
        tf.cast(best_iou < ignore_thresh,
                tf.keras.backend.dtype(true_box)))
    return b + 1, ignore_mask
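
This loop_body is a fragment of the keras-yolo3 loss; y_true, l, object_mask_bool, pred_box, and ignore_thresh come from the surrounding loss function. A minimal sketch of how such a body is typically driven, assuming a batch size m and a TensorArray accumulator (the exact wiring in the original source may differ):

# Hedged sketch: iterate the batch with tf.while_loop, accumulating per-image ignore masks.
# `m` (batch size), `y_true`, and `loop_body` are assumed to be defined by the surrounding loss code.
ignore_mask = tf.TensorArray(tf.keras.backend.dtype(y_true[0]), size=1, dynamic_size=True)
_, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
ignore_mask = ignore_mask.stack()                            # [batch, grid_h, grid_w, anchors]
ignore_mask = tf.keras.backend.expand_dims(ignore_mask, -1)  # match the rank of the loss tensors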
Example #2
def assign_detections_to_trackers(trackers_obj, detections_obj, iou_thrd=0.3):
    '''
    From the current list of trackers and new detections, output matched
    detections, unmatched trackers, and unmatched detections.
    '''

    trackers = [temp_obj['bbox'] for temp_obj in trackers_obj]
    detections = [temp_obj['bbox'] for temp_obj in detections_obj]

    IOU_mat = np.zeros((len(trackers), len(detections)), dtype=np.float32)
    Motion_mat = np.zeros((len(trackers), len(detections)), dtype=np.float32)
    Shape_mat = np.zeros((len(trackers), len(detections)), dtype=np.float32)
    for t, trk in enumerate(trackers):
        #trk = convert_to_cv2bbox(trk)
        for d, det in enumerate(detections):
            #   det = convert_to_cv2bbox(det)
            IOU_mat[t, d] = utils.box_iou(trk, det)
            Motion_mat[t, d] = get_motion_score(trk, det)
            Shape_mat[t, d] = get_shape_score(trk, det)

    # Produce matches:
    # solve the assignment problem of maximizing the total IoU using the
    # Hungarian algorithm (also known as the Munkres algorithm)

    matched_idx = linear_assignment(-IOU_mat)

    unmatched_trackers, unmatched_detections = [], []
    for t, trk in enumerate(trackers):
        if (t not in matched_idx[:, 0]):
            unmatched_trackers.append(t)

    for d, det in enumerate(detections):
        if (d not in matched_idx[:, 1]):
            unmatched_detections.append(d)

    matches = []

    # For creating trackers we consider any detection with an
    # overlap less than iou_thrd to signify the existence of
    # an untracked object

    for m in matched_idx:
        if (IOU_mat[m[0], m[1]] < iou_thrd):
            unmatched_trackers.append(m[0])
            unmatched_detections.append(m[1])
        else:
            matches.append(m.reshape(1, 2))

    if (len(matches) == 0):
        matches = np.empty((0, 2), dtype=int)
    else:
        matches = np.concatenate(matches, axis=0)

    return matches, np.array(unmatched_detections), np.array(
        unmatched_trackers)
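
The linear_assignment helper used above comes from the long-removed sklearn.utils.linear_assignment_ module. A hedged, drop-in sketch using SciPy, assuming the same tracker-by-detection IoU cost matrix:

import numpy as np
from scipy.optimize import linear_sum_assignment

def hungarian_matches(iou_mat):
    # linear_sum_assignment minimizes cost, so negate the IoU matrix to maximize total IoU
    row_ind, col_ind = linear_sum_assignment(-iou_mat)
    # stack into the same (N, 2) array of (tracker_idx, detection_idx) pairs used above
    return np.stack([row_ind, col_ind], axis=1)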
Example #3
def nms_fine_tune(detected_objects_list, th=0.5):
    ret_list = []
    for i in range(len(detected_objects_list)):
        is_append = True
        for j in range(len(detected_objects_list)):
            if i == j:
                continue
            iou = box_iou(detected_objects_list[i]['bbox'],
                          detected_objects_list[j]['bbox'])
            if iou > th and _box_area(
                    detected_objects_list[i]['bbox']) <= _box_area(
                        detected_objects_list[j]['bbox']):
                is_append = False
                break
        if is_append:
            ret_list.append(detected_objects_list[i])
    return ret_list
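
This example assumes box_iou and _box_area helpers operating on (xmin, ymin, xmax, ymax) boxes. A minimal sketch of what they might look like; the original project's versions may differ:

def _box_area(box):
    # area of an (xmin, ymin, xmax, ymax) box, clamped at zero for degenerate boxes
    return max(0.0, box[2] - box[0]) * max(0.0, box[3] - box[1])

def box_iou(box_a, box_b):
    # intersection-over-union of two (xmin, ymin, xmax, ymax) boxes
    inter_w = min(box_a[2], box_b[2]) - max(box_a[0], box_b[0])
    inter_h = min(box_a[3], box_b[3]) - max(box_a[1], box_b[1])
    inter = max(0.0, inter_w) * max(0.0, inter_h)
    union = _box_area(box_a) + _box_area(box_b) - inter
    return inter / union if union > 0 else 0.0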
Example #4
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        cls_targets = 1 + labels[max_ids]

        cls_targets[max_ious < 0.5] = 0
        ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore IoUs in (0.4, 0.5)
        cls_targets[ignore] = -1  # for now, just mark ignored anchors as -1
        return loc_targets, cls_targets
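
The box coder documented above implies an inverse transform. A hedged sketch of that decoding step, written as a free function rather than the class's actual decode method, and assuming the same anchor layout:

import torch

def decode_sketch(loc_targets, anchor_boxes):
    # invert the Faster RCNN box coder from the docstring above:
    #   x = tx * anchor_w + anchor_x,  w = exp(tw) * anchor_w  (same for y / h)
    xy = loc_targets[:, :2] * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = torch.exp(loc_targets[:, 2:]) * anchor_boxes[:, 2:]
    return torch.cat([xy, wh], 1)  # boxes in (cx, cy, w, h) order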
Example #5
    def validation_step(self, opt, outputs, batch, batch_idx, epoch):
        imgs, targets, paths, shapes, pad = batch
        _, _, height, width = imgs.shape
        
        inf_out, train_out = outputs
        whwh = torch.Tensor([width, height, width, height]).to(imgs.device)

        losses = compute_loss(train_out, targets, self.model)[1][:3]  # GIoU, obj, cls
        output = non_max_suppression(inf_out, conf_thres=opt.conf_thres, iou_thres=opt.iou_thres, multi_label=self.calc_ni(batch_idx, epoch) > self.n_burn)

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            self.seen += 1

            if pred is None:
                if nl:
                    self.stats.append((torch.zeros(0, self.niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            # with open('test.txt', 'a') as file:
            #    [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], self.niou, dtype=torch.bool, device=imgs.device)

            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        for j in (ious > self.iouv[0].to(ious.device)).nonzero():
                            d = ti[i[j]]  # detected target
                            if d not in detected:
                                detected.append(d)
                                correct[pi[j]] = ious[j] > self.iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            self.stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
        return losses
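
box_iou here is the project's pairwise IoU utility: it takes (N, 4) predictions and (M, 4) targets in (x1, y1, x2, y2) format and returns an (N, M) matrix, so .max(1) picks the best-matching target per prediction. A minimal sketch of such a function, not the project's own implementation:

import torch

def box_iou_sketch(boxes1, boxes2, eps=1e-7):
    # per-box areas
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    # broadcasted intersection corners, shape (N, M, 2)
    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])
    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[:, :, 0] * wh[:, :, 1]
    return inter / (area1[:, None] + area2 - inter + eps)  # (N, M) IoU matrix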
Example #6
def main():
    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""

    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
    onnx_file_path = './yolov3.onnx'
    engine_file_path = "yolov3.trt"
    data_path = "./data/unrel.data"

    data = parse_data_cfg(data_path)
    nc = int(data['classes'])  # number of classes
    path = data['valid']  # path to test images
    names = load_classes(data['names'])  # class names

    iouv = torch.linspace(0.5, 0.95, 1, dtype=torch.float32)  # IoU vector for mAP@0.5:0.95
    niou = 1

    conf_thres = 0.001
    iou_thres = 0.6
    verbose = True

    # Generate custom dataloader
    img_size = 448  # copied from the pytorch source
    batch_size = 16

    dataset = LoadImagesAndLabels(path, img_size, batch_size, rect=True)
    batch_size = min(batch_size, len(dataset))
    dataloader = data_loader(dataset, batch_size, img_size)

    # Output shapes expected by the post-processor
    output_shapes = [(16, 126, 14, 14), (16, 126, 28, 28), (16, 126, 56, 56)]

    # Do inference with TensorRT
    trt_outputs = []
    with get_engine(onnx_file_path, engine_file_path
                    ) as engine, engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                     'mAP@0.5', 'F1')
        p, r, f1, mp, mr, map, mf1, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
        pbar = tqdm.tqdm(dataloader, desc=s)
        stats, ap, ap_class = [], [], []
        seen = 0

        for batch_i, (imgs, targets, paths, shapes) in enumerate(pbar):

            imgs = imgs.astype(np.float32) / 255.0
            nb, _, height, width = imgs.shape  # batch size, channels, height, width
            whwh = np.array([width, height, width, height])

            inputs[0].host = imgs

            postprocessor_args = {
                # A list of 3 three-dimensional tuples for the YOLO masks
                "yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],
                # A list of 9 two-dimensional tuples for the YOLO anchors
                "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61),
                                 (62, 45), (59, 119), (116, 90), (156, 198),
                                 (373, 326)],
                "num_classes": 37,
                "stride": [32, 16, 8]
            }

            postprocessor = PostprocessYOLO(**postprocessor_args)

            # Run the network layers before the YOLO decode step with TensorRT
            t = time.time()
            trt_outputs = common.do_inference_v2(context,
                                                 bindings=bindings,
                                                 inputs=inputs,
                                                 outputs=outputs,
                                                 stream=stream)

            trt_outputs = [
                output.reshape(shape)
                for output, shape in zip(trt_outputs, output_shapes)
            ]

            trt_outputs = [
                np.ascontiguousarray(
                    otpt[:, :, :int(imgs.shape[2] * (2**i) /
                                    32), :int(imgs.shape[3] * (2**i) / 32)],
                    dtype=np.float32) for i, otpt in enumerate(trt_outputs)
            ]

            output_list = postprocessor.process(trt_outputs)

            t0 += time.time() - t

            inf_out = torch.cat(output_list, 1)
            t = time.time()
            output = non_max_suppression(inf_out,
                                         conf_thres=conf_thres,
                                         iou_thres=iou_thres)  # nms
            t1 += time.time() - t

            # Statistics per image
            for si, pred in enumerate(output):
                labels = targets[targets[:, 0] == si, 1:]
                nl = len(labels)
                tcls = labels[:, 0].tolist() if nl else []  # target class
                seen += 1

                if pred is None:
                    if nl:
                        stats.append((torch.zeros(0, niou, dtype=torch.bool),
                                      torch.Tensor(), torch.Tensor(), tcls))
                    continue

                # Assign all predictions as incorrect
                correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)
                if nl:
                    detected = []  # target indices
                    tcls_tensor = labels[:, 0]

                    # target boxes
                    tbox = xywh2xyxy(labels[:, 1:5]) * whwh
                    tbox = tbox.type(torch.float32)

                    # Per target class
                    for cls in torch.unique(tcls_tensor):
                        ti = (cls == tcls_tensor).nonzero().view(-1)  # target indices
                        pi = (cls == pred[:, 5]).nonzero().view(-1)  # prediction indices

                        # Search for detections
                        if pi.shape[0]:
                            # Prediction to target ious
                            ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                            # Append detections
                            for j in (ious > iouv[0]).nonzero():
                                d = ti[i[j]]  # detected target
                                if d not in detected:
                                    detected.append(d)
                                    correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                    if len(detected) == nl:  # all targets already located in image
                                        break

                # Append statistics (correct, conf, pcls, tcls)
                stats.append(
                    (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

            # Plot images
            if batch_i < 1:
                f = 'test_batch%g_gt.jpg' % batch_i  # filename
                plot_images(imgs, targets, paths=paths, names=names,
                            fname=f)  # ground truth
                f = 'test_batch%g_pred.jpg' % batch_i
                plot_images(imgs,
                            output_to_target(output, width, height),
                            paths=paths,
                            names=names,
                            fname=f)  # predictions

        # Compute statistics
        stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
        if len(stats):
            p, r, ap, f1, ap_class = ap_per_class(*stats)
            if niou > 1:
                p, r, ap, f1 = p[:, 0], r[:, 0], ap.mean(1), ap[:, 0]  # [P, R, mAP@0.5:0.95, mAP@0.5]
            mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
            nt = np.bincount(stats[3].astype(np.int64),
                             minlength=nc)  # number of targets per class
        else:
            nt = torch.zeros(1)

        # Print results
        pf = '%20s' + '%10.3g' * 6  # print format
        print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1))

        # Print results per class
        if verbose and nc > 1 and len(stats):
            for i, c in enumerate(ap_class):
                print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

        # Print speeds
        if verbose:
            t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (
                img_size, img_size, batch_size)  # tuple
            print(
                'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g'
                % t)
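
Both this example and the previous one rely on xywh2xyxy to turn normalized (cx, cy, w, h) labels into corner coordinates before scaling by whwh. A hedged sketch of that conversion; the project's own helper may differ in details:

import numpy as np
import torch

def xywh2xyxy_sketch(x):
    # convert (cx, cy, w, h) boxes to (x1, y1, x2, y2); works for torch tensors or numpy arrays
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # x1 = cx - w/2
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # y1 = cy - h/2
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # x2 = cx + w/2
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # y2 = cy + h/2
    return y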
Example #7
def attack_video(params,
                 video_path=None,
                 attack_det_id_dict=None,
                 patch_bbox=None,
                 moving_direction=None,
                 verbose=0,
                 is_return=False):

    detector = KerasYOLOv3Model_plus(sess=K.get_session())

    n_attacks = None

    videogen = skvideo.io.FFmpegReader(video_path)
    virtual_attack = False
    detected_objects_list_prev = None
    match_info_prev = None

    cal_dx_dy_flag = True
    attack_frame_list = [*attack_det_id_dict]
    attack_frame_list.sort()

    attacking_flag = False
    attack_count_idx = 0

    is_init = True
    params_min_hits = params['min_hits']
    for frame_count, image in enumerate(videogen.nextFrame()):
        if frame_count > 1:
            is_init = False

        image_yolo, _ = letterbox_image(image,
                                        shape=(416, 416),
                                        data_format='channels_last')
        image = bgr2rgb((image_yolo * 255).astype(np.uint8))
        image_yolo_pil = Image.fromarray((image_yolo * 255).astype(np.uint8))
        detected_objects_list = detector.detect_image(image_yolo_pil)
        detected_objects_list = nms_fine_tune(detected_objects_list)

        detected_objects_list = sort_bbox_by_area(detected_objects_list)
        if len(detected_objects_list) != 0:
            nat_detected_objects_list = copy.deepcopy(detected_objects_list)

        if frame_count in attack_frame_list or attacking_flag == True:
            target_det_id = attack_det_id_dict[
                frame_count - attack_count_idx][attack_count_idx]

            if attack_count_idx == 0:
                attacking_flag = True
                target_trk_id = find_match_trk(match_info_prev, target_det_id)
                target_init_bbox = detected_objects_list[target_det_id]['bbox']
                target_init_trk_bbox = (
                    params_prev['tracker_list'][target_trk_id].obj)['bbox']
                print("Attack starts at frame {}".format(frame_count))
                print("Target bbox location in the original frame {}: {} ".
                      format(frame_count, target_init_bbox))
            if attack_count_idx != 0:
                _, _, match_info_nat = pipeline(image,
                                                nat_detected_objects_list,
                                                frame_count,
                                                params_prev,
                                                detect_output=True,
                                                verbose=0,
                                                virtual_attack=virtual_attack,
                                                return_match_info=True)
                attacking_flag = is_match(target_trk_id, target_det_id,
                                          match_info_nat)
                if not attacking_flag:
                    detection_missing = is_missing_detection(
                        nat_detected_objects_list, target_init_bbox,
                        target_det_id)
                    try:
                        tracking_missing = is_missing_detection(
                            tracker_bbox_list(params_prev['tracker_list']),
                            target_init_trk_bbox, target_trk_id)
                    except:
                        pdb.set_trace()
                    if detection_missing and not tracking_missing:
                        attacking_flag = True
                    else:
                        print('Attack finished with {0} attacks.'.format(
                            attack_count_idx))
                        n_attacks = attack_count_idx
                        cal_dx_dy_flag = True
                        attack_count_idx = 0
                        return n_attacks

            if attacking_flag:
                temp_attack_obj = detected_objects_list_prev[target_det_id]
                target_det_prev = temp_attack_obj
                target_trk_prev = params_prev['tracker_list'][
                    target_trk_id].obj
                translation_vector_center = calculate_translation_center(
                    target_trk_prev['bbox'], target_det_prev['bbox'])

                attack_bbox = temp_attack_obj['bbox']
                attack_param = params_prev
                L = 5  # bbox moving step, in pixels

                if cal_dx_dy_flag and moving_direction is None:
                    if translation_vector_center[0] == 0.:
                        ratio = 1000.0
                    else:
                        ratio = abs(translation_vector_center[1] /
                                    translation_vector_center[0])
                    dx = L / math.sqrt(1 + ratio * ratio)
                    dy = dx * ratio
                    if translation_vector_center[0] > 0:
                        dx *= -1
                    if translation_vector_center[1] > 0:
                        dy *= -1
                    cal_dx_dy_flag = False

                if attack_count_idx == 0:
                    for sub_attack_count in range(100):
                        if moving_direction is None:
                            fake_det_bbox = (
                                target_trk_prev['bbox'] +
                                np.array([dx, dy, dx, dy]) *
                                (sub_attack_count + 1)).astype(int)
                        else:
                            fake_det_bbox = (
                                target_trk_prev['bbox'] +
                                np.array(moving_direction) *
                                (sub_attack_count + 1)).astype(int)

                        detected_objects_list[target_det_id]['bbox'] = fake_det_bbox
                        _, param_attack, match_info = pipeline(
                            image,
                            detected_objects_list,
                            frame_count,
                            params,
                            detect_output=True,
                            verbose=0,
                            virtual_attack=virtual_attack,
                            return_match_info=True)
                        if is_match(target_trk_id, target_det_id, match_info):
                            attack_bbox = fake_det_bbox
                            attack_param = param_attack
                            if box_iou(patch_bbox, fake_det_bbox) <= 0.0:
                                break
                        else:
                            break
                    detected_objects_list[target_det_id]['bbox'] = attack_bbox
                else:
                    del detected_objects_list[target_det_id]

                print("Fabricate bbox location {} at frame {}".format(
                    attack_bbox, frame_count))
                image_yolo_pil.save('./output/' + 'ori_' + str(frame_count) +
                                    '.png')
                attack_count_idx += 1

        image_track, params, match_info = pipeline(
            image,
            detected_objects_list,
            frame_count,
            params,
            detect_output=True,
            verbose=verbose,
            virtual_attack=virtual_attack,
            return_match_info=True,
            is_init=is_init)

        cv2.imwrite('./output/track/' + str(frame_count) + '.png', image_track)

        match_info_prev = copy.deepcopy(match_info)
        detected_objects_list_prev = copy.deepcopy(nat_detected_objects_list)
        params_prev = copy.deepcopy(params)

    return n_attacks
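
The attack direction above is derived from calculate_translation_center, which this example does not define. A plausible, hedged sketch, assuming it returns the displacement between the centers of two (x1, y1, x2, y2) boxes:

def calculate_translation_center_sketch(bbox_from, bbox_to):
    # displacement between the centers of two (x1, y1, x2, y2) boxes,
    # used above to choose the direction (dx, dy) of the fabricated detection
    cx_from, cy_from = (bbox_from[0] + bbox_from[2]) / 2.0, (bbox_from[1] + bbox_from[3]) / 2.0
    cx_to, cy_to = (bbox_to[0] + bbox_to[2]) / 2.0, (bbox_to[1] + bbox_to[3]) / 2.0
    return (cx_to - cx_from, cy_to - cy_from)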