Пример #1
0
def load_datasets(gt_dataset_file, pred_dataset_file):
    """Load the ground-truth and predicted (results) datasets.

    Both files are read with ``ImageDataset.from_json`` and then
    cross-checked: they must hold the same number of images and the same
    total number of objects.

    Args:
        gt_dataset_file: Path to the ground-truth ImageDataset JSON file.
        pred_dataset_file: Path to the predicted ImageDataset JSON file.

    Returns:
        Tuple ``(gt_dataset, pred_dataset)``.

    Raises:
        AssertionError: If either file does not exist, or if the image or
            object counts of the two datasets differ.
    """
    assert osp.exists(
        gt_dataset_file), "ImageDataset filepath {} does not exist..".format(
            gt_dataset_file)
    assert osp.exists(
        pred_dataset_file), "ImageDataset filepath {} does not exist.".format(
            pred_dataset_file)

    print('Loading groundtruth dataset from {}'.format(gt_dataset_file))
    gt_dataset = ImageDataset.from_json(gt_dataset_file)
    print('Loaded {} dataset with {} annotations'.format(
        gt_dataset.name(), gt_dataset.num_of_images()))
    print('Loading predited dataset from {}'.format(pred_dataset_file))
    pred_dataset = ImageDataset.from_json(pred_dataset_file)
    print('Loaded {} dataset with {} annotations'.format(
        pred_dataset.name(), pred_dataset.num_of_images()))
    assert gt_dataset.num_of_images() == pred_dataset.num_of_images()

    num_of_objects_gt = sum(
        len(image_info['object_infos'])
        for image_info in gt_dataset.image_infos())
    # Count the objects of the *predicted* dataset here; counting the
    # ground-truth dataset twice would make the check below a no-op.
    num_of_objects_pred = sum(
        len(image_info['object_infos'])
        for image_info in pred_dataset.image_infos())
    assert num_of_objects_gt == num_of_objects_pred, "{} != {}".format(
        num_of_objects_gt, num_of_objects_pred)

    return gt_dataset, pred_dataset
Пример #2
0
def main():
    """Browse a predicted dataset, drawing visible boxes and category labels.

    Objects carrying a 'score' below ``--score_threshold`` are skipped.
    Navigation between images is delegated to ``WaitKeyNavigator``; the
    loop ends once its ``process_key()`` returns a truthy value.
    """
    arg_parser = argparse.ArgumentParser(description="Visualize Results")
    arg_parser.add_argument(
        "pred_dataset_file",
        help="Path to predicted (results) JSON dataset file")
    arg_parser.add_argument(
        "-s", "--score_threshold",
        default=0.1, type=float, help="Score thresold")
    args = arg_parser.parse_args()

    dataset_path = args.pred_dataset_file
    assert osp.exists(dataset_path), \
        "ImageDataset filepath {} does not exist.".format(dataset_path)

    print('Loading predited dataset from {}'.format(dataset_path))
    pred_dataset = ImageDataset.from_json(dataset_path)
    print('Loaded {} dataset with {} annotations'.format(
        pred_dataset.name(), pred_dataset.num_of_images()))
    print("score_threshold = {}".format(args.score_threshold))

    window = 'image'
    cv2.namedWindow(window, cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)
    cv2.resizeWindow(window, 2048, 1024)

    wait_nav = WaitKeyNavigator(pred_dataset.num_of_images())
    wait_nav.print_key_map()

    # Label rendering parameters (only used to size the label background).
    font_face = cv2.FONT_HERSHEY_PLAIN
    font_scale = 0.8
    thickness = 1

    while True:
        image_info = pred_dataset.image_infos()[wait_nav.index]
        img_path = osp.join(pred_dataset.rootdir(), image_info['image_file'])
        image = cv2.imread(img_path)

        for obj_info in image_info['object_infos']:
            # Nothing to draw without a visible box.
            if 'bbx_visible' not in obj_info:
                continue
            # Drop low-confidence detections; unscored objects are kept.
            if 'score' in obj_info and \
                    obj_info['score'] < args.score_threshold:
                continue

            draw_bbx(image, obj_info['bbx_visible'])

            if 'category' not in obj_info:
                continue

            label = obj_info['category']
            top_left = tuple(
                np.floor(obj_info['bbx_visible'][:2]).astype(int))
            text_size, baseline = cv2.getTextSize(label, font_face,
                                                  font_scale, thickness)
            # Black backdrop behind the label, anchored at the box corner.
            backdrop_start = (top_left[0], top_left[1] + baseline)
            backdrop_end = (top_left[0] + text_size[0],
                            top_left[1] - text_size[1])
            cv2.rectangle(image, backdrop_start, backdrop_end, (0, 0, 0),
                          cv2.FILLED)
            cv2.addText(image, label, top_left, 'times', color=(0, 255, 0))

        image_name = osp.splitext(osp.basename(img_path))[0]
        cv2.displayOverlay(window, 'Image: {}'.format(image_name))
        cv2.imshow(window, image)

        if wait_nav.process_key():
            break
def main():
    """Exercise a Fast-RCNN style data layer and sanity-check its blobs.

    Parses the command line, loads the net (prototxt) in TEST mode on the
    requested GPU and hands the ImageDataset to the first layer of the
    net.  For every epoch and batch it runs a forward pass, asserts that
    the produced blobs have the expected shapes and value ranges, and
    shows each image blob (with its ROIs drawn) next to the original
    image.  At the end of every epoch the layer's ``data_samples`` are
    compared against the filtered dataset.

    Keys while viewing: ESC aborts the current epoch, 'p' toggles between
    waiting indefinitely and a 1 ms wait.

    Raises:
        AssertionError: If the net file is missing or any consistency
            check on the layer output fails.
    """
    description = ('Test Fast-RCNN style datalayer')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("dataset", help="ImageDataset JSON file")
    parser.add_argument("-n",
                        "--net_file",
                        required=True,
                        help="Net (prototxt) file")
    parser.add_argument("-g", "--gpu", type=int, default=0, help="Gpu Id.")
    parser.add_argument("-e",
                        "--epochs",
                        type=int,
                        default=2,
                        help="Number of epochs")
    parser.add_argument(
        "-p",
        "--pause",
        default=0,
        type=int,
        help="Set number of milliseconds to pause. Use 0 to pause indefinitely"
    )
    args = parser.parse_args()

    # init caffe
    caffe.set_device(args.gpu)
    caffe.set_mode_gpu()

    assert osp.exists(args.net_file), 'Net file "{}" do not exist'.format(
        args.net_file)
    net = caffe.Net(args.net_file, caffe.TEST)

    print('Loading dataset from {}'.format(args.dataset))
    dataset = ImageDataset.from_json(args.dataset)
    print('Loaded {} dataset with {} annotations'.format(
        dataset.name(), dataset.num_of_images()))

    net.layers[0].add_dataset(dataset)
    net.layers[0].print_params()
    net.layers[0].generate_datum_ids()

    required_object_info_fields = net.layers[0].required_object_info_fields
    print(
        "required_object_info_fields = {}".format(required_object_info_fields))

    # Make sure we remove bad objects like the data layer does
    filter_dataset(dataset, required_object_info_fields)

    # After filtering, layer and dataset must agree on image/object counts.
    number_of_images = dataset.num_of_images()
    assert net.layers[0].number_of_datapoints() == number_of_images
    num_of_layer_objects = sum([
        len(img_info['object_infos'])
        for img_info in net.layers[0].data_samples
    ])
    num_of_dataset_objects = sum(
        [len(img_info['object_infos']) for img_info in dataset.image_infos()])
    assert num_of_layer_objects == num_of_dataset_objects, "{} != {}".format(
        num_of_layer_objects, num_of_dataset_objects)

    cv2.namedWindow('blob_image', cv2.WINDOW_AUTOSIZE)
    cv2.namedWindow('original_image', cv2.WINDOW_AUTOSIZE)

    image_blob_shape = net.blobs['input_image'].data.shape
    assert len(image_blob_shape) == 4, 'Expects 4D data blob'
    assert image_blob_shape[
        1] == 3, 'Expects 2nd channel to be 3 for BGR image'
    batch_size = image_blob_shape[0]
    num_of_batches = int(np.ceil(dataset.num_of_images() / float(batch_size)))

    exit_loop = False
    for epoch_id in xrange(args.epochs):
        print("-----------------------Epoch # {} / {} -----------------------------".format(
            epoch_id, args.epochs))
        for b in trange(num_of_batches):
            # Dataset indices covered by this batch (last batch may be short).
            start_idx = batch_size * b
            end_idx = min(batch_size * (b + 1), number_of_images)

            # Run forward pass
            _ = net.forward()

            # Get image_scales and image_flippings
            image_scales = net.blobs['image_scales'].data
            # Builtin bool: the np.bool alias no longer exists in NumPy.
            image_flippings = net.blobs['image_flippings'].data.astype(bool)
            assert image_scales.shape == image_flippings.shape == (
                batch_size, )

            # Get roi_blob and from that determine number_of_rois
            roi_blob = net.blobs['roi'].data
            assert roi_blob.ndim == 2 and roi_blob.shape[1] == 5

            number_of_rois = roi_blob.shape[0]
            for roi_id in xrange(number_of_rois):
                # Column 0 is the index of the image within the batch, so
                # it must be strictly smaller than batch_size.
                roi_batch_index = roi_blob[roi_id, 0]
                assert 0 <= roi_batch_index < batch_size
                assert_bbx(roi_blob[roi_id, -4:])

            # Check the bbx blobs
            for bbx_blob_name in ['gt_bbx_amodal', 'gt_bbx_crop']:
                if bbx_blob_name in net.blobs:
                    bbx_blob = net.blobs[bbx_blob_name].data
                    assert bbx_blob.shape == (number_of_rois, 4)
                    for roi_id in xrange(number_of_rois):
                        assert_bbx(bbx_blob[roi_id, :])

            # Check the center proj blobs
            center_proj_blob = net.blobs['gt_center_proj'].data
            assert center_proj_blob.shape == (number_of_rois, 2)

            # Check vp blobs
            vp_blob = net.blobs['gt_viewpoint'].data
            assert vp_blob.shape == (
                number_of_rois, 3), "Weird vp shape = {}".format(vp_blob.shape)
            assert (vp_blob >= -np.pi).all() and (
                vp_blob < np.pi).all(), "Bad vp = \n{}".format(vp_blob)

            for i in xrange(start_idx, end_idx):
                original_image = cv2.imread(
                    osp.join(dataset.rootdir(),
                             dataset.image_infos()[i]['image_file']))
                cv2.imshow('original_image', original_image)

                image_blob = net.blobs['input_image'].data[i - start_idx]
                image_blob_bgr8 = net.layers[0].make_bgr8_from_blob(
                    image_blob).copy()

                # Draw only the ROIs that belong to this image of the batch.
                for roi_id in xrange(roi_blob.shape[0]):
                    roi_batch_index = roi_blob[roi_id, 0]
                    if roi_batch_index == (i - start_idx):
                        bbx_roi = roi_blob[roi_id, -4:].astype(np.float32)
                        cv2.rectangle(image_blob_bgr8, tuple(bbx_roi[:2]),
                                      tuple(bbx_roi[2:]), (0, 255, 0), 1)

                cv2.imshow('blob_image', image_blob_bgr8)
                cv2.displayOverlay(
                    'blob_image',
                    'Flipped' if image_flippings[i -
                                                 start_idx] else 'Original')

                key = cv2.waitKey(args.pause)
                if key == 27:
                    cv2.destroyAllWindows()
                    exit_loop = True
                    break
                elif key == ord('p'):
                    # Toggle between 0 (wait for a key) and 1 ms; kept as an
                    # int because waitKey expects an integer delay.
                    args.pause = int(not args.pause)

            if exit_loop is True:
                print('User presessed ESC. Exiting epoch {}'.format(epoch_id))
                # Only the current epoch is aborted; the next one still runs.
                exit_loop = False
                break
        print("-----------------------End of epoch -----------------------------")

        # Now check the data_layer.data_samples
        # (trailing comma keeps Python 2 on the same line for "Done.")
        print("Verifying data_samples ..."),
        for im_info_layer, im_info_dataset in zip(net.layers[0].data_samples,
                                                  dataset.image_infos()):
            for im_field in ['image_size', 'image_intrinsic']:
                if im_field in im_info_dataset:
                    assert np.all(
                        im_info_layer[im_field] == im_info_dataset[im_field])

            layer_obj_infos = im_info_layer['object_infos']
            dataset_obj_infos = im_info_dataset['object_infos']

            assert len(layer_obj_infos) == len(
                dataset_obj_infos), "{} != {}".format(len(layer_obj_infos),
                                                      len(dataset_obj_infos))
            for obj_info_layer, obj_info_dataset in zip(
                    layer_obj_infos, dataset_obj_infos):
                assert obj_info_layer['id'] == obj_info_dataset['id']
                assert obj_info_layer['category'] == obj_info_dataset[
                    'category']
                for obj_field in required_object_info_fields:
                    assert np.all(obj_info_layer[obj_field] == np.array(obj_info_dataset[obj_field])), \
                        "For obj_field '{}': {} vs {}".format(obj_field, obj_info_layer[obj_field], obj_info_dataset[obj_field])
        print("Done.")
Пример #4
0
def main():
    """Browse an ImageDataset, drawing boxes, centers and viewpoint axes.

    For every object a random color is picked and, when the corresponding
    fields are present, its visible box, amodal box and projected center
    are drawn.  If the object also has a 'viewpoint', a coordinate frame
    for the object is projected into the image.  Navigation between
    images is delegated to ``WaitKeyNavigator``; the loop ends once its
    ``process_key()`` returns a truthy value.

    Raises:
        AssertionError: If an image file is missing or its shape does not
            match the 'image_size' stored in the dataset.
    """
    parser = argparse.ArgumentParser(description="Visualize Image dataset")
    parser.add_argument("image_dataset_file",
                        help="Path to ImageDataset JSON file")

    args = parser.parse_args()

    # (trailing comma keeps Python 2 on the same line for "Done.")
    print('Loading image dataset from {} ...'.format(args.image_dataset_file)),
    dataset = ImageDataset.from_json(args.image_dataset_file)
    print('Done.')
    print(dataset)

    cv2.namedWindow('Image', cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)
    cv2.resizeWindow('Image', 2048, 1024)

    wait_nav = WaitKeyNavigator(dataset.num_of_images())
    wait_nav.print_key_map()

    # Unit axes of the object frame and the BGR color each is drawn in
    # (x = red, y = green, z = blue).
    axes_and_colors = (
        (np.array([1., 0., 0.]), (0, 0, 255)),
        (np.array([0., 1., 0.]), (0, 255, 0)),
        (np.array([0., 0., 1.]), (255, 0, 0)),
    )

    quit_viz = False
    while not quit_viz:
        image_id = wait_nav.index
        image_info = dataset.image_infos()[image_id]

        W, H = image_info['image_size']

        img_path = osp.join(dataset.rootdir(), image_info['image_file'])
        assert osp.exists(img_path)
        image = cv2.imread(img_path)
        assert image.shape == (H, W, 3)

        if 'image_intrinsic' in image_info:
            # Builtin float: the np.float alias no longer exists in NumPy.
            K = np.array(image_info['image_intrinsic'], dtype=float)
        else:
            # No intrinsics stored: fall back to a focal length of 200 with
            # the principal point at the image center.
            f = 200.
            K = np.array([[f, 0., W / 2.], [0., f, H / 2.], [0., 0., 1.]])

        K_inv = np.linalg.inv(K)

        for obj_info in image_info['object_infos']:
            # Random hue with bounded saturation/lightness per object.
            h, s, l = random.random(
            ), 0.5 + random.random() / 2.0, 0.4 + random.random() / 5.0
            color = [int(256 * i) for i in colorsys.hls_to_rgb(h, l, s)]
            if 'bbx_visible' in obj_info:
                draw_bbx(image,
                         obj_info['bbx_visible'],
                         color=color,
                         thickness=1)
            if 'bbx_amodal' in obj_info:
                draw_bbx(image,
                         obj_info['bbx_amodal'],
                         color=color,
                         thickness=1)
            if 'center_proj' in obj_info:
                center_proj = np.array(obj_info['center_proj'], dtype=float)
                cv2.circle(image, tuple(center_proj.astype(np.float32)), 3,
                           color, -1)

                if 'viewpoint' in obj_info:
                    vp = np.array(obj_info['viewpoint'], dtype=float)
                    R_vp = rotation_from_viewpoint(vp)
                    # Objects without a stored distance are placed 10 units
                    # in front of the camera.
                    distance = obj_info.get('center_dist', 10.)
                    obj_pose = Pose(R=R_vp, t=np.array([0., 0., distance]))

                    # Back-project the center pixel to a ray and rotate the
                    # pose by delta_rot (presumably the rotation taking the
                    # optical axis onto that ray -- confirm against
                    # rotation_from_two_vectors).
                    center_proj_ray = K_inv.dot(np.append(center_proj, 1))
                    delta_rot = rotation_from_two_vectors(
                        np.array([0., 0., 1.]), center_proj_ray)

                    obj_pose.R = delta_rot.dot(obj_pose.R)
                    obj_pose.t = delta_rot.dot(obj_pose.t)

                    obj_center_proj = project_point(
                        K,
                        obj_pose * (np.array([0., 0., 0.]))).astype(np.float32)
                    for axis, axis_color in axes_and_colors:
                        axis_tip_proj = project_point(
                            K, obj_pose * axis).astype(np.float32)
                        cv2.line(image, tuple(obj_center_proj),
                                 tuple(axis_tip_proj), axis_color, 2,
                                 cv2.LINE_AA)

        cv2.displayOverlay(
            'Image',
            'Image: {}'.format(osp.splitext(osp.basename(img_path))[0]))
        cv2.imshow('Image', image)

        quit_viz = wait_nav.process_key()