示例#1
0
def _get_detections(generator,
                    model,
                    score_threshold=0.05,
                    max_detections=100,
                    visualize=False):
    """
    Get the detections from the model using the generator.

    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = detections[num_class_detections, 5]

    Entries for class ids the generator reports as absent (``has_label``
    returns false) are left as ``None``.

    Args:
        generator: The generator used to run images through the model.
        model: The model to run on the images.
        score_threshold: The score confidence threshold to use.
        max_detections: The maximum number of detections to use per image.
        visualize: If true, draw the annotations and the five highest-scoring
            detections on a copy of the source image and show it in an OpenCV
            window, blocking until a key is pressed.

    Returns:
        A list of lists containing the detections for each image in the generator.

    """
    num_classes = generator.num_classes()
    # One slot per class id. The list is indexed by class id below, so it must
    # not be shortened when some labels are absent (that raised IndexError).
    all_detections = [[None] * num_classes for _ in range(generator.size())]

    for i in progressbar.progressbar(range(generator.size()),
                                     prefix='Running network: '):
        image = generator.load_image(i)
        src_image = image.copy()
        h, w = image.shape[:2]

        anchors = generator.anchors
        image, scale, offset_h, offset_w = generator.preprocess_image(image)

        # run network
        boxes, scores, labels = model.predict_on_batch(
            [np.expand_dims(image, axis=0),
             np.expand_dims(anchors, axis=0)])

        # undo the offset and scale returned by preprocess_image, then clip
        # the coordinates to the bounds of the source image
        boxes[..., [0, 2]] = boxes[..., [0, 2]] - offset_w
        boxes[..., [1, 3]] = boxes[..., [1, 3]] - offset_h
        boxes /= scale
        boxes[..., [0, 2]] = np.clip(boxes[..., [0, 2]], 0, w - 1)
        boxes[..., [1, 3]] = np.clip(boxes[..., [1, 3]], 0, h - 1)

        # select indices which have a score above the threshold
        indices = np.where(scores[0, :] > score_threshold)[0]

        # select those scores
        kept_scores = scores[0][indices]

        # order by descending score and keep at most max_detections
        scores_sort = np.argsort(-kept_scores)[:max_detections]
        selected = indices[scores_sort]

        # select detections
        # (n, 4)
        image_boxes = boxes[0, selected, :]
        # (n, )
        image_scores = kept_scores[scores_sort]
        # (n, )
        image_labels = labels[0, selected]
        # (n, 6): x1, y1, x2, y2, score, label
        detections = np.concatenate([
            image_boxes,
            np.expand_dims(image_scores, axis=1),
            np.expand_dims(image_labels, axis=1)
        ],
                                    axis=1)

        if visualize:
            draw_annotations(src_image,
                             generator.load_annotations(i),
                             label_to_name=generator.label_to_name)
            draw_detections(src_image,
                            detections[:5, :4],
                            detections[:5, 4],
                            detections[:5, 5].astype(np.int32),
                            label_to_name=generator.label_to_name,
                            score_threshold=score_threshold)

            window_name = '{}'.format(i)
            cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
            cv2.imshow(window_name, src_image)
            cv2.waitKey(0)

        # copy detections to all_detections, dropping the trailing label column
        for class_id in range(num_classes):
            if not generator.has_label(class_id):
                continue

            all_detections[i][class_id] = detections[detections[:, -1] ==
                                                     class_id, :-1]

    return all_detections
示例#2
0
def _get_detections(generator,
                    model,
                    score_threshold=0.05,
                    max_detections=100,
                    visualize=False,
                    flip_test=False,
                    keep_resolution=False):
    """
    Get the detections from the model using the generator.

    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = detections[num_class_detections, 5]

    Entries for class ids the generator reports as absent (``has_label``
    returns false) are left as ``None``.

    Args:
        generator: The generator used to run images through the model.
        model: The model to run on the images.
        score_threshold: The score confidence threshold to use.
        max_detections: Accepted for interface compatibility; this
            implementation does not use it.
        visualize: If true, draw the first five kept detections on a copy of
            the source image and show it in an OpenCV window, blocking until
            a key is pressed.
        flip_test: If true, feed the model a batch of two images: the
            preprocessed image and its horizontal flip.
        keep_resolution: If true, derive the network input size from the image
            size (each side rounded up to a multiple of 32) instead of using
            ``generator.input_size``.

    Returns:
        A list of lists containing the detections for each image in the generator.

    """
    num_classes = generator.num_classes()
    # One slot per class id. The list is indexed by class id below, so it must
    # not be shortened when some labels are absent (that raised IndexError).
    all_detections = [[None] * num_classes for _ in range(generator.size())]

    for i in progressbar.progressbar(range(generator.size()),
                                     prefix='Running network: '):
        image = generator.load_image(i)
        src_image = image.copy()

        # centre (x, y) and longest side of the source image, used for the
        # affine transform in both directions
        c = np.array([image.shape[1] / 2., image.shape[0] / 2.],
                     dtype=np.float32)
        s = max(image.shape[0], image.shape[1]) * 1.0

        if keep_resolution:
            # Smallest multiple of 32 strictly above each side. The parentheses
            # are required: `x | 31 + 1` parses as `x | 32`.
            # NOTE(review): assumes the network needs sides divisible by 32 - confirm.
            tgt_w = (image.shape[1] | 31) + 1
            tgt_h = (image.shape[0] | 31) + 1
        else:
            tgt_w = generator.input_size
            tgt_h = generator.input_size
        image = generator.preprocess_image(image,
                                           c,
                                           s,
                                           tgt_w=tgt_w,
                                           tgt_h=tgt_h)

        if flip_test:
            flipped_image = image[:, ::-1]
            inputs = np.stack([image, flipped_image], axis=0)
        else:
            inputs = np.expand_dims(image, axis=0)

        # run network; only the first element of the output is used.
        # NOTE(review): with flip_test the model presumably merges the flipped
        # prediction internally - confirm.
        detections = model.predict_on_batch(inputs)[0]
        scores = detections[:, 4]
        # select indices which have a score above the threshold
        indices = np.where(scores > score_threshold)[0]

        # select those detections
        detections = detections[indices].astype(np.float64)

        # inverse transform from the (input size // 4) grid to the source image
        trans = get_affine_transform(c, s, (tgt_w // 4, tgt_h // 4), inv=1)

        for j in range(detections.shape[0]):
            detections[j, 0:2] = affine_transform(detections[j, 0:2], trans)
            detections[j, 2:4] = affine_transform(detections[j, 2:4], trans)

        detections[:, [0, 2]] = np.clip(detections[:, [0, 2]], 0,
                                        src_image.shape[1])
        detections[:, [1, 3]] = np.clip(detections[:, [1, 3]], 0,
                                        src_image.shape[0])

        if visualize:
            draw_detections(src_image,
                            detections[:5, :4],
                            detections[:5, 4],
                            detections[:5, 5].astype(np.int32),
                            label_to_name=generator.label_to_name,
                            score_threshold=score_threshold)

            window_name = '{}'.format(i)
            cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
            cv2.imshow(window_name, src_image)
            cv2.waitKey(0)

        # copy detections to all_detections, dropping the trailing label column
        for class_id in range(num_classes):
            if not generator.has_label(class_id):
                continue

            all_detections[i][class_id] = detections[detections[:, -1] ==
                                                     class_id, :-1]

    return all_detections
示例#3
0
def main(queues):
    """
    Run EfficientPose in inference mode live on a video stream.

    Args:
        queues: Sized, indexable collection of queue-like objects.
            If it holds exactly two entries, every annotated frame is put on
            ``queues[0]`` and the tuple ``(boxes, scores, labels, rotations,
            translations, timestamp_ms)`` on ``queues[1]``.
            If it is empty, the frame rate is printed for every frame and the
            norm of the predicted translations is drawn onto the frame.

    Returns:
        The string "END" once the user presses 'q' in the preview window.
    """

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    allow_gpu_growth_memory()

    #input parameter
    phi = 0
    path_to_weights = "model_orange_cube_v2.h5"
    # save_path = "./predictions/occlusion/" #where to save the images or None if the images should be displayed and not saved
    save_path = None
    image_extension = ".jpg"
    class_to_name = {
        0: "cube"
    }  #Linemod use a single class with a name of the Linemod objects
    score_threshold = 0.99999
    translation_scale_norm = 1000.0
    draw_bbox_2d = True
    draw_name = True
    #you probably need to replace the linemod camera matrix with the one of your webcam
    camera_matrix = get_linemod_camera_matrix()
    name_to_3d_bboxes = get_linemod_3d_bboxes()
    class_to_3d_bboxes = {
        class_idx: name_to_3d_bboxes[name]
        for class_idx, name in class_to_name.items()
    }

    num_classes = len(class_to_name)

    #build model and load weights
    model, image_size = build_model_and_load_weights(phi, num_classes,
                                                     score_threshold,
                                                     path_to_weights)

    # overlay text settings (loop invariant)
    font = cv2.FONT_HERSHEY_SIMPLEX
    org = (50, 50)
    fontScale = 1
    # (255, 255, 0) in BGR order is cyan
    color = (255, 255, 0)
    # line thickness of 1 px
    thickness = 1

    # FPS reporting and the distance overlay are only active without queues
    calc_fps = len(queues) == 0

    if save_path is not None:
        os.makedirs(save_path, exist_ok=True)

    webcam = cv2.VideoCapture("http://130.149.238.251:8080/stream/video.mjpeg")
    # try/finally guarantees the stream and windows are released on every exit
    # path, including the early `return "END"` and any exception.
    try:
        webcam.set(cv2.CAP_PROP_BUFFERSIZE, 1)  # set buffer size

        #inferencing
        print("\nStarting inference...\n")

        # read and discard the first 100 frames
        # NOTE(review): presumably flushes stale buffered frames - confirm.
        for _ in range(100):
            webcam.read()

        frame_index = 0
        while True:
            if calc_fps:
                start_time = time.time() * 1000.

            #load image
            got_image, image = webcam.read()
            if not got_image:
                continue

            original_image = image.copy()

            #preprocessing
            input_list, scale = preprocess(image, image_size, camera_matrix,
                                           translation_scale_norm)

            #predict
            boxes, scores, labels, rotations, translations = model.predict_on_batch(
                input_list)

            #postprocessing
            boxes, scores, labels, rotations, translations = postprocess(
                boxes, scores, labels, rotations, translations, scale,
                score_threshold)

            draw_detections(original_image,
                            boxes,
                            scores,
                            labels,
                            rotations,
                            translations,
                            class_to_bbox_3D=class_to_3d_bboxes,
                            camera_matrix=camera_matrix,
                            label_to_name=class_to_name,
                            draw_bbox_2d=draw_bbox_2d,
                            draw_axis=True,
                            draw_name=draw_name)

            # overlay the norm of the predicted translations
            if boxes.shape[0] > 0 and calc_fps:
                original_image = cv2.putText(
                    original_image,
                    'Dist:' + str(np.round(np.linalg.norm(translations))), org,
                    font, fontScale, color, thickness, cv2.LINE_AA)

            # hand the frame and the raw predictions to the consumers
            if len(queues) == 2:
                ts = int(time.time() * 1000.)
                queues[0].put(original_image)
                data = (boxes, scores, labels, rotations, translations, ts)
                queues[1].put(data)

            #display image with predictions
            cv2.imshow('image with predictions', original_image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                return "END"

            if save_path is not None:
                #images to the given path
                cv2.imwrite(
                    os.path.join(
                        save_path,
                        "frame_{}".format(frame_index) + image_extension),
                    original_image)

            if calc_fps:
                end_time = time.time() * 1000.
                print("FPS", 1000 / (end_time - start_time))

            frame_index += 1
    finally:
        #release webcam and close windows
        webcam.release()
        cv2.destroyAllWindows()
示例#4
0
def main():
    """
    Run EfficientPose in inference mode on all images in a given directory.

    Every file in ``path_to_images`` whose name ends with ``image_extension``
    is run through the model. The annotated result is either written to
    ``save_path`` with a ``_predicted`` suffix before the extension, or shown
    in a window when ``save_path`` is None. Files OpenCV cannot read are
    skipped with a warning.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    allow_gpu_growth_memory()

    #input parameter
    path_to_images = "/Datasets/Linemod_preprocessed/data/02/rgb/"
    image_extension = ".png"
    phi = 0
    path_to_weights = "./weights/phi_0_occlusion_best_ADD(-S).h5"
    save_path = "./predictions/occlusion/" #where to save the images or None if the images should be displayed and not saved
    # save_path = None
    class_to_name = {0: "ape", 1: "can", 2: "cat", 3: "driller", 4: "duck", 5: "eggbox", 6: "glue", 7: "holepuncher"} #Occlusion
    #class_to_name = {0: "driller"} #Linemod use a single class with a name of the Linemod objects
    score_threshold = 0.5
    translation_scale_norm = 1000.0
    draw_bbox_2d = False
    draw_name = False
    #for the linemod and occlusion trained models take this camera matrix and these 3d models. in case you trained a model on a custom dataset you need to take the camera matrix and 3d cuboids from your custom dataset.
    camera_matrix = get_linemod_camera_matrix()
    name_to_3d_bboxes = get_linemod_3d_bboxes()
    class_to_3d_bboxes = {class_idx: name_to_3d_bboxes[name] for class_idx, name in class_to_name.items()}

    num_classes = len(class_to_name)

    if not os.path.exists(path_to_images):
        print("Error: the given path to the images {} does not exist!".format(path_to_images))
        return

    # match the extension only at the end of the name, so that e.g.
    # "a.png.bak" is not picked up
    image_list = [filename for filename in os.listdir(path_to_images) if filename.endswith(image_extension)]
    print("\nInfo: found {} image files".format(len(image_list)))

    #build model and load weights
    model, image_size = build_model_and_load_weights(phi, num_classes, score_threshold, path_to_weights)

    if save_path is not None:
        os.makedirs(save_path, exist_ok = True)

    #inferencing
    for image_filename in tqdm(image_list):
        #load image
        image_path = os.path.join(path_to_images, image_filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for unreadable or corrupt files
            print("Warning: could not read image {}, skipping".format(image_path))
            continue
        original_image = image.copy()

        #preprocessing
        input_list, scale = preprocess(image, image_size, camera_matrix, translation_scale_norm)

        #predict
        boxes, scores, labels, rotations, translations = model.predict_on_batch(input_list)

        #postprocessing
        boxes, scores, labels, rotations, translations = postprocess(boxes, scores, labels, rotations, translations, scale, score_threshold)

        draw_detections(original_image,
                        boxes,
                        scores,
                        labels,
                        rotations,
                        translations,
                        class_to_bbox_3D = class_to_3d_bboxes,
                        camera_matrix = camera_matrix,
                        label_to_name = class_to_name,
                        draw_bbox_2d = draw_bbox_2d,
                        draw_name = draw_name)

        if save_path is None:
            #display image with predictions
            cv2.imshow('image with predictions', original_image)
            cv2.waitKey(0)
        else:
            #only save images to the given path; insert the suffix before the
            #trailing extension only (str.replace would touch every occurrence)
            stem = image_filename[:-len(image_extension)]
            output_name = stem + "_predicted" + image_extension
            cv2.imwrite(os.path.join(save_path, output_name), original_image)

    if save_path is None:
        #close the preview window once all images have been shown
        cv2.destroyAllWindows()
示例#5
0
def _get_detections(generator,
                    model,
                    score_threshold=0.05,
                    max_detections=100,
                    save_path=None):
    """ Run the model over every image of the generator and collect the detections.
    The per-class result is a list of lists such that:
        all_detections[num_images][num_classes] = detections[num_class_detections, 5]
    # Arguments
        generator       : The generator used to run images through the model.
        model           : The model to run on the images.
        score_threshold : Only detections scoring above this value are kept.
        max_detections  : The maximum number of detections to keep per image.
        save_path       : Directory to write images with drawn annotations and
                          detections to; nothing is drawn or written when None.
    # Returns
        A tuple (all_detections, image_names, detection_list, scores_list, labels_list):
        the per-image, per-class detections, followed by one entry per image
        holding the image path, the kept boxes, their scores and their labels.
    """
    # NOTE(review): the per-image list only gets a slot for labels where
    # has_label() is true, yet it is indexed by raw label id further down.
    # Confirm that absent labels can only be trailing ones.
    all_detections = [[
        None for label in range(generator.num_classes())
        if generator.has_label(label)
    ] for _ in range(generator.size())]

    # per-image extras returned next to all_detections
    image_names = []
    detection_list = []
    scores_list = []
    labels_list = []

    for index in range(generator.size()):
        raw_image = generator.load_image(index)
        image_names.append(generator.image_path(index))

        preprocessed = generator.preprocess_image(raw_image.copy())
        network_input, scale = generator.resize_image(preprocessed)
        if keras.backend.image_data_format() == 'channels_first':
            network_input = network_input.transpose((2, 0, 1))

        # run network on a batch of one; only the first three outputs are used
        batch = np.expand_dims(network_input, axis=0)
        boxes, scores, labels = model.predict_on_batch(batch)[:3]

        # correct boxes for image scale
        boxes /= scale

        # keep the candidates above the threshold, best score first,
        # truncated to max_detections
        above = np.where(scores[0, :] > score_threshold)[0]
        kept_scores = scores[0][above]
        order = np.argsort(-kept_scores)[:max_detections]
        picked = above[order]

        image_boxes = boxes[0, picked, :]
        image_scores = kept_scores[order]
        image_labels = labels[0, picked]

        detection_list.append(image_boxes)
        scores_list.append(image_scores)
        labels_list.append(image_labels)

        # columns: four box coordinates, score, label
        image_detections = np.concatenate(
            (image_boxes,
             image_scores[:, np.newaxis],
             image_labels[:, np.newaxis]),
            axis=1)

        if save_path is not None:
            # both annotations and detections are drawn onto raw_image
            draw_annotations(raw_image,
                             generator.load_annotations(index),
                             label_to_name=generator.label_to_name)
            draw_detections(raw_image,
                            image_boxes,
                            image_scores,
                            image_labels,
                            label_to_name=generator.label_to_name)

            output_file = os.path.join(save_path, '{}.png'.format(index))
            cv2.imwrite(output_file, raw_image)

        # split by label and drop the trailing label column
        per_class = all_detections[index]
        for label in range(generator.num_classes()):
            if generator.has_label(label):
                label_mask = image_detections[:, -1] == label
                per_class[label] = image_detections[label_mask, :-1]

    return all_detections, image_names, detection_list, scores_list, labels_list
def _get_detections(generator,
                    model,
                    score_threshold=0.05,
                    max_detections=100,
                    save_path=None):
    """
    Run the model over every image of the generator and collect the detections.

    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = detections[num_class_detections, 5]

    Args:
        generator: The generator used to run images through the model.
        model: The model to run on the images.
        score_threshold: Only detections scoring above this value are kept.
        max_detections: The maximum number of detections to keep per image.
        save_path: Directory to write images with drawn annotations and
            detections to; nothing is drawn or written when None.

    Returns:
        A list of lists containing the detections for each image in the generator.

    """
    # NOTE(review): the per-image list only gets a slot for labels where
    # has_label() is true, yet it is indexed by raw label id further down.
    # Confirm that absent labels can only be trailing ones.
    all_detections = [[
        None for label in range(generator.num_classes())
        if generator.has_label(label)
    ] for _ in range(generator.size())]

    image_indices = range(generator.size())
    for idx in progressbar.progressbar(image_indices,
                                       prefix='Running network: '):
        raw_image = generator.load_image(idx)
        resized, scale = generator.resize_image(
            generator.preprocess_image(raw_image.copy()))

        # run network on a batch of one; only the first three outputs are used
        outputs = model.predict_on_batch(np.expand_dims(resized, axis=0))
        boxes, scores, labels = outputs[:3]

        # correct boxes for image scale
        boxes /= scale

        # candidates whose score exceeds the threshold
        candidates = np.where(scores[0, :] > score_threshold)[0]
        candidate_scores = scores[0][candidates]

        # best score first, truncated to max_detections
        ranking = np.argsort(-candidate_scores)[:max_detections]
        chosen = candidates[ranking]

        # (n, 4)
        image_boxes = boxes[0, chosen, :]
        # (n, )
        image_scores = candidate_scores[ranking]
        # (n, )
        image_labels = labels[0, chosen]
        # (n, 6): four box coordinates, score, label
        image_detections = np.concatenate(
            (image_boxes,
             image_scores[:, np.newaxis],
             image_labels[:, np.newaxis]),
            axis=1)

        if save_path is not None:
            draw_annotations(raw_image,
                             generator.load_annotations(idx),
                             label_to_name=generator.label_to_name)
            draw_detections(raw_image,
                            image_boxes,
                            image_scores,
                            image_labels,
                            label_to_name=generator.label_to_name,
                            score_threshold=score_threshold)

            target_file = os.path.join(save_path, '{}.png'.format(idx))
            cv2.imwrite(target_file, raw_image)

        # split by label and drop the trailing label column
        image_slots = all_detections[idx]
        for label in range(generator.num_classes()):
            if generator.has_label(label):
                matches = image_detections[:, -1] == label
                image_slots[label] = image_detections[matches, :-1]

    return all_detections
示例#7
0
def main():
    """
    Run EfficientPose in inference mode live on webcam.

    Frames are read from capture device 0, annotated with the model's
    predictions and shown in a preview window until 'q' is pressed. When
    ``save_path`` is set, every annotated frame is also written there as
    ``frame_<n><image_extension>``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    allow_gpu_growth_memory()

    #input parameter
    phi = 0
    path_to_weights = "./weights/phi_0_occlusion_best_ADD(-S).h5"
    # save_path = "./predictions/occlusion/" #where to save the images or None if the images should be displayed and not saved
    save_path = None
    image_extension = ".jpg"
    class_to_name = {
        0: "ape",
        1: "can",
        2: "cat",
        3: "driller",
        4: "duck",
        5: "eggbox",
        6: "glue",
        7: "holepuncher"
    }  #Occlusion
    #class_to_name = {0: "driller"} #Linemod use a single class with a name of the Linemod objects
    score_threshold = 0.5
    translation_scale_norm = 1000.0
    draw_bbox_2d = False
    draw_name = False
    #you probably need to replace the linemod camera matrix with the one of your webcam
    camera_matrix = get_linemod_camera_matrix()
    name_to_3d_bboxes = get_linemod_3d_bboxes()
    class_to_3d_bboxes = {
        class_idx: name_to_3d_bboxes[name]
        for class_idx, name in class_to_name.items()
    }

    num_classes = len(class_to_name)

    #build model and load weights
    model, image_size = build_model_and_load_weights(phi, num_classes,
                                                     score_threshold,
                                                     path_to_weights)

    if save_path is not None:
        os.makedirs(save_path, exist_ok=True)

    webcam = cv2.VideoCapture(0)
    # try/finally guarantees the device and windows are released even when
    # inference raises.
    try:
        if not webcam.isOpened():
            # a device that never opened would make the loop below spin forever
            print("Error: could not open webcam 0!")
            return

        #inferencing
        print("\nStarting inference...\n")
        i = 0
        while True:
            #load image
            got_image, image = webcam.read()
            if not got_image:
                # tolerate dropped frames
                continue

            original_image = image.copy()

            #preprocessing
            input_list, scale = preprocess(image, image_size, camera_matrix,
                                           translation_scale_norm)

            #predict
            boxes, scores, labels, rotations, translations = model.predict_on_batch(
                input_list)

            #postprocessing
            boxes, scores, labels, rotations, translations = postprocess(
                boxes, scores, labels, rotations, translations, scale,
                score_threshold)

            draw_detections(original_image,
                            boxes,
                            scores,
                            labels,
                            rotations,
                            translations,
                            class_to_bbox_3D=class_to_3d_bboxes,
                            camera_matrix=camera_matrix,
                            label_to_name=class_to_name,
                            draw_bbox_2d=draw_bbox_2d,
                            draw_name=draw_name)

            #display image with predictions
            cv2.imshow('image with predictions', original_image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            if save_path is not None:
                #images to the given path
                cv2.imwrite(
                    os.path.join(save_path,
                                 "frame_{}".format(i) + image_extension),
                    original_image)

            i += 1
    finally:
        #release webcam and close windows
        webcam.release()
        cv2.destroyAllWindows()
示例#8
0
def main(weight, phi=0, dst=None):
    """
    Run EfficientPose in inference mode live on webcam.

    Args:
        weight: Path to the weights file, passed to
            ``build_model_and_load_weights``.
        phi: Scaling parameter passed to ``build_model_and_load_weights``.
        dst: If a string, directory in which every annotated frame is saved
            as ``frame_<n>.jpg``; any other value disables saving.
    """
    cam = cv2.VideoCapture(0)
    # try/finally guarantees the camera and windows are released even when
    # model loading or inference raises.
    try:
        i = 0
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        allow_gpu_growth_memory()

        #input parameter
        class_to_name = {
            0: "ape",
            1: "can",
            2: "cat",
            3: "driller",
            4: "duck",
            5: "eggbox",
            6: "glue",
            7: "holepuncher"
        }  #Occlusion
        #class_to_name = {0: "driller"} #Linemod use a single class with a name of the Linemod objects
        score_threshold = 0.5
        translation_scale_norm = 1000.0
        draw_bbox_2d = False
        draw_name = False
        #you probably need to replace the linemod camera matrix with the one of your webcam
        camera_matrix = get_linemod_camera_matrix()
        name_to_3d_bboxes = get_linemod_3d_bboxes()
        class_to_3d_bboxes = {
            class_idx: name_to_3d_bboxes[name]
            for class_idx, name in class_to_name.items()
        }
        num_classes = len(class_to_name)

        #build model and load weights
        model, image_size = build_model_and_load_weights(
            phi, num_classes, score_threshold, weight)

        save_frames = isinstance(dst, str)
        if save_frames:
            os.makedirs(dst, exist_ok=True)

        #inferencing
        print("\nStarting inference...\n")
        while True:
            #load image: grab and drop 8 frames before reading one
            # NOTE(review): presumably skips stale buffered frames - confirm.
            for _ in range(8):
                cam.grab()
            ret, image = cam.read()
            if not ret:
                continue
            img = image.copy()

            #preprocessing
            input_list, scale = preprocess(image, image_size, camera_matrix,
                                           translation_scale_norm)

            #predict
            boxes, scores, labels, rotations, translations = model.predict_on_batch(
                input_list)

            #postprocessing
            boxes, scores, labels, rotations, translations = postprocess(
                boxes, scores, labels, rotations, translations, scale,
                score_threshold)
            draw_detections(img,
                            boxes,
                            scores,
                            labels,
                            rotations,
                            translations,
                            class_to_bbox_3D=class_to_3d_bboxes,
                            camera_matrix=camera_matrix,
                            label_to_name=class_to_name,
                            draw_bbox_2d=draw_bbox_2d,
                            draw_name=draw_name)

            if save_frames:
                #only save images to the given path
                cv2.imwrite(os.path.join(dst, f'frame_{i}.jpg'), img)

            #display image with predictions
            cv2.imshow('pose', img)
            # mask to the low byte so the comparison also works where waitKey
            # returns extra high bits; 27 is the Escape key
            k = cv2.waitKey(1) & 0xFF
            if k == 27 or k == ord('q'):
                break
            i += 1
    finally:
        #release webcam and close windows
        cv2.destroyAllWindows()
        cam.release()
示例#9
0
def _get_detections(generator, model, score_threshold=0.05, max_detections=100, visualize=False):
    """
    Get the detections from the model using the generator.

    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = detections[num_class_detections, 5]

    Entries for class ids the generator reports as absent (``has_label``
    returns false) are left as ``None``.

    Args:
        generator: The generator used to run images through the model.
        model: The model to run on the images.
        score_threshold: The score confidence threshold to use.
        max_detections: Accepted for interface compatibility; this
            implementation does not use it.
        visualize: If true, draw the annotations and the first five kept
            detections on a copy of the source image and show it in an OpenCV
            window, blocking until a key is pressed.

    Returns:
        A list of lists containing the detections for each image in the generator.

    """
    num_classes = generator.num_classes()
    # One slot per class id. The list is indexed by class id below, so it must
    # not be shortened when some labels are absent (that raised IndexError).
    all_detections = [[None] * num_classes for _ in range(generator.size())]

    for i in progressbar.progressbar(range(generator.size()), prefix='Running network: '):
        image = generator.load_image(i)
        src_image = image.copy()
        image_shape = np.array(image.shape[:2])
        image = generator.preprocess_image(image)[0]

        # run network; the model takes the image and its original (h, w) shape
        raw_detections = model.predict_on_batch(
            [np.expand_dims(image, axis=0), np.expand_dims(image_shape, axis=0)])[0]

        # Swap the first four columns pairwise: source columns (1, 0, 3, 2)
        # become (x1, y1, x2, y2). The remaining columns are kept as they are.
        detections = raw_detections.astype(np.float64)
        detections[:, :4] = raw_detections[:, [1, 0, 3, 2]]

        scores = detections[:, 4]
        # select indices which have a score above the threshold
        indices = np.where(scores > score_threshold)[0]

        # select those detections
        detections = detections[indices]

        if visualize:
            draw_annotations(src_image, generator.load_annotations(i), label_to_name=generator.label_to_name)
            draw_detections(src_image, detections[:5, :4], detections[:5, 4], detections[:5, 5].astype(np.int32),
                            label_to_name=generator.label_to_name,
                            score_threshold=score_threshold)

            window_name = '{}'.format(i)
            cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
            cv2.imshow(window_name, src_image)
            cv2.waitKey(0)

        # copy detections to all_detections, dropping the trailing label column
        for class_id in range(num_classes):
            if not generator.has_label(class_id):
                continue

            all_detections[i][class_id] = detections[detections[:, -1] == class_id, :-1]

    return all_detections
示例#10
0
def main(weight, phi=0, src='.', dst=None):
    """
    Run EfficientPose in inference mode on all images in a given directory.

    Args:
        weight: Passed to build_model_and_load_weights as the weights argument.
        phi: Passed to build_model_and_load_weights as its first argument.
        src: Directory whose ``*.jpg`` files (non-recursive) are processed.
        dst: When this is a string, each annotated image is additionally
            written to that directory as ``<stem>_<extension>``. Any other
            value disables saving.

    Every annotated image is shown in an OpenCV window and the function blocks
    on a key press before moving on to the next image. Images that OpenCV
    cannot decode are reported and skipped.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    allow_gpu_growth_memory()

    # NOTE: kept as an assert so the exception type seen by callers does not
    # change; be aware that it is stripped when Python runs with -O.
    assert os.path.exists(src), f"Error: image folder {src} does not exist!"
    # sorted() gives a reproducible processing order; glob's order is arbitrary.
    image_list = sorted(glob(src + '/*.jpg'))
    print(f"\nInfo: found {len(image_list)} image files")

    #input parameter
    class_to_name = {
        0: "ape",
        1: "can",
        2: "cat",
        3: "driller",
        4: "duck",
        5: "eggbox",
        6: "glue",
        7: "holepuncher"
    }  #Occlusion
    #class_to_name = {0: "driller"} # Linemod use a single class with a name of the Linemod objects
    score_threshold = 0.5
    translation_scale_norm = 1000.0
    draw_bbox_2d = False
    draw_name = False
    # for the linemod and occlusion trained models take this camera matrix and these 3d models.
    # in case you trained a model on a custom dataset you need to take the camera matrix and 3d cuboids from your custom dataset.
    camera_matrix = get_linemod_camera_matrix()
    name_to_3d_bboxes = get_linemod_3d_bboxes()
    class_to_3d_bboxes = {
        class_idx: name_to_3d_bboxes[name]
        for class_idx, name in class_to_name.items()
    }

    num_classes = len(class_to_name)

    #build model and load weights
    model, image_size = build_model_and_load_weights(phi, num_classes,
                                                     score_threshold, weight)

    # Decide once whether results are written to disk and create the output
    # directory up front instead of on every loop iteration.
    save_images = isinstance(dst, str)
    if save_images:
        os.makedirs(dst, exist_ok=True)

    #inferencing
    for image_path in tqdm(image_list):
        #load image
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # rather than raising; skip it instead of crashing on .copy().
            print(f"Warning: could not read image {image_path}, skipping")
            continue
        # draw on a copy so the array handed to preprocess stays untouched
        img = image.copy()

        #preprocessing
        input_list, scale = preprocess(image, image_size, camera_matrix,
                                       translation_scale_norm)

        #predict
        boxes, scores, labels, rotations, translations = model.predict_on_batch(
            input_list)

        #postprocessing
        boxes, scores, labels, rotations, translations = postprocess(
            boxes, scores, labels, rotations, translations, scale,
            score_threshold)
        draw_detections(img,
                        boxes,
                        scores,
                        labels,
                        rotations,
                        translations,
                        class_to_bbox_3D=class_to_3d_bboxes,
                        camera_matrix=camera_matrix,
                        label_to_name=class_to_name,
                        draw_bbox_2d=draw_bbox_2d,
                        draw_name=draw_name)

        if save_images:
            #additionally save the annotated image to the given path
            stem, extension = os.path.splitext(os.path.basename(image_path))
            cv2.imwrite(os.path.join(dst, stem + '_' + extension), img)

        print(image_path, img.shape)
        print(boxes, scores, labels, rotations, translations, sep='\n')
        #display image with predictions
        cv2.imshow('pose', img)
        cv2.waitKey(0)
示例#11
0
def _get_detections(generator,
                    model,
                    score_threshold=0.05,
                    max_detections=100,
                    save_path=None):
    """ Get the detections from the model using the generator.

    The result is a list of lists such that:
        all_detections[image_index][label] = (detections, rotations, translations)
    where, for the detections of that label in that image,
        detections   : the box columns followed by one score column,
        rotations    : the matching rows of the (pi-scaled) rotation output,
        translations : the matching rows of the translation output.

    # Arguments
        generator       : The generator used to run images through the model.
        model           : The model to run on the images.
        score_threshold : The score confidence threshold to use.
        max_detections  : The maximum number of detections to use per image.
        save_path       : The path to save the images with visualized detections to
                          (created if missing); None disables saving.
    # Returns
        A list of lists containing the detections for each image in the generator.
    """
    # NOTE(review): the inner list only has one slot per label for which
    # has_label() is true, yet it is indexed by the raw label below; this
    # presumably relies on labels being contiguous from 0 - confirm.
    all_detections = [[
        None for i in range(generator.num_classes()) if generator.has_label(i)
    ] for j in range(generator.size())]

    if save_path is not None:
        # cv2.imwrite does not create missing directories
        os.makedirs(save_path, exist_ok=True)

    for i in progressbar.progressbar(range(generator.size()),
                                     prefix='Running network: '):
        raw_image = generator.load_image(i)
        image, scale = generator.preprocess_image(raw_image.copy())
        # image, scale = generator.resize_image(image)
        camera_matrix = generator.load_camera_matrix(i)
        camera_input = generator.get_camera_parameter_input(
            camera_matrix, scale, generator.translation_scale_norm)

        # if keras.backend.image_data_format() == 'channels_first':
        #     image = image.transpose((2, 0, 1))

        # run network
        outputs = model.predict_on_batch([
            np.expand_dims(image, axis=0),
            np.expand_dims(camera_input, axis=0)
        ])[:5]

        # Convert tensor outputs to numpy arrays. Checking for a .numpy()
        # method instead of comparing version strings avoids both the
        # lexicographic comparison pitfall ('10.0.0' < '2.0.0') and an
        # AttributeError when the outputs are already numpy arrays.
        boxes, scores, labels, rotations, translations = [
            output.numpy() if hasattr(output, 'numpy') else output
            for output in outputs
        ]

        # correct boxes for image scale
        boxes /= scale

        #rescale rotations and translations
        rotations *= math.pi

        # select indices which have a score above the threshold
        indices = np.where(scores[0, :] > score_threshold)[0]

        # select those scores
        scores = scores[0][indices]

        # find the order with which to sort the scores (descending),
        # keeping at most max_detections entries
        scores_sort = np.argsort(-scores)[:max_detections]

        # select detections
        selected = indices[scores_sort]
        image_boxes = boxes[0, selected, :]
        image_rotations = rotations[0, selected, :]
        image_translations = translations[0, selected, :]
        image_scores = scores[scores_sort]
        image_labels = labels[0, selected]
        # columns: box coordinates, score, label
        image_detections = np.concatenate([
            image_boxes,
            np.expand_dims(image_scores, axis=1),
            np.expand_dims(image_labels, axis=1)
        ], axis=1)

        if save_path is not None:
            raw_image = cv2.cvtColor(raw_image, cv2.COLOR_RGB2BGR)
            draw_annotations(raw_image,
                             generator.load_annotations(i),
                             class_to_bbox_3D=generator.get_bbox_3d_dict(),
                             camera_matrix=generator.load_camera_matrix(i),
                             label_to_name=generator.label_to_name)
            draw_detections(raw_image,
                            image_boxes,
                            image_scores,
                            image_labels,
                            image_rotations,
                            image_translations,
                            class_to_bbox_3D=generator.get_bbox_3d_dict(),
                            camera_matrix=generator.load_camera_matrix(i),
                            label_to_name=generator.label_to_name)

            cv2.imwrite(os.path.join(save_path, '{}.png'.format(i)), raw_image)

        # copy detections to all_detections
        detected_labels = image_detections[:, -1]
        for label in range(generator.num_classes()):
            if not generator.has_label(label):
                continue

            # one mask per label, shared by all three outputs
            label_mask = detected_labels == label
            all_detections[i][label] = (
                image_detections[label_mask, :-1],
                image_rotations[label_mask, :],
                image_translations[label_mask, :])

    return all_detections
示例#12
0
def main():
    """
    Run EfficientPose in inference mode on all images in a given directory.

    All settings (image directory, weights file, class names, thresholds,
    output directory) are hard-coded in the "input parameter" section below.
    With ``save_path`` set, annotated images are written there as
    ``<name>_predicted<extension>``; with ``save_path = None`` each image is
    shown in an OpenCV window instead. Images that OpenCV cannot decode are
    reported and skipped.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    allow_gpu_growth_memory()

    #input parameter
    path_to_images = "./mydataset/data/02/rgb"
    image_extension = ".jpg"
    phi = 0
    path_to_weights = "./checkpoints/03_05_2021_01_15_57/occlusion/phi_0_occlusion_best_ADD(-S).h5"
    save_path = "./predictions/" #where to save the images or None if the images should be displayed and not saved
    # save_path = None
    #class_to_name = {0: "ape", 1: "can", 2: "cat", 3: "driller", 4: "duck", 5: "eggbox", 6: "glue", 7: "holepuncher"} #Occlusion
    class_to_name = {0: "cube",1:"sock"}
    #class_to_name = {0: "driller"} #Linemod use a single class with a name of the Linemod objects
    score_threshold = 0.5
    translation_scale_norm = 1000.0
    draw_bbox_2d = True
    draw_name = True
    #for the linemod and occlusion trained models take this camera matrix and these 3d models. in case you trained a model on a custom dataset you need to take the camera matrix and 3d cuboids from your custom dataset.
    camera_matrix = get_linemod_camera_matrix()
    name_to_3d_bboxes = get_linemod_3d_bboxes()
    # NOTE(review): this lookup fails if a name in class_to_name is missing
    # from what get_linemod_3d_bboxes() returns - confirm it provides entries
    # for "cube" and "sock".
    class_to_3d_bboxes = {class_idx: name_to_3d_bboxes[name] for class_idx, name in class_to_name.items()}

    num_classes = len(class_to_name)

    if not os.path.exists(path_to_images):
        print("Error: the given path to the images {} does not exist!".format(path_to_images))
        return

    # Match on the file name suffix; a plain substring test would also pick up
    # names such as "frame.jpg.bak".
    image_list = [filename for filename in os.listdir(path_to_images) if filename.endswith(image_extension)]
    print("\nInfo: found {} image files".format(len(image_list)))

    #build model and load weights
    model, image_size = build_model_and_load_weights(phi, num_classes, score_threshold, path_to_weights)

    # Debug aid: rotation vector of a fixed, hard-coded rotation matrix. It
    # does not depend on the predictions, so it is computed once here and only
    # printed inside the loop.
    debug_rotation = [ 9.72052753e-01, 2.33968005e-01, -1.92990061e-02,1.98842332e-01, -8.64236653e-01, -4.62121934e-01,-1.24800652e-01, 4.45369422e-01, -8.86606395e-01 ]
    rot_mat = np.array(debug_rotation).reshape(3,3)
    rvec,_ = cv2.Rodrigues(rot_mat)

    if save_path is not None:
        # create the output directory once instead of once per image
        os.makedirs(save_path, exist_ok = True)

    #inferencing
    for image_filename in tqdm(image_list):
        #load image
        image_path = os.path.join(path_to_images, image_filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for unreadable files instead of raising
            print("Warning: could not read image {}, skipping".format(image_path))
            continue
        original_image = image.copy()

        #preprocessing
        input_list, scale = preprocess(image, image_size, camera_matrix, translation_scale_norm)

        #predict
        boxes, scores, labels, rotations, translations = model.predict_on_batch(input_list)
        # raw network outputs of the first detection slot of the first batch entry
        print(image_filename)
        print("Trans:",translations[:,0,:][0])
        print("Rot:",rotations[:,0,:][0])
        print("Boxes:",boxes[:,0,:][0])

        print(rvec)
        #postprocessing
        boxes, scores, labels, rotations, translations = postprocess(boxes, scores, labels, rotations, translations, scale, score_threshold)

        print("Post_Trans:",translations)
        print("Post_Rot:",rotations)
        print("Post_Boxes:",boxes)

        draw_detections(original_image,
                        boxes,
                        scores,
                        labels,
                        rotations,
                        translations,
                        class_to_bbox_3D = class_to_3d_bboxes,
                        camera_matrix = camera_matrix,
                        label_to_name = class_to_name,
                        draw_bbox_2d = draw_bbox_2d,
                        draw_name = draw_name)

        if save_path is None:
            #display image with predictions
            cv2.imshow('image with predictions', original_image)
            cv2.waitKey(0)
        else:
            #only save images to the given path
            # Insert "_predicted" before the trailing extension only; str.replace
            # would rewrite every occurrence of the extension inside the name.
            stem = image_filename[:-len(image_extension)]
            cv2.imwrite(os.path.join(save_path, stem + "_predicted" + image_extension), original_image)