def _get_detections(generator, model, score_threshold=0.05, max_detections=100, visualize=False):
    """
    Get the detections from the model using the generator.

    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = detections[num_class_detections, 5]

    Every per-class array holds one row per detection: four box coordinates
    (columns 0 and 2 are clipped against the image width, columns 1 and 3
    against the image height) followed by the score. Entries for class ids
    that ``generator.has_label`` reports as absent stay ``None``.

    Args:
        generator: The generator used to run images through the model. Its
            ``anchors`` are fed to the model next to every image.
        model: The model to run on the images.
        score_threshold: The score confidence threshold to use.
        max_detections: The maximum number of detections to use per image.
        visualize: If True, draw the annotations and the five highest scoring
            detections on a copy of the image and show it in an OpenCV window,
            blocking until a key is pressed.

    Returns:
        A list of lists containing the detections for each image in the generator.
    """
    num_classes = generator.num_classes()
    # One slot per class id: the fill loop below indexes by class id, so the
    # list must not be shortened when the generator lacks some labels.
    all_detections = [[None] * num_classes for _ in range(generator.size())]

    for i in progressbar.progressbar(range(generator.size()), prefix='Running network: '):
        image = generator.load_image(i)
        src_image = image.copy()
        h, w = image.shape[:2]

        anchors = generator.anchors
        image, scale, offset_h, offset_w = generator.preprocess_image(image)

        # run network
        boxes, scores, labels = model.predict_on_batch([np.expand_dims(image, axis=0),
                                                        np.expand_dims(anchors, axis=0)])

        # undo the preprocessing (offset, then scale) and keep boxes inside the source image
        boxes[..., [0, 2]] = boxes[..., [0, 2]] - offset_w
        boxes[..., [1, 3]] = boxes[..., [1, 3]] - offset_h
        boxes /= scale
        boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, w - 1)
        boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, h - 1)
        boxes[:, :, 2] = np.clip(boxes[:, :, 2], 0, w - 1)
        boxes[:, :, 3] = np.clip(boxes[:, :, 3], 0, h - 1)

        # select indices which have a score above the threshold
        indices = np.where(scores[0, :] > score_threshold)[0]

        # select those scores
        scores = scores[0][indices]

        # find the order with which to sort the scores
        scores_sort = np.argsort(-scores)[:max_detections]

        # select detections
        # (n, 4)
        image_boxes = boxes[0, indices[scores_sort], :]
        # (n, )
        image_scores = scores[scores_sort]
        # (n, )
        image_labels = labels[0, indices[scores_sort]]
        # (n, 6)
        detections = np.concatenate(
            [image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)],
            axis=1)

        if visualize:
            draw_annotations(src_image, generator.load_annotations(i), label_to_name=generator.label_to_name)
            # detections are sorted by descending score, so [:5] is the top five
            draw_detections(src_image, detections[:5, :4], detections[:5, 4], detections[:5, 5].astype(np.int32),
                            label_to_name=generator.label_to_name,
                            score_threshold=score_threshold)

            cv2.namedWindow('{}'.format(i), cv2.WINDOW_NORMAL)
            cv2.imshow('{}'.format(i), src_image)
            cv2.waitKey(0)

        # copy detections to all_detections
        for class_id in range(num_classes):
            if not generator.has_label(class_id):
                continue
            all_detections[i][class_id] = detections[detections[:, -1] == class_id, :-1]

    return all_detections
def _get_detections(generator, model, score_threshold=0.05, max_detections=100, visualize=False,
                    flip_test=False, keep_resolution=False):
    """
    Get the detections from the model using the generator.

    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = detections[num_class_detections, 5]

    Entries for class ids that ``generator.has_label`` reports as absent stay ``None``.

    Args:
        generator: The generator used to run images through the model.
        model: The model to run on the images.
        score_threshold: The score confidence threshold to use.
        max_detections: Not used by this implementation; kept so the signature
            matches the other evaluation helpers.
        visualize: If True, draw the first five kept detections on a copy of the
            image and show it in an OpenCV window, blocking until a key is pressed.
        flip_test: If True, the preprocessed image and a copy reversed along
            axis 1 are stacked into one batch of two; element 0 of the
            prediction is used either way.
        keep_resolution: If False, the network input is
            ``generator.input_size`` squared. If True, each side is the
            smallest multiple of 32 strictly greater than the image side.

    Returns:
        A list of lists containing the detections for each image in the generator.
    """
    num_classes = generator.num_classes()
    # One slot per class id: the fill loop below indexes by class id, so the
    # list must not be shortened when the generator lacks some labels.
    all_detections = [[None] * num_classes for _ in range(generator.size())]

    for i in progressbar.progressbar(range(generator.size()), prefix='Running network: '):
        image = generator.load_image(i)
        src_image = image.copy()

        # centre and scale (longest side) handed to the affine pre/post-processing
        c = np.array([image.shape[1] / 2., image.shape[0] / 2.], dtype=np.float32)
        s = max(image.shape[0], image.shape[1]) * 1.0

        if not keep_resolution:
            tgt_w = generator.input_size
            tgt_h = generator.input_size
        else:
            # Parentheses are required: `x | 31 + 1` parses as `x | 32`,
            # which does not round the size up to a multiple of 32.
            tgt_w = (image.shape[1] | 31) + 1
            tgt_h = (image.shape[0] | 31) + 1
        image = generator.preprocess_image(image, c, s, tgt_w=tgt_w, tgt_h=tgt_h)

        if flip_test:
            flipped_image = image[:, ::-1]
            inputs = np.stack([image, flipped_image], axis=0)
        else:
            inputs = np.expand_dims(image, axis=0)

        # run network
        detections = model.predict_on_batch(inputs)[0]
        scores = detections[:, 4]

        # select indices which have a score above the threshold
        indices = np.where(scores > score_threshold)[0]

        # select those detections
        detections = detections[indices].astype(np.float64)

        # NOTE(review): presumably maps coordinates on the 1/4-resolution
        # output grid back to the source image (inv=1) - confirm against
        # get_affine_transform.
        trans = get_affine_transform(c, s, (tgt_w // 4, tgt_h // 4), inv=1)

        for j in range(detections.shape[0]):
            detections[j, 0:2] = affine_transform(detections[j, 0:2], trans)
            detections[j, 2:4] = affine_transform(detections[j, 2:4], trans)

        detections[:, [0, 2]] = np.clip(detections[:, [0, 2]], 0, src_image.shape[1])
        detections[:, [1, 3]] = np.clip(detections[:, [1, 3]], 0, src_image.shape[0])

        if visualize:
            draw_detections(src_image, detections[:5, :4], detections[:5, 4], detections[:5, 5].astype(np.int32),
                            label_to_name=generator.label_to_name,
                            score_threshold=score_threshold)

            cv2.namedWindow('{}'.format(i), cv2.WINDOW_NORMAL)
            cv2.imshow('{}'.format(i), src_image)
            cv2.waitKey(0)

        # copy detections to all_detections
        for class_id in range(num_classes):
            if not generator.has_label(class_id):
                continue
            all_detections[i][class_id] = detections[detections[:, -1] == class_id, :-1]

    return all_detections
def main(queues):
    """
    Run EfficientPose in inference mode live on a network video stream.

    Args:
        queues: A sized sequence selecting the operating mode.
            * empty: stand-alone mode; the distance (norm of the predicted
              translations) is drawn on the frame and the FPS is printed.
            * exactly two elements: for every frame the annotated image is
              put on ``queues[0]`` and the tuple ``(boxes, scores, labels,
              rotations, translations, timestamp_ms)`` on ``queues[1]``.

    Returns:
        The string "END" once the user presses 'q' in the preview window.
        The capture device and all OpenCV windows are released on every exit
        path, including exceptions.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    allow_gpu_growth_memory()

    # input parameter
    phi = 0
    path_to_weights = "model_orange_cube_v2.h5"
    # where to save the frames, or None if they should only be displayed
    save_path = None
    image_extension = ".jpg"
    # single-class model
    class_to_name = {0: "cube"}
    score_threshold = 0.99999
    translation_scale_norm = 1000.0
    draw_bbox_2d = True
    draw_name = True

    # you probably need to replace the linemod camera matrix with the one of your webcam
    camera_matrix = get_linemod_camera_matrix()
    name_to_3d_bboxes = get_linemod_3d_bboxes()
    class_to_3d_bboxes = {
        class_idx: name_to_3d_bboxes[name]
        for class_idx, name in class_to_name.items()
    }

    num_classes = len(class_to_name)

    # build model and load weights
    model, image_size = build_model_and_load_weights(phi, num_classes, score_threshold, path_to_weights)

    # text overlay settings for the distance read-out
    font = cv2.FONT_HERSHEY_SIMPLEX
    org = (50, 50)
    font_scale = 1
    color = (255, 255, 0)
    thickness = 1

    webcam = cv2.VideoCapture("http://130.149.238.251:8080/stream/video.mjpeg")
    try:
        webcam.set(cv2.CAP_PROP_BUFFERSIZE, 1)  # set buffer size

        if save_path is not None:
            os.makedirs(save_path, exist_ok=True)

        # inferencing
        print("\nStarting inference...\n")

        # read and discard the first 100 frames before processing starts
        for _ in range(100):
            webcam.read()

        calc_fps = len(queues) == 0
        frame_idx = 0

        while True:
            if calc_fps:
                start_time = time.time() * 1000.

            # load image; a failed read is retried on the next iteration
            got_image, image = webcam.read()
            if not got_image:
                continue

            original_image = image.copy()

            # preprocessing
            input_list, scale = preprocess(image, image_size, camera_matrix, translation_scale_norm)

            # predict
            boxes, scores, labels, rotations, translations = model.predict_on_batch(input_list)

            # postprocessing
            boxes, scores, labels, rotations, translations = postprocess(
                boxes, scores, labels, rotations, translations, scale, score_threshold)

            draw_detections(original_image,
                            boxes,
                            scores,
                            labels,
                            rotations,
                            translations,
                            class_to_bbox_3D=class_to_3d_bboxes,
                            camera_matrix=camera_matrix,
                            label_to_name=class_to_name,
                            draw_bbox_2d=draw_bbox_2d,
                            draw_axis=True,
                            draw_name=draw_name)

            if boxes.shape[0] > 0 and calc_fps:
                original_image = cv2.putText(
                    original_image,
                    'Dist:' + str(np.round(np.linalg.norm(translations))),
                    org, font, font_scale, color, thickness, cv2.LINE_AA)

            if len(queues) == 2:
                # hand fresh data to the consumers
                ts = int(time.time() * 1000.)
                queues[0].put(original_image)
                queues[1].put((boxes, scores, labels, rotations, translations, ts))

            # display image with predictions
            # NOTE(review): shown in both stand-alone and queue mode - confirm
            # this matches the intended nesting of the original script.
            cv2.imshow('image with predictions', original_image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                return "END"

            if save_path is not None:
                # save the frame under a running index
                cv2.imwrite(
                    os.path.join(save_path, "frame_{}".format(frame_idx) + image_extension),
                    original_image)
            frame_idx += 1

            if calc_fps:
                elapsed_ms = time.time() * 1000. - start_time
                # the clock may not advance between two reads; avoid dividing by zero
                if elapsed_ms > 0:
                    print("FPS", 1000 / elapsed_ms)
    finally:
        # release webcam and close windows
        webcam.release()
        cv2.destroyAllWindows()
def main():
    """
    Run EfficientPose in inference mode on all images in a given directory.

    All settings are hard-coded below. Every file in ``path_to_images`` whose
    name ends with ``image_extension`` is run through the model. The image
    with the drawn predictions is either written to ``save_path`` (with
    "_predicted" inserted before the extension) or, when ``save_path`` is
    None, shown in a window until a key is pressed. Files OpenCV cannot
    decode are reported and skipped.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    allow_gpu_growth_memory()

    # input parameter
    path_to_images = "/Datasets/Linemod_preprocessed/data/02/rgb/"
    image_extension = ".png"
    phi = 0
    path_to_weights = "./weights/phi_0_occlusion_best_ADD(-S).h5"
    # where to save the images, or None if the images should be displayed and not saved
    save_path = "./predictions/occlusion/"
    # Occlusion; Linemod uses a single class named after the Linemod object, e.g. {0: "driller"}
    class_to_name = {0: "ape", 1: "can", 2: "cat", 3: "driller", 4: "duck", 5: "eggbox", 6: "glue", 7: "holepuncher"}
    score_threshold = 0.5
    translation_scale_norm = 1000.0
    draw_bbox_2d = False
    draw_name = False

    # for the linemod and occlusion trained models take this camera matrix and these 3d models.
    # in case you trained a model on a custom dataset you need to take the camera matrix and
    # 3d cuboids from your custom dataset.
    camera_matrix = get_linemod_camera_matrix()
    name_to_3d_bboxes = get_linemod_3d_bboxes()
    class_to_3d_bboxes = {class_idx: name_to_3d_bboxes[name] for class_idx, name in class_to_name.items()}

    num_classes = len(class_to_name)

    if not os.path.exists(path_to_images):
        print("Error: the given path to the images {} does not exist!".format(path_to_images))
        return

    # match on the suffix so names that merely contain the extension are not picked up
    image_list = [filename for filename in os.listdir(path_to_images) if filename.endswith(image_extension)]
    print("\nInfo: found {} image files".format(len(image_list)))

    # build model and load weights
    model, image_size = build_model_and_load_weights(phi, num_classes, score_threshold, path_to_weights)

    if save_path is not None:
        os.makedirs(save_path, exist_ok=True)

    # inferencing
    for image_filename in tqdm(image_list):
        # load image
        image_path = os.path.join(path_to_images, image_filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None
            print("Warning: could not read image {}, skipping".format(image_path))
            continue
        original_image = image.copy()

        # preprocessing
        input_list, scale = preprocess(image, image_size, camera_matrix, translation_scale_norm)

        # predict
        boxes, scores, labels, rotations, translations = model.predict_on_batch(input_list)

        # postprocessing
        boxes, scores, labels, rotations, translations = postprocess(
            boxes, scores, labels, rotations, translations, scale, score_threshold)

        draw_detections(original_image,
                        boxes,
                        scores,
                        labels,
                        rotations,
                        translations,
                        class_to_bbox_3D=class_to_3d_bboxes,
                        camera_matrix=camera_matrix,
                        label_to_name=class_to_name,
                        draw_bbox_2d=draw_bbox_2d,
                        draw_name=draw_name)

        if save_path is None:
            # display image with predictions
            cv2.imshow('image with predictions', original_image)
            cv2.waitKey(0)
        else:
            # only save images to the given path
            stem = image_filename[:-len(image_extension)]
            cv2.imwrite(os.path.join(save_path, stem + "_predicted" + image_extension), original_image)
def _get_detections(generator, model, score_threshold=0.05, max_detections=100, save_path=None):
    """ Get the detections from the model using the generator.

    The per-class result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = detections[num_class_detections, 5]

    Each per-class array holds the four box values followed by the score.
    Entries for labels that ``generator.has_label`` reports as absent stay ``None``.

    # Arguments
        generator       : The generator used to run images through the model.
        model           : The model to run on the images.
        score_threshold : The score confidence threshold to use.
        max_detections  : The maximum number of detections to use per image.
        save_path       : The path to save the images with visualized detections to.
    # Returns
        A tuple ``(all_detections, image_names, detection_list, scores_list, labels_list)``.
        ``all_detections`` is the per-image, per-class list described above; the
        other four lists hold, per image, the image path and the kept boxes,
        scores and labels (sorted by descending score).
    """
    num_classes = generator.num_classes()
    # One slot per label id: the fill loop below indexes by label id, so the
    # list must not be shortened when the generator lacks some labels.
    all_detections = [[None] * num_classes for _ in range(generator.size())]

    # per-image bookkeeping returned next to all_detections
    image_names = []
    detection_list = []
    scores_list = []
    labels_list = []

    for i in range(generator.size()):
        raw_image = generator.load_image(i)
        image_names.append(generator.image_path(i))

        image = generator.preprocess_image(raw_image.copy())
        image, scale = generator.resize_image(image)

        if keras.backend.image_data_format() == 'channels_first':
            image = image.transpose((2, 0, 1))

        # run network
        boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))[:3]

        # correct boxes for image scale
        boxes /= scale

        # select indices which have a score above the threshold
        indices = np.where(scores[0, :] > score_threshold)[0]

        # select those scores
        scores = scores[0][indices]

        # find the order with which to sort the scores
        scores_sort = np.argsort(-scores)[:max_detections]

        # select detections
        image_boxes = boxes[0, indices[scores_sort], :]
        image_scores = scores[scores_sort]
        image_labels = labels[0, indices[scores_sort]]
        detection_list.append(image_boxes)
        scores_list.append(image_scores)
        labels_list.append(image_labels)

        image_detections = np.concatenate(
            [image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)],
            axis=1)

        if save_path is not None:
            # both annotations and detections are drawn on raw_image
            draw_annotations(raw_image, generator.load_annotations(i), label_to_name=generator.label_to_name)
            draw_detections(raw_image, image_boxes, image_scores, image_labels,
                            label_to_name=generator.label_to_name)

            cv2.imwrite(os.path.join(save_path, '{}.png'.format(i)), raw_image)

        # copy detections to all_detections
        for label in range(num_classes):
            if not generator.has_label(label):
                continue

            all_detections[i][label] = image_detections[image_detections[:, -1] == label, :-1]

    return all_detections, image_names, detection_list, scores_list, labels_list
def _get_detections(generator, model, score_threshold=0.05, max_detections=100, save_path=None):
    """
    Get the detections from the model using the generator.

    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = detections[num_class_detections, 5]

    Each per-class array holds the four box values followed by the score.
    Entries for labels that ``generator.has_label`` reports as absent stay ``None``.

    Args:
        generator: The generator used to run images through the model.
        model: The model to run on the images.
        score_threshold: The score confidence threshold to use.
        max_detections: The maximum number of detections to use per image.
        save_path: The path to save the images with visualized detections to.
            Nothing is drawn or written when it is None.

    Returns:
        A list of lists containing the detections for each image in the generator.
    """
    num_classes = generator.num_classes()
    # One slot per label id: the fill loop below indexes by label id, so the
    # list must not be shortened when the generator lacks some labels.
    all_detections = [[None] * num_classes for _ in range(generator.size())]

    for i in progressbar.progressbar(range(generator.size()), prefix='Running network: '):
        raw_image = generator.load_image(i)
        image = generator.preprocess_image(raw_image.copy())
        image, scale = generator.resize_image(image)

        # run network
        boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))[:3]

        # correct boxes for image scale
        boxes /= scale

        # select indices which have a score above the threshold
        indices = np.where(scores[0, :] > score_threshold)[0]

        # select those scores
        scores = scores[0][indices]

        # find the order with which to sort the scores
        scores_sort = np.argsort(-scores)[:max_detections]

        # select detections
        # (n, 4)
        image_boxes = boxes[0, indices[scores_sort], :]
        # (n, )
        image_scores = scores[scores_sort]
        # (n, )
        image_labels = labels[0, indices[scores_sort]]
        # (n, 6)
        image_detections = np.concatenate(
            [image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)],
            axis=1)

        if save_path is not None:
            draw_annotations(raw_image, generator.load_annotations(i), label_to_name=generator.label_to_name)
            draw_detections(raw_image, image_boxes, image_scores, image_labels,
                            label_to_name=generator.label_to_name,
                            score_threshold=score_threshold)

            cv2.imwrite(os.path.join(save_path, '{}.png'.format(i)), raw_image)

        # copy detections to all_detections
        for label in range(num_classes):
            if not generator.has_label(label):
                continue

            all_detections[i][label] = image_detections[image_detections[:, -1] == label, :-1]

    return all_detections
def main():
    """
    Run EfficientPose in inference mode live on webcam.

    All settings are hard-coded below. Frames from capture device 0 are run
    through the model and shown with the drawn predictions until 'q' is
    pressed. When ``save_path`` is set, every processed frame is also written
    there as ``frame_<index><image_extension>``. The capture device and all
    OpenCV windows are released on every exit path, including exceptions.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    allow_gpu_growth_memory()

    # input parameter
    phi = 0
    path_to_weights = "./weights/phi_0_occlusion_best_ADD(-S).h5"
    # where to save the frames, or None if they should only be displayed
    save_path = None
    image_extension = ".jpg"
    # Occlusion; Linemod uses a single class named after the Linemod object, e.g. {0: "driller"}
    class_to_name = {
        0: "ape",
        1: "can",
        2: "cat",
        3: "driller",
        4: "duck",
        5: "eggbox",
        6: "glue",
        7: "holepuncher",
    }
    score_threshold = 0.5
    translation_scale_norm = 1000.0
    draw_bbox_2d = False
    draw_name = False

    # you probably need to replace the linemod camera matrix with the one of your webcam
    camera_matrix = get_linemod_camera_matrix()
    name_to_3d_bboxes = get_linemod_3d_bboxes()
    class_to_3d_bboxes = {
        class_idx: name_to_3d_bboxes[name]
        for class_idx, name in class_to_name.items()
    }

    num_classes = len(class_to_name)

    # build model and load weights
    model, image_size = build_model_and_load_weights(phi, num_classes, score_threshold, path_to_weights)

    webcam = cv2.VideoCapture(0)
    try:
        if not webcam.isOpened():
            # without this check the read loop below would spin forever
            print("Error: could not open webcam 0")
            return

        if save_path is not None:
            os.makedirs(save_path, exist_ok=True)

        # inferencing
        print("\nStarting inference...\n")
        i = 0
        while True:
            # load image; a failed read is retried on the next iteration
            got_image, image = webcam.read()
            if not got_image:
                continue

            original_image = image.copy()

            # preprocessing
            input_list, scale = preprocess(image, image_size, camera_matrix, translation_scale_norm)

            # predict
            boxes, scores, labels, rotations, translations = model.predict_on_batch(input_list)

            # postprocessing
            boxes, scores, labels, rotations, translations = postprocess(
                boxes, scores, labels, rotations, translations, scale, score_threshold)

            draw_detections(original_image,
                            boxes,
                            scores,
                            labels,
                            rotations,
                            translations,
                            class_to_bbox_3D=class_to_3d_bboxes,
                            camera_matrix=camera_matrix,
                            label_to_name=class_to_name,
                            draw_bbox_2d=draw_bbox_2d,
                            draw_name=draw_name)

            # display image with predictions
            cv2.imshow('image with predictions', original_image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            if save_path is not None:
                # also write the frame to the given path
                cv2.imwrite(
                    os.path.join(save_path, "frame_{}".format(i) + image_extension),
                    original_image)
            i += 1
    finally:
        # release webcam and close windows
        webcam.release()
        cv2.destroyAllWindows()
def main(weight, phi=0, dst=None):
    """
    Run EfficientPose in inference mode live on webcam.

    Args:
        weight: Path to the model weights, passed to ``build_model_and_load_weights``.
        phi: Scaling parameter passed to ``build_model_and_load_weights``.
        dst: Directory to write the annotated frames to as ``frame_<index>.jpg``.
            Frames are only saved when this is a string or path-like object;
            any other value (including None) disables saving.

    Frames from capture device 0 are shown with the drawn predictions until
    'q' or Esc is pressed. The capture device and all OpenCV windows are
    released on every exit path, including exceptions.
    """
    cam = cv2.VideoCapture(0)
    try:
        if not cam.isOpened():
            # without this check the read loop below would spin forever
            print("Error: could not open webcam 0")
            return

        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        allow_gpu_growth_memory()

        # input parameter
        # Occlusion; Linemod uses a single class named after the Linemod object, e.g. {0: "driller"}
        class_to_name = {
            0: "ape",
            1: "can",
            2: "cat",
            3: "driller",
            4: "duck",
            5: "eggbox",
            6: "glue",
            7: "holepuncher",
        }
        score_threshold = 0.5
        translation_scale_norm = 1000.0
        draw_bbox_2d = False
        draw_name = False

        # you probably need to replace the linemod camera matrix with the one of your webcam
        camera_matrix = get_linemod_camera_matrix()
        name_to_3d_bboxes = get_linemod_3d_bboxes()
        class_to_3d_bboxes = {
            class_idx: name_to_3d_bboxes[name]
            for class_idx, name in class_to_name.items()
        }

        num_classes = len(class_to_name)

        # build model and load weights
        model, image_size = build_model_and_load_weights(phi, num_classes, score_threshold, weight)

        save_frames = isinstance(dst, (str, os.PathLike))
        if save_frames:
            os.makedirs(dst, exist_ok=True)

        # inferencing
        print("\nStarting inference...\n")
        i = 0
        while True:
            # grab and drop eight frames before reading the one that is processed
            for _ in range(8):
                cam.grab()
            ret, image = cam.read()
            if not ret:
                continue

            img = image.copy()

            # preprocessing
            input_list, scale = preprocess(image, image_size, camera_matrix, translation_scale_norm)

            # predict
            boxes, scores, labels, rotations, translations = model.predict_on_batch(input_list)

            # postprocessing
            boxes, scores, labels, rotations, translations = postprocess(
                boxes, scores, labels, rotations, translations, scale, score_threshold)

            draw_detections(img,
                            boxes,
                            scores,
                            labels,
                            rotations,
                            translations,
                            class_to_bbox_3D=class_to_3d_bboxes,
                            camera_matrix=camera_matrix,
                            label_to_name=class_to_name,
                            draw_bbox_2d=draw_bbox_2d,
                            draw_name=draw_name)

            if save_frames:
                # write the annotated frame to the given path
                cv2.imwrite(os.path.join(dst, f'frame_{i}.jpg'), img)

            # display image with predictions
            cv2.imshow('pose', img)
            key = cv2.waitKey(1)
            if key == 27 or key == ord('q'):
                break
            i += 1
    finally:
        # release webcam and close windows
        cv2.destroyAllWindows()
        cam.release()
def _get_detections(generator, model, score_threshold=0.05, max_detections=100, visualize=False):
    """
    Get the detections from the model using the generator.

    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = detections[num_class_detections, 5]

    The model output has its first two coordinate pairs swapped here
    (columns 1, 0, 3, 2 become columns 0, 1, 2, 3; the inline comments label
    the result x1, y1, x2, y2). Entries for class ids that
    ``generator.has_label`` reports as absent stay ``None``.

    Args:
        generator: The generator used to run images through the model.
        model: The model to run on the images. It receives the preprocessed
            image and the original image shape (first two dimensions).
        score_threshold: The score confidence threshold to use.
        max_detections: Not used by this implementation; kept so the signature
            matches the other evaluation helpers.
        visualize: If True, draw the annotations and the first five kept
            detections on a copy of the image and show it in an OpenCV window,
            blocking until a key is pressed.

    Returns:
        A list of lists containing the detections for each image in the generator.
    """
    num_classes = generator.num_classes()
    # One slot per class id: the fill loop below indexes by class id, so the
    # list must not be shortened when the generator lacks some labels.
    all_detections = [[None] * num_classes for _ in range(generator.size())]

    for i in progressbar.progressbar(range(generator.size()), prefix='Running network: '):
        image = generator.load_image(i)
        src_image = image.copy()
        image_shape = np.array(image.shape[:2])
        image = generator.preprocess_image(image)[0]

        # run network
        raw_detections = model.predict_on_batch([np.expand_dims(image, axis=0),
                                                 np.expand_dims(image_shape, axis=0)])[0]

        # float64 copy with the coordinate columns reordered to x1, y1, x2, y2;
        # the fancy-indexed right-hand side is a copy, so the swap is safe in place
        detections = np.array(raw_detections, dtype=np.float64)
        detections[:, :4] = detections[:, [1, 0, 3, 2]]

        scores = detections[:, 4]

        # select indices which have a score above the threshold
        indices = np.where(scores > score_threshold)[0]

        # select those detections
        detections = detections[indices]

        if visualize:
            draw_annotations(src_image, generator.load_annotations(i), label_to_name=generator.label_to_name)
            draw_detections(src_image, detections[:5, :4], detections[:5, 4], detections[:5, 5].astype(np.int32),
                            label_to_name=generator.label_to_name,
                            score_threshold=score_threshold)

            cv2.namedWindow('{}'.format(i), cv2.WINDOW_NORMAL)
            cv2.imshow('{}'.format(i), src_image)
            cv2.waitKey(0)

        # copy detections to all_detections
        for class_id in range(num_classes):
            if not generator.has_label(class_id):
                continue
            all_detections[i][class_id] = detections[detections[:, -1] == class_id, :-1]

    return all_detections
def main(weight, phi=0, src='.', dst=None):
    """
    Run EfficientPose in inference mode on all images in a given directory.

    Args:
        weight: Path to the model weights, passed to ``build_model_and_load_weights``.
        phi: Scaling parameter passed to ``build_model_and_load_weights``.
        src: Directory that is searched for ``*.jpg`` files.
        dst: Directory to write the annotated images to; the output name is
            the input name with "_" inserted before the extension. Images are
            only saved when this is a string or path-like object.

    Every image is also printed (path, shape and the post-processed
    predictions) and shown in a window until a key is pressed. Files OpenCV
    cannot decode are reported and skipped.

    Raises:
        AssertionError: If ``src`` does not exist.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    allow_gpu_growth_memory()

    # Raised explicitly so the check also runs under `python -O`; the exception
    # type and message are the ones the original assert produced.
    if not os.path.exists(src):
        raise AssertionError(f"Error: image folder {src} does not exist!")
    image_list = glob(os.path.join(src, '*.jpg'))
    print(f"\nInfo: found {len(image_list)} image files")

    # input parameter
    # Occlusion; Linemod uses a single class named after the Linemod object, e.g. {0: "driller"}
    class_to_name = {
        0: "ape",
        1: "can",
        2: "cat",
        3: "driller",
        4: "duck",
        5: "eggbox",
        6: "glue",
        7: "holepuncher",
    }
    score_threshold = 0.5
    translation_scale_norm = 1000.0
    draw_bbox_2d = False
    draw_name = False

    # for the linemod and occlusion trained models take this camera matrix and these 3d models.
    # in case you trained a model on a custom dataset you need to take the camera matrix and
    # 3d cuboids from your custom dataset.
    camera_matrix = get_linemod_camera_matrix()
    name_to_3d_bboxes = get_linemod_3d_bboxes()
    class_to_3d_bboxes = {
        class_idx: name_to_3d_bboxes[name]
        for class_idx, name in class_to_name.items()
    }

    num_classes = len(class_to_name)

    # build model and load weights
    model, image_size = build_model_and_load_weights(phi, num_classes, score_threshold, weight)

    save_images = isinstance(dst, (str, os.PathLike))
    if save_images:
        os.makedirs(dst, exist_ok=True)

    # inferencing
    for image_path in tqdm(image_list):
        # load image
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None
            print(f"Warning: could not read image {image_path}, skipping")
            continue
        img = image.copy()

        # preprocessing
        input_list, scale = preprocess(image, image_size, camera_matrix, translation_scale_norm)

        # predict
        boxes, scores, labels, rotations, translations = model.predict_on_batch(input_list)

        # postprocessing
        boxes, scores, labels, rotations, translations = postprocess(
            boxes, scores, labels, rotations, translations, scale, score_threshold)

        draw_detections(img,
                        boxes,
                        scores,
                        labels,
                        rotations,
                        translations,
                        class_to_bbox_3D=class_to_3d_bboxes,
                        camera_matrix=camera_matrix,
                        label_to_name=class_to_name,
                        draw_bbox_2d=draw_bbox_2d,
                        draw_name=draw_name)

        if save_images:
            # write the annotated image as "<stem>_<ext>" into dst
            stem, ext = os.path.splitext(os.path.basename(image_path))
            cv2.imwrite(os.path.join(dst, stem + '_' + ext), img)

        print(image_path, img.shape)
        print(boxes, scores, labels, rotations, translations, sep='\n')

        # display image with predictions
        cv2.imshow('pose', img)
        cv2.waitKey(0)

    cv2.destroyAllWindows()
def _get_detections(generator, model, score_threshold=0.05, max_detections=100, save_path=None):
    """ Get the detections from the model using the generator.

    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = (boxes + score = detections[num_class_detections, 5],
                                                   rotations = detections[num_class_detections, num_rotation_parameters],
                                                   translations = detections[num_class_detections, num_translation_parameters])

    Entries for labels that ``generator.has_label`` reports as absent stay ``None``.

    # Arguments
        generator       : The generator used to run images through the model.
        model           : The model to run on the images.
        score_threshold : The score confidence threshold to use.
        max_detections  : The maximum number of detections to use per image.
        save_path       : The path to save the images with visualized detections to.
    # Returns
        A list of lists containing the detections for each image in the generator.
    """
    num_classes = generator.num_classes()
    # One slot per label id: the fill loop below indexes by label id, so the
    # list must not be shortened when the generator lacks some labels.
    all_detections = [[None] * num_classes for _ in range(generator.size())]

    for i in progressbar.progressbar(range(generator.size()), prefix='Running network: '):
        raw_image = generator.load_image(i)
        image, scale = generator.preprocess_image(raw_image.copy())
        camera_matrix = generator.load_camera_matrix(i)
        camera_input = generator.get_camera_parameter_input(camera_matrix, scale, generator.translation_scale_norm)

        # run network
        outputs = model.predict_on_batch([np.expand_dims(image, axis=0),
                                          np.expand_dims(camera_input, axis=0)])[:5]

        # Convert whatever exposes .numpy() (framework tensors) and leave plain
        # arrays alone. The former check compared version strings
        # lexicographically, which mis-orders e.g. '10.0.0' against '2.0.0',
        # and then called .numpy() unconditionally.
        boxes, scores, labels, rotations, translations = [
            out.numpy() if hasattr(out, 'numpy') else out for out in outputs
        ]

        # correct boxes for image scale
        boxes /= scale

        # rescale rotations
        rotations *= math.pi

        # select indices which have a score above the threshold
        indices = np.where(scores[0, :] > score_threshold)[0]

        # select those scores
        scores = scores[0][indices]

        # find the order with which to sort the scores
        scores_sort = np.argsort(-scores)[:max_detections]

        # select detections
        image_boxes = boxes[0, indices[scores_sort], :]
        image_rotations = rotations[0, indices[scores_sort], :]
        image_translations = translations[0, indices[scores_sort], :]
        image_scores = scores[scores_sort]
        image_labels = labels[0, indices[scores_sort]]
        image_detections = np.concatenate(
            [image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)],
            axis=1)

        if save_path is not None:
            raw_image = cv2.cvtColor(raw_image, cv2.COLOR_RGB2BGR)
            draw_annotations(raw_image,
                             generator.load_annotations(i),
                             class_to_bbox_3D=generator.get_bbox_3d_dict(),
                             camera_matrix=generator.load_camera_matrix(i),
                             label_to_name=generator.label_to_name)
            draw_detections(raw_image,
                            image_boxes,
                            image_scores,
                            image_labels,
                            image_rotations,
                            image_translations,
                            class_to_bbox_3D=generator.get_bbox_3d_dict(),
                            camera_matrix=generator.load_camera_matrix(i),
                            label_to_name=generator.label_to_name)

            cv2.imwrite(os.path.join(save_path, '{}.png'.format(i)), raw_image)

        # copy detections to all_detections
        for label in range(num_classes):
            if not generator.has_label(label):
                continue

            # rows of this image that were assigned the current label
            label_mask = image_detections[:, -1] == label
            all_detections[i][label] = (image_detections[label_mask, :-1],
                                        image_rotations[label_mask, :],
                                        image_translations[label_mask, :])

    return all_detections
def main():
    """
    Run EfficientPose in inference mode on all images in a given directory.

    All settings are hard-coded below. Every file in ``path_to_images`` whose
    name ends with ``image_extension`` is run through the model. The raw and
    post-processed predictions are printed for debugging, and the image with
    the drawn predictions is either written to ``save_path`` (with
    "_predicted" inserted before the extension) or, when ``save_path`` is
    None, shown in a window until a key is pressed. Files OpenCV cannot
    decode are reported and skipped.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    allow_gpu_growth_memory()

    # input parameter
    path_to_images = "./mydataset/data/02/rgb"
    image_extension = ".jpg"
    phi = 0
    path_to_weights = "./checkpoints/03_05_2021_01_15_57/occlusion/phi_0_occlusion_best_ADD(-S).h5"
    # where to save the images, or None if the images should be displayed and not saved
    save_path = "./predictions/"
    # custom two-class model
    class_to_name = {0: "cube", 1: "sock"}
    score_threshold = 0.5
    translation_scale_norm = 1000.0
    draw_bbox_2d = True
    draw_name = True

    # for the linemod and occlusion trained models take this camera matrix and these 3d models.
    # in case you trained a model on a custom dataset you need to take the camera matrix and
    # 3d cuboids from your custom dataset.
    camera_matrix = get_linemod_camera_matrix()
    name_to_3d_bboxes = get_linemod_3d_bboxes()
    class_to_3d_bboxes = {class_idx: name_to_3d_bboxes[name] for class_idx, name in class_to_name.items()}

    num_classes = len(class_to_name)

    if not os.path.exists(path_to_images):
        print("Error: the given path to the images {} does not exist!".format(path_to_images))
        return

    # match on the suffix so names that merely contain the extension are not picked up
    image_list = [filename for filename in os.listdir(path_to_images) if filename.endswith(image_extension)]
    print("\nInfo: found {} image files".format(len(image_list)))

    # build model and load weights
    model, image_size = build_model_and_load_weights(phi, num_classes, score_threshold, path_to_weights)

    # Debug aid: Rodrigues vector of a fixed reference rotation matrix. It does
    # not depend on the image, so it is computed once and printed per image.
    reference_rotation = [
        9.72052753e-01, 2.33968005e-01, -1.92990061e-02,
        1.98842332e-01, -8.64236653e-01, -4.62121934e-01,
        -1.24800652e-01, 4.45369422e-01, -8.86606395e-01,
    ]
    rot_mat = np.array(reference_rotation).reshape(3, 3)
    rvec, _ = cv2.Rodrigues(rot_mat)

    if save_path is not None:
        os.makedirs(save_path, exist_ok=True)

    # inferencing
    for image_filename in tqdm(image_list):
        # load image
        image_path = os.path.join(path_to_images, image_filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None
            print("Warning: could not read image {}, skipping".format(image_path))
            continue
        original_image = image.copy()

        # preprocessing
        input_list, scale = preprocess(image, image_size, camera_matrix, translation_scale_norm)

        # predict
        boxes, scores, labels, rotations, translations = model.predict_on_batch(input_list)

        # raw prediction at index 0 of the first batch element
        print(image_filename)
        print("Trans:", translations[0, 0, :])
        print("Rot:", rotations[0, 0, :])
        print("Boxes:", boxes[0, 0, :])
        print(rvec)

        # postprocessing
        boxes, scores, labels, rotations, translations = postprocess(
            boxes, scores, labels, rotations, translations, scale, score_threshold)

        print("Post_Trans:", translations)
        print("Post_Rot:", rotations)
        print("Post_Boxes:", boxes)

        draw_detections(original_image,
                        boxes,
                        scores,
                        labels,
                        rotations,
                        translations,
                        class_to_bbox_3D=class_to_3d_bboxes,
                        camera_matrix=camera_matrix,
                        label_to_name=class_to_name,
                        draw_bbox_2d=draw_bbox_2d,
                        draw_name=draw_name)

        if save_path is None:
            # display image with predictions
            cv2.imshow('image with predictions', original_image)
            cv2.waitKey(0)
        else:
            # only save images to the given path
            stem = image_filename[:-len(image_extension)]
            cv2.imwrite(os.path.join(save_path, stem + "_predicted" + image_extension), original_image)