# NOTE: Import block reconstructed from the usage below. The third-party
# imports are standard; the project-internal names (Config, coco_config,
# modellib, MaskRCNN, Data, display_instances, display_box_instances,
# extract_bboxes, load_image_gt, mold_image, compute_ap) are assumed to come
# from this package's own modules and should be adjusted to its layout.
import os
import time
from datetime import datetime

import cv2
import numpy as np
import matplotlib.pyplot as plt
import imgaug.augmenters as iaa
import labelme2coco
from imantics import Mask


class instance_custom_training:
    def __init__(self):
        self.model_dir = os.getcwd()

    def modelConfig(self, network_backbone="resnet101", num_classes=1,
                    class_names=["BG"], batch_size=1, image_max_dim=512,
                    image_min_dim=512, image_resize_mode="square", gpu_count=1):
        self.config = Config(BACKBONE=network_backbone,
                             NUM_CLASSES=1 + num_classes,
                             class_names=class_names,
                             IMAGES_PER_GPU=batch_size,
                             IMAGE_MAX_DIM=image_max_dim,
                             IMAGE_MIN_DIM=image_min_dim,
                             IMAGE_RESIZE_MODE=image_resize_mode,
                             GPU_COUNT=gpu_count)

        if network_backbone == "resnet101":
            print("Using resnet101 as network backbone for the Mask R-CNN model")
        else:
            print("Using resnet50 as network backbone for the Mask R-CNN model")

    def load_pretrained_model(self, model_path):
        # Load the COCO-pretrained weights, excluding the head layers so the
        # model can be fine-tuned on a custom number of classes.
        self.model = modellib.MaskRCNN(mode="training", model_dir=self.model_dir,
                                       config=self.config)
        self.model.load_weights(model_path, by_name=True,
                                exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                         "mrcnn_bbox", "mrcnn_mask"])

    def load_dataset(self, dataset):
        # Training dataset
        labelme_folder1 = os.path.abspath(os.path.join(dataset, "train"))
        # File where the merged json will be saved
        save_json_path1 = os.path.abspath(os.path.join(dataset, "train.json"))
        # Convert the individual labelme json files into a single json file
        labelme2coco.convert(labelme_folder1, save_json_path1)

        self.dataset_train = Data()
        self.dataset_train.load_data(save_json_path1, labelme_folder1)
        self.dataset_train.prepare()

        # Test dataset
        labelme_folder2 = os.path.abspath(os.path.join(dataset, "test"))
        save_json_path2 = os.path.abspath(os.path.join(dataset, "test.json"))
        labelme2coco.convert(labelme_folder2, save_json_path2)

        self.dataset_test = Data()
        self.dataset_test.load_data(save_json_path2, labelme_folder2)
        self.dataset_test.prepare()

    def visualize_sample(self):
        image_id = np.random.choice(self.dataset_train.image_ids)
        image = self.dataset_train.load_image(image_id)
        mask, class_ids = self.dataset_train.load_mask(image_id)
        bbox = extract_bboxes(mask)

        # Display the image with its instance masks and bounding boxes
        out = display_box_instances(image, bbox, mask, class_ids,
                                    self.dataset_train.class_names)
        plt.imshow(out)
        plt.axis("off")
        plt.show()

    def train_model(self, num_epochs, path_trained_models, layers="all",
                    augmentation=False):
        print("Train %d" % len(self.dataset_train.image_ids), "images")
        print("Validate %d" % len(self.dataset_test.image_ids), "images")

        if augmentation == False:
            print("No augmentation")
            self.model.train(self.dataset_train, self.dataset_test,
                             models=path_trained_models, epochs=num_epochs,
                             layers=layers)
        else:
            augmentation = iaa.Sometimes(0.5, [
                iaa.Fliplr(0.5),
                iaa.Flipud(0.5),
                iaa.GaussianBlur(sigma=(0.0, 5.0))
            ])
            print("Applying augmentation on dataset")
            self.model.train(self.dataset_train, self.dataset_test,
                             models=path_trained_models,
                             augmentation=augmentation, epochs=num_epochs,
                             layers=layers)

    def evaluate_model(self, model_path, iou_threshold=0.5):
        self.model = MaskRCNN(mode="inference", model_dir=os.getcwd(),
                              config=self.config)

        if os.path.isfile(model_path):
            model_files = [model_path]
        if os.path.isdir(model_path):
            model_files = sorted([os.path.join(model_path, file_name)
                                  for file_name in os.listdir(model_path)])

        for modelfile in model_files:
            if str(modelfile).endswith(".h5"):
                self.model.load_weights(modelfile, by_name=True)

                APs = []
                for image_id in self.dataset_test.image_ids:
                    # Load the image, bounding boxes and masks for the image id
                    image, image_meta, gt_class_id, gt_bbox, gt_mask = load_image_gt(
                        self.dataset_test, self.config, image_id)
                    # Scale pixel values (e.g. centering)
                    scaled_image = mold_image(image, self.config)
                    # Convert the image into one sample
                    sample = np.expand_dims(scaled_image, 0)
                    # Make a prediction
                    yhat = self.model.detect(sample, verbose=0)
                    # Extract the results for the first sample
                    r = yhat[0]
                    # Calculate statistics, including AP
                    AP, _, _, _ = compute_ap(gt_bbox, gt_class_id, gt_mask,
                                             r["rois"], r["class_ids"],
                                             r["scores"], r["masks"],
                                             iou_threshold=iou_threshold)
                    APs.append(AP)

                # Mean AP across all test images
                mAP = np.mean(APs)
                print(modelfile, "evaluation using iou_threshold", iou_threshold,
                      "is", f"{mAP:f}", "\n")
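
# ---------------------------------------------------------------------------
# Usage sketch for instance_custom_training. All paths and class names below
# are hypothetical placeholders ("mask_rcnn_coco.h5", a "Nature" dataset
# folder containing train/ and test/ subfolders of labelme annotations, a
# "mask_rcnn_models" output folder), not files shipped with this module.
#
#   train_maskrcnn = instance_custom_training()
#   train_maskrcnn.modelConfig(network_backbone="resnet101", num_classes=2,
#                              class_names=["BG", "butterfly", "squirrel"],
#                              batch_size=4)
#   train_maskrcnn.load_pretrained_model("mask_rcnn_coco.h5")
#   train_maskrcnn.load_dataset("Nature")
#   train_maskrcnn.visualize_sample()
#   train_maskrcnn.train_model(num_epochs=300, augmentation=True,
#                              path_trained_models="mask_rcnn_models")
#
# Each saved .h5 checkpoint in a folder can then be scored on the test set:
#
#   train_maskrcnn.evaluate_model("mask_rcnn_models", iou_threshold=0.5)
# ---------------------------------------------------------------------------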
class custom_segmentation:
    def __init__(self):
        self.model_dir = os.getcwd()

    def inferConfig(self, name=None, network_backbone="resnet101", num_classes=1,
                    class_names=["BG"], batch_size=1, detection_threshold=0.7,
                    image_max_dim=512, image_min_dim=512,
                    image_resize_mode="square", gpu_count=1):
        self.config = Config(BACKBONE=network_backbone,
                             NUM_CLASSES=1 + num_classes,
                             class_names=class_names,
                             IMAGES_PER_GPU=batch_size,
                             IMAGE_MAX_DIM=image_max_dim,
                             IMAGE_MIN_DIM=image_min_dim,
                             DETECTION_MIN_CONFIDENCE=detection_threshold,
                             IMAGE_RESIZE_MODE=image_resize_mode,
                             GPU_COUNT=gpu_count)

    def load_model(self, model_path):
        # Load the trained model weights
        self.model = MaskRCNN(mode="inference", model_dir=self.model_dir,
                              config=self.config)
        self.model.load_weights(model_path, by_name=True)

    def segmentImage(self, image_path, show_bboxes=False, output_image_name=None,
                     verbose=None):
        image = cv2.imread(image_path)
        new_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # Run detection
        if verbose is not None:
            print("Processing image...")
        results = self.model.detect([new_img])
        r = results[0]

        if show_bboxes == False:
            # Apply segmentation mask
            output = display_instances(image, r['rois'], r['masks'],
                                       r['class_ids'], self.config.class_names)
        else:
            # Apply segmentation mask with bounding boxes
            output = display_box_instances(image, r['rois'], r['masks'],
                                           r['class_ids'],
                                           self.config.class_names, r['scores'])

        if output_image_name is not None:
            cv2.imwrite(output_image_name, output)
            print("Processed image saved successfully in your current working directory.")
        return r, output

    def segmentFrame(self, frame, show_bboxes=False, output_image_name=None,
                     verbose=None):
        new_img = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if verbose is not None:
            print("Processing frame...")
        # Run detection
        results = self.model.detect([new_img])
        r = results[0]

        if show_bboxes == False:
            # Apply segmentation mask
            output = display_instances(frame, r['rois'], r['masks'],
                                       r['class_ids'], self.config.class_names)
        else:
            # Apply segmentation mask with bounding boxes
            output = display_box_instances(frame, r['rois'], r['masks'],
                                           r['class_ids'],
                                           self.config.class_names, r['scores'])

        if output_image_name is not None:
            cv2.imwrite(output_image_name, output)
            print("Processed image saved successfully in your current working directory.")
        return r, output

    def process_video(self, video_path, show_bboxes=False, output_video_name=None,
                      frames_per_second=None):
        capture = cv2.VideoCapture(video_path)
        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        codec = cv2.VideoWriter_fourcc(*'DIVX')
        if frames_per_second is not None:
            save_video = cv2.VideoWriter(output_video_name, codec,
                                         frames_per_second, (width, height))
        counter = 0
        start = time.time()

        while True:
            counter += 1
            ret, frame = capture.read()
            if not ret:
                break
            # Run detection
            results = self.model.detect([frame], verbose=0)
            print("No. of frames:", counter)
            r = results[0]
            if show_bboxes == False:
                # Apply segmentation mask
                output = display_instances(frame, r['rois'], r['masks'],
                                           r['class_ids'], self.config.class_names)
            else:
                # Apply segmentation mask with bounding boxes
                output = display_box_instances(frame, r['rois'], r['masks'],
                                               r['class_ids'],
                                               self.config.class_names,
                                               r['scores'])
            output = cv2.resize(output, (width, height),
                                interpolation=cv2.INTER_AREA)
            if output_video_name is not None:
                save_video.write(output)

        end = time.time()
        print(f"Processed {counter} frames in {end - start:.1f} seconds")
        capture.release()
        if frames_per_second is not None:
            save_video.release()
        return r, output

    def process_camera(self, cam, show_bboxes=False, output_video_name=None,
                       frames_per_second=None, show_frames=None, frame_name=None,
                       verbose=None, check_fps=False):
        capture = cam
        if output_video_name is not None:
            width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
            codec = cv2.VideoWriter_fourcc(*'DIVX')
            save_video = cv2.VideoWriter(output_video_name, codec,
                                         frames_per_second, (width, height))
        counter = 0
        start = datetime.now()

        while True:
            ret, frame = capture.read()
            if ret:
                # Run detection
                results = self.model.detect([frame])
                r = results[0]
                if show_bboxes == False:
                    # Apply segmentation mask
                    output = display_instances(frame, r['rois'], r['masks'],
                                               r['class_ids'],
                                               self.config.class_names)
                else:
                    # Apply segmentation mask with bounding boxes
                    output = display_box_instances(frame, r['rois'], r['masks'],
                                                   r['class_ids'],
                                                   self.config.class_names,
                                                   r['scores'])
                counter += 1
                if show_frames == True:
                    if frame_name is not None:
                        cv2.imshow(frame_name, output)
                        if cv2.waitKey(25) & 0xFF == ord('q'):
                            break
                if output_video_name is not None:
                    output = cv2.resize(output, (width, height),
                                        interpolation=cv2.INTER_AREA)
                    save_video.write(output)
            elif counter == 30:
                break

        end = datetime.now()
        timetaken = (end - start).total_seconds()
        if check_fps == True:
            fps = counter / timetaken
            print(f"{fps:.3f} frames per second")
        if verbose is not None:
            print(f"Processed {counter} frames in {timetaken:.1f} seconds")

        capture.release()
        if output_video_name is not None:
            save_video.release()
        return r, output
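
# ---------------------------------------------------------------------------
# Usage sketch for custom_segmentation. The weights file and class names are
# hypothetical; class_names must list "BG" first, followed by the classes in
# the order used during training, and num_classes must match their count.
#
#   segment_image = custom_segmentation()
#   segment_image.inferConfig(num_classes=2,
#                             class_names=["BG", "butterfly", "squirrel"])
#   segment_image.load_model("mask_rcnn_model.h5")
#   segmask, output = segment_image.segmentImage("sample.jpg", show_bboxes=True,
#                                                output_image_name="out.jpg")
#
# The same instance also handles video files and live cameras:
#
#   segment_image.process_video("sample.mp4", show_bboxes=True,
#                               frames_per_second=15,
#                               output_video_name="out.avi")
# ---------------------------------------------------------------------------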
# Extended version of custom_segmentation: adds object extraction, polygon
# mask-point output and direct frame processing.
class custom_segmentation:
    def __init__(self):
        self.model_dir = os.getcwd()

    def inferConfig(self, name=None, network_backbone="resnet101", num_classes=1,
                    class_names=["BG"], batch_size=1, detection_threshold=0.7,
                    image_max_dim=512, image_min_dim=512,
                    image_resize_mode="square", gpu_count=1):
        self.config = Config(BACKBONE=network_backbone,
                             NUM_CLASSES=1 + num_classes,
                             class_names=class_names,
                             IMAGES_PER_GPU=batch_size,
                             IMAGE_MAX_DIM=image_max_dim,
                             IMAGE_MIN_DIM=image_min_dim,
                             DETECTION_MIN_CONFIDENCE=detection_threshold,
                             IMAGE_RESIZE_MODE=image_resize_mode,
                             GPU_COUNT=gpu_count)

    def load_model(self, model_path):
        # Load the trained model weights
        self.model = MaskRCNN(mode="inference", model_dir=self.model_dir,
                              config=self.config)
        self.model.load_weights(model_path, by_name=True)

    def segmentImage(self, image_path, show_bboxes=False,
                     extract_segmented_objects=False,
                     save_extracted_objects=False, mask_points_values=False,
                     process_frame=False, output_image_name=None, verbose=None):
        if process_frame == False:
            image = cv2.imread(image_path)
        else:
            image = image_path
        new_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # Run detection
        if verbose is not None:
            print("Processing image...")
        results = self.model.detect([new_img])
        r = results[0]

        if show_bboxes == False:
            output = display_instances(image, r['rois'], r['masks'],
                                       r['class_ids'], self.config.class_names)
            if output_image_name is not None:
                cv2.imwrite(output_image_name, output)
                print("Processed image saved successfully in your current working directory.")

            # Extract and crop out each of the objects segmented in the image
            if extract_segmented_objects == False:
                if mask_points_values == True:
                    mask = r['masks']
                    contain_val = []
                    for a in range(mask.shape[2]):
                        m = mask[:, :, a]
                        mask_values = Mask(m).polygons()
                        val = mask_values.points
                        contain_val.append(val)
                    r['masks'] = contain_val
                return r, output
            else:
                mask = r['masks']
                m = 0
                for a in range(mask.shape[2]):
                    if process_frame == False:
                        img = cv2.imread(image_path)
                    else:
                        img = image_path
                    # Zero out every pixel that does not belong to this instance
                    for b in range(img.shape[2]):
                        img[:, :, b] = img[:, :, b] * mask[:, :, a]
                    m += 1
                    # Crop to the bounding region of the instance mask
                    extracted_objects = img[np.ix_(mask[:, :, a].any(1),
                                                   mask[:, :, a].any(0))]
                    if save_extracted_objects == True:
                        save_path = os.path.join("segmented_object" + "_" + str(m) + ".jpg")
                        cv2.imwrite(save_path, extracted_objects)

                if mask_points_values == True:
                    mask = r['masks']
                    contain_val = []
                    for a in range(mask.shape[2]):
                        m = mask[:, :, a]
                        mask_values = Mask(m).polygons()
                        val = mask_values.points
                        contain_val.append(val)
                    r['masks'] = contain_val

                    extract_mask = extracted_objects
                    object_val = []
                    for a in range(extract_mask.shape[2]):
                        m = extract_mask[:, :, a]
                        mask_values = Mask(m).polygons()
                        val = mask_values.points
                        object_val.append(val)
                    extracted_objects = object_val

                # The mask values of each extracted, cropped object are added
                # to the dictionary that holds the output arrays.
                r.update({"extracted_objects": extracted_objects})
                return r, output
        else:
            output = display_box_instances(image, r['rois'], r['masks'],
                                           r['class_ids'],
                                           self.config.class_names, r['scores'])

            # Extract and crop out each of the objects segmented in the image
            if extract_segmented_objects == True:
                mask = r['masks']
                m = 0
                for a in range(mask.shape[2]):
                    if process_frame == False:
                        img = cv2.imread(image_path)
                    else:
                        img = image_path
                    for b in range(img.shape[2]):
                        img[:, :, b] = img[:, :, b] * mask[:, :, a]
                    m += 1
                    extracted_objects = img[np.ix_(mask[:, :, a].any(1),
                                                   mask[:, :, a].any(0))]
                    if save_extracted_objects == True:
                        save_path = os.path.join("segmented_object" + "_" + str(m) + ".jpg")
                        cv2.imwrite(save_path, extracted_objects)

                if mask_points_values == True:
                    mask = r['masks']
                    contain_val = []
                    for a in range(mask.shape[2]):
                        m = mask[:, :, a]
                        mask_values = Mask(m).polygons()
                        val = mask_values.points
                        contain_val.append(val)
                    r['masks'] = contain_val

                    extract_mask = extracted_objects
                    object_val = []
                    for a in range(extract_mask.shape[2]):
                        m = extract_mask[:, :, a]
                        mask_values = Mask(m).polygons()
                        val = mask_values.points
                        object_val.append(val)
                    extracted_objects = object_val

                if output_image_name is not None:
                    cv2.imwrite(output_image_name, output)
                    print("Processed image saved successfully in your current working directory.")

                # The mask values of each extracted, cropped object are added
                # to the dictionary that holds the output arrays.
                r.update({"extracted_objects": extracted_objects})
                return r, output
            else:
                if mask_points_values == True:
                    mask = r['masks']
                    contain_val = []
                    for a in range(mask.shape[2]):
                        m = mask[:, :, a]
                        mask_values = Mask(m).polygons()
                        val = mask_values.points
                        contain_val.append(val)
                    r['masks'] = contain_val

                if output_image_name is not None:
                    cv2.imwrite(output_image_name, output)
                    print("Processed image saved successfully in your current working directory.")
                return r, output

    def segmentFrame(self, frame, show_bboxes=False, mask_points_values=False,
                     output_image_name=None, verbose=None):
        segmask, output = self.segmentImage(frame, show_bboxes=show_bboxes,
                                            process_frame=True,
                                            mask_points_values=mask_points_values)
        if output_image_name is not None:
            cv2.imwrite(output_image_name, output)
            print("Processed image saved successfully in your current working directory.")
        return segmask, output

    def process_video(self, video_path, show_bboxes=False,
                      mask_points_values=False, output_video_name=None,
                      frames_per_second=None):
        capture = cv2.VideoCapture(video_path)
        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        codec = cv2.VideoWriter_fourcc(*'DIVX')
        if frames_per_second is not None:
            save_video = cv2.VideoWriter(output_video_name, codec,
                                         frames_per_second, (width, height))
        counter = 0
        start = time.time()

        while True:
            counter += 1
            ret, frame = capture.read()
            if not ret:
                break
            segmask, output = self.segmentImage(frame, show_bboxes=show_bboxes,
                                                process_frame=True,
                                                mask_points_values=mask_points_values)
            print("No. of frames:", counter)
            output = cv2.resize(output, (width, height),
                                interpolation=cv2.INTER_AREA)
            if output_video_name is not None:
                save_video.write(output)

        end = time.time()
        print(f"Processed {counter} frames in {end - start:.1f} seconds")
        capture.release()
        if frames_per_second is not None:
            save_video.release()
        return segmask, output

    def process_camera(self, cam, show_bboxes=False, mask_points_values=False,
                       output_video_name=None, frames_per_second=None,
                       show_frames=None, frame_name=None, verbose=None,
                       check_fps=False):
        capture = cam
        if output_video_name is not None:
            width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
            codec = cv2.VideoWriter_fourcc(*'DIVX')
            save_video = cv2.VideoWriter(output_video_name, codec,
                                         frames_per_second, (width, height))
        counter = 0
        start = datetime.now()

        while True:
            ret, frame = capture.read()
            if ret:
                segmask, output = self.segmentImage(frame,
                                                    show_bboxes=show_bboxes,
                                                    process_frame=True,
                                                    mask_points_values=mask_points_values)
                counter += 1
                if show_frames == True:
                    if frame_name is not None:
                        cv2.imshow(frame_name, output)
                        if cv2.waitKey(25) & 0xFF == ord('q'):
                            break
                if output_video_name is not None:
                    output = cv2.resize(output, (width, height),
                                        interpolation=cv2.INTER_AREA)
                    save_video.write(output)
            elif counter == 30:
                break

        end = datetime.now()
        timetaken = (end - start).total_seconds()
        if check_fps == True:
            fps = counter / timetaken
            print(f"{fps:.3f} frames per second")
        if verbose is not None:
            print(f"Processed {counter} frames in {timetaken:.1f} seconds")

        capture.release()
        if output_video_name is not None:
            save_video.release()
        return segmask, output
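
# ---------------------------------------------------------------------------
# Usage sketch for the extended custom_segmentation above. The extra keyword
# arguments crop out each detected instance and/or return mask outlines as
# polygon points instead of boolean arrays; all file names are hypothetical.
#
#   segment_image = custom_segmentation()
#   segment_image.inferConfig(num_classes=2,
#                             class_names=["BG", "butterfly", "squirrel"])
#   segment_image.load_model("mask_rcnn_model.h5")
#   segmask, output = segment_image.segmentImage(
#       "sample.jpg",
#       extract_segmented_objects=True,   # crops returned in
#       save_extracted_objects=True,      # segmask["extracted_objects"] and
#       mask_points_values=True,          # saved as segmented_object_N.jpg
#       output_image_name="out.jpg")
# ---------------------------------------------------------------------------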
# The 81 class names (background + 80 object classes) of the COCO dataset, in
# the order the model predicts them. Defined once so the four methods below
# (and the extended class further down) share a single copy.
COCO_CLASS_NAMES = [
    'BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
    'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
    'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
    'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
    'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
    'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
    'hair drier', 'toothbrush'
]


class instance_segmentation():
    def __init__(self, infer_speed=None):
        # Preset input sizes and confidence thresholds that trade accuracy
        # for inference speed
        if infer_speed == "average":
            coco_config.IMAGE_MAX_DIM = 512
            coco_config.IMAGE_MIN_DIM = 512
            coco_config.DETECTION_MIN_CONFIDENCE = 0.45
        elif infer_speed == "fast":
            coco_config.IMAGE_MAX_DIM = 384
            coco_config.IMAGE_MIN_DIM = 384
            coco_config.DETECTION_MIN_CONFIDENCE = 0.25
        elif infer_speed == "rapid":
            coco_config.IMAGE_MAX_DIM = 256
            coco_config.IMAGE_MIN_DIM = 256
            coco_config.DETECTION_MIN_CONFIDENCE = 0.20

        self.model_dir = os.getcwd()

    def load_model(self, model_path):
        self.model = MaskRCNN(mode="inference", model_dir=self.model_dir,
                              config=coco_config)
        self.model.load_weights(model_path, by_name=True)

    def segmentImage(self, image_path, show_bboxes=False, output_image_name=None,
                     verbose=None):
        image = cv2.imread(image_path)
        new_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # Run detection
        if verbose is not None:
            print("Processing image...")
        results = self.model.detect([new_img])
        coco_config.class_names = COCO_CLASS_NAMES
        r = results[0]

        if show_bboxes == False:
            # Apply segmentation mask
            output = display_instances(image, r['rois'], r['masks'],
                                       r['class_ids'], coco_config.class_names)
        else:
            # Apply segmentation mask with bounding boxes
            output = display_box_instances(image, r['rois'], r['masks'],
                                           r['class_ids'],
                                           coco_config.class_names, r['scores'])

        if output_image_name is not None:
            cv2.imwrite(output_image_name, output)
            print("Processed image saved successfully in your current working directory.")
        return r, output

    def segmentFrame(self, frame, show_bboxes=False, output_image_name=None,
                     verbose=None):
        new_img = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if verbose is not None:
            print("Processing frame...")
        # Run detection
        results = self.model.detect([new_img])
        coco_config.class_names = COCO_CLASS_NAMES
        r = results[0]

        if show_bboxes == False:
            # Apply segmentation mask
            output = display_instances(frame, r['rois'], r['masks'],
                                       r['class_ids'], coco_config.class_names)
        else:
            # Apply segmentation mask with bounding boxes
            output = display_box_instances(frame, r['rois'], r['masks'],
                                           r['class_ids'],
                                           coco_config.class_names, r['scores'])

        if output_image_name is not None:
            cv2.imwrite(output_image_name, output)
            print("Processed image saved successfully in your current working directory.")
        return r, output

    def process_video(self, video_path, show_bboxes=False, output_video_name=None,
                      frames_per_second=None):
        capture = cv2.VideoCapture(video_path)
        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        codec = cv2.VideoWriter_fourcc(*'DIVX')
        coco_config.class_names = COCO_CLASS_NAMES
        if frames_per_second is not None:
            save_video = cv2.VideoWriter(output_video_name, codec,
                                         frames_per_second, (width, height))
        counter = 0
        start = time.time()

        while True:
            counter += 1
            ret, frame = capture.read()
            if not ret:
                break
            # Run detection
            results = self.model.detect([frame])
            print("No. of frames:", counter)
            r = results[0]
            if show_bboxes == False:
                # Apply segmentation mask
                output = display_instances(frame, r['rois'], r['masks'],
                                           r['class_ids'],
                                           coco_config.class_names)
            else:
                # Apply segmentation mask with bounding boxes
                output = display_box_instances(frame, r['rois'], r['masks'],
                                               r['class_ids'],
                                               coco_config.class_names,
                                               r['scores'])
            output = cv2.resize(output, (width, height),
                                interpolation=cv2.INTER_AREA)
            if output_video_name is not None:
                save_video.write(output)

        end = time.time()
        print(f"Processed {counter} frames in {end - start:.1f} seconds")
        capture.release()
        if frames_per_second is not None:
            save_video.release()
        return r, output

    def process_camera(self, cam, show_bboxes=False, output_video_name=None,
                       frames_per_second=None, show_frames=None, frame_name=None,
                       verbose=None, check_fps=False):
        capture = cam
        if output_video_name is not None:
            width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
            save_video = cv2.VideoWriter(output_video_name,
                                         cv2.VideoWriter_fourcc(*'DIVX'),
                                         frames_per_second, (width, height))
        counter = 0
        start = datetime.now()
        coco_config.class_names = COCO_CLASS_NAMES

        while True:
            ret, frame = capture.read()
            if ret:
                # Run detection
                results = self.model.detect([frame])
                r = results[0]
                if show_bboxes == False:
                    # Apply segmentation mask
                    output = display_instances(frame, r['rois'], r['masks'],
                                               r['class_ids'],
                                               coco_config.class_names)
                else:
                    # Apply segmentation mask with bounding boxes
                    output = display_box_instances(frame, r['rois'], r['masks'],
                                                   r['class_ids'],
                                                   coco_config.class_names,
                                                   r['scores'])
                counter += 1
                if show_frames == True:
                    if frame_name is not None:
                        cv2.imshow(frame_name, output)
                        if cv2.waitKey(25) & 0xFF == ord('q'):
                            break
                if output_video_name is not None:
                    output = cv2.resize(output, (width, height),
                                        interpolation=cv2.INTER_AREA)
                    save_video.write(output)
            elif counter == 30:
                break

        end = datetime.now()
        timetaken = (end - start).total_seconds()
        if check_fps == True:
            fps = counter / timetaken
            print(f"{fps:.3f} frames per second")
        if verbose is not None:
            print(f"Processed {counter} frames in {timetaken:.1f} seconds")

        capture.release()
        if output_video_name is not None:
            save_video.release()
        return r, output
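
# ---------------------------------------------------------------------------
# Usage sketch for instance_segmentation with COCO-pretrained weights. The
# weights file name is a placeholder for a Mask R-CNN COCO .h5 checkpoint;
# infer_speed trades accuracy for speed via the smaller input sizes set above.
#
#   segment_image = instance_segmentation(infer_speed="average")
#   segment_image.load_model("mask_rcnn_coco.h5")
#   segmask, output = segment_image.segmentImage("sample.jpg", show_bboxes=True,
#                                                output_image_name="out.jpg")
#
#   capture = cv2.VideoCapture(0)
#   segment_image.process_camera(capture, show_bboxes=True, show_frames=True,
#                                frame_name="frame", check_fps=True)
# ---------------------------------------------------------------------------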
# Extended version of instance_segmentation: adds target-class filtering,
# object extraction and polygon mask-point output.
class instance_segmentation():
    def __init__(self, infer_speed=None):
        # Preset input sizes and confidence thresholds that trade accuracy
        # for inference speed
        if infer_speed == "average":
            coco_config.IMAGE_MAX_DIM = 512
            coco_config.IMAGE_MIN_DIM = 512
            coco_config.DETECTION_MIN_CONFIDENCE = 0.45
        elif infer_speed == "fast":
            coco_config.IMAGE_MAX_DIM = 384
            coco_config.IMAGE_MIN_DIM = 384
            coco_config.DETECTION_MIN_CONFIDENCE = 0.25
        elif infer_speed == "rapid":
            coco_config.IMAGE_MAX_DIM = 256
            coco_config.IMAGE_MIN_DIM = 256
            coco_config.DETECTION_MIN_CONFIDENCE = 0.20

        self.model_dir = os.getcwd()

    def load_model(self, model_path):
        self.model = MaskRCNN(mode="inference", model_dir=self.model_dir,
                              config=coco_config)
        self.model.load_weights(model_path, by_name=True)

    def select_target_classes(self, BG=False, person=False, bicycle=False,
                              car=False, motorcycle=False, airplane=False,
                              bus=False, train=False, truck=False, boat=False,
                              traffic_light=False, fire_hydrant=False,
                              stop_sign=False, parking_meter=False, bench=False,
                              bird=False, cat=False, dog=False, horse=False,
                              sheep=False, cow=False, elephant=False, bear=False,
                              zebra=False, giraffe=False, backpack=False,
                              umbrella=False, handbag=False, tie=False,
                              suitcase=False, frisbee=False, skis=False,
                              snowboard=False, sports_ball=False, kite=False,
                              baseball_bat=False, baseball_glove=False,
                              skateboard=False, surfboard=False,
                              tennis_racket=False, bottle=False,
                              wine_glass=False, cup=False, fork=False,
                              knife=False, spoon=False, bowl=False, banana=False,
                              apple=False, sandwich=False, orange=False,
                              broccoli=False, carrot=False, hot_dog=False,
                              pizza=False, donut=False, cake=False, chair=False,
                              couch=False, potted_plant=False, bed=False,
                              dining_table=False, toilet=False, tv=False,
                              laptop=False, mouse=False, remote=False,
                              keyboard=False, cell_phone=False, microwave=False,
                              oven=False, toaster=False, sink=False,
                              refrigerator=False, book=False, clock=False,
                              vase=False, scissors=False, teddy_bear=False,
                              hair_dryer=False, toothbrush=False):
        detected_classes = {}
        target_class_names = [
            BG, person, bicycle, car, motorcycle, airplane, bus, train, truck,
            boat, traffic_light, fire_hydrant, stop_sign, parking_meter, bench,
            bird, cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe,
            backpack, umbrella, handbag, tie, suitcase, frisbee, skis,
            snowboard, sports_ball, kite, baseball_bat, baseball_glove,
            skateboard, surfboard, tennis_racket, bottle, wine_glass, cup,
            fork, knife, spoon, bowl, banana, apple, sandwich, orange,
            broccoli, carrot, hot_dog, pizza, donut, cake, chair, couch,
            potted_plant, bed, dining_table, toilet, tv, laptop, mouse, remote,
            keyboard, cell_phone, microwave, oven, toaster, sink, refrigerator,
            book, clock, vase, scissors, teddy_bear, hair_dryer, toothbrush
        ]

        # Map each flag onto the matching COCO class name. The keys must match
        # coco_config.class_names exactly, so the shared list is reused here.
        for target_class_name, class_name in zip(target_class_names,
                                                 COCO_CLASS_NAMES):
            if target_class_name == True:
                detected_classes[class_name] = "valid"
            else:
                detected_classes[class_name] = "invalid"
        return detected_classes

    def segmentImage(self, image_path, show_bboxes=False, process_frame=False,
                     segment_target_classes=None,
                     extract_segmented_objects=False,
                     save_extracted_objects=False, mask_points_values=False,
                     output_image_name=None, verbose=None):
        if process_frame == False:
            image = cv2.imread(image_path)
        else:
            image = image_path
        new_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # Run detection
        if verbose is not None:
            print("Processing image...")
        results = self.model.detect([new_img])
        coco_config.class_names = COCO_CLASS_NAMES
        r = results[0]

        # Filter out unwanted detections so that only the target classes remain
        if segment_target_classes is not None:
            bboxes = r['rois']
            scores = r['scores']
            masks = r['masks']
            class_ids = r['class_ids']

            com_bboxes = []
            com_masks = []
            com_scores = []
            com_class_ids = []
            final_dict = []
            for a, b in enumerate(r['class_ids']):
                name = coco_config.class_names[b]
                box = bboxes[a]
                ma = masks[:, :, a]
                score = scores[a]
                c_ids = class_ids[a]
                if segment_target_classes[name] == "invalid":
                    continue
                com_bboxes.append(box)
                com_class_ids.append(c_ids)
                com_masks.append(ma)
                com_scores.append(score)

            final_bboxes = np.array(com_bboxes)
            final_class_ids = np.array(com_class_ids)
            final_masks = np.array(com_masks)
            if len(final_masks) != 0:
                final_masks = np.stack(final_masks, axis=2)
            final_scores = np.array(com_scores)

            final_dict.append({
                "rois": final_bboxes,
                "class_ids": final_class_ids,
                "scores": final_scores,
                "masks": final_masks,
            })
            r = final_dict[0]

        if show_bboxes == False:
            output = display_instances(image, r['rois'], r['masks'],
                                       r['class_ids'], coco_config.class_names)
            if output_image_name is not None:
                cv2.imwrite(output_image_name, output)
                print("Processed image saved successfully in your current working directory.")

            # Extract and crop out each of the objects segmented in the image
            if extract_segmented_objects == False:
                if mask_points_values == True:
                    mask = r['masks']
                    contain_val = []
                    for a in range(mask.shape[2]):
                        m = mask[:, :, a]
                        mask_values = Mask(m).polygons()
                        val = mask_values.points
                        contain_val.append(val)
                    r['masks'] = contain_val
                return r, output
            else:
                mask = r['masks']
                m = 0
                for a in range(mask.shape[2]):
                    if process_frame == False:
                        img = cv2.imread(image_path)
                    else:
                        img = image_path
                    # Zero out every pixel that does not belong to this instance
                    for b in range(img.shape[2]):
                        img[:, :, b] = img[:, :, b] * mask[:, :, a]
                    m += 1
                    # Crop to the bounding region of the instance mask
                    extracted_objects = img[np.ix_(mask[:, :, a].any(1),
                                                   mask[:, :, a].any(0))]
                    if save_extracted_objects == True:
                        save_path = os.path.join("segmented_object" + "_" + str(m) + ".jpg")
                        cv2.imwrite(save_path, extracted_objects)

                if mask_points_values == True:
                    mask = r['masks']
                    contain_val = []
                    for a in range(mask.shape[2]):
                        m = mask[:, :, a]
                        mask_values = Mask(m).polygons()
                        val = mask_values.points
                        contain_val.append(val)
                    r['masks'] = contain_val

                    extract_mask = extracted_objects
                    object_val = []
                    for a in range(extract_mask.shape[2]):
                        m = extract_mask[:, :, a]
                        mask_values = Mask(m).polygons()
                        val = mask_values.points
                        object_val.append(val)
                    extracted_objects = object_val

                # The mask values of each extracted, cropped object are added
                # to the dictionary that holds the output arrays.
                r.update({"extracted_objects": extracted_objects})
                return r, output
        else:
            output = display_box_instances(image, r['rois'], r['masks'],
                                           r['class_ids'],
                                           coco_config.class_names, r['scores'])

            # Extract and crop out each of the objects segmented in the image
            if extract_segmented_objects == True:
                mask = r['masks']
                m = 0
                for a in range(mask.shape[2]):
                    if process_frame == False:
                        img = cv2.imread(image_path)
                    else:
                        img = image_path
                    for b in range(img.shape[2]):
                        img[:, :, b] = img[:, :, b] * mask[:, :, a]
                    m += 1
                    extracted_objects = img[np.ix_(mask[:, :, a].any(1),
                                                   mask[:, :, a].any(0))]
                    if save_extracted_objects == True:
                        save_path = os.path.join("segmented_object" + "_" + str(m) + ".jpg")
                        cv2.imwrite(save_path, extracted_objects)

                if mask_points_values == True:
                    mask = r['masks']
                    contain_val = []
                    for a in range(mask.shape[2]):
                        m = mask[:, :, a]
                        mask_values = Mask(m).polygons()
                        val = mask_values.points
                        contain_val.append(val)
                    r['masks'] = contain_val

                    extract_mask = extracted_objects
                    object_val = []
                    for a in range(extract_mask.shape[2]):
                        m = extract_mask[:, :, a]
                        mask_values = Mask(m).polygons()
                        val = mask_values.points
                        object_val.append(val)
                    extracted_objects = object_val

                if output_image_name is not None:
                    cv2.imwrite(output_image_name, output)
                    print("Processed image saved successfully in your current working directory.")

                # The mask values of each extracted, cropped object are added
                # to the dictionary that holds the output arrays.
                r.update({"extracted_objects": extracted_objects})
                return r, output
            else:
                if mask_points_values == True:
                    mask = r['masks']
                    contain_val = []
                    for a in range(mask.shape[2]):
                        m = mask[:, :, a]
                        mask_values = Mask(m).polygons()
                        val = mask_values.points
                        contain_val.append(val)
                    r['masks'] = contain_val

                if output_image_name is not None:
                    cv2.imwrite(output_image_name, output)
                    print("Processed image saved successfully in your current working directory.")
                return r, output

    def segmentFrame(self, frame, show_bboxes=False, segment_target_classes=None,
                     mask_points_values=False, output_image_name=None):
        # segmentImage already writes the output file when output_image_name
        # is given, so no second write is needed here.
        segmask, output = self.segmentImage(
            frame, show_bboxes=show_bboxes, process_frame=True,
            segment_target_classes=segment_target_classes,
            mask_points_values=mask_points_values,
            output_image_name=output_image_name)
        return segmask, output

    def process_video(self, video_path, show_bboxes=False,
                      segment_target_classes=None, mask_points_values=False,
                      output_video_name=None, frames_per_second=None):
        capture = cv2.VideoCapture(video_path)
        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        codec = cv2.VideoWriter_fourcc(*'DIVX')
        if frames_per_second is not None:
            save_video = cv2.VideoWriter(output_video_name, codec,
                                         frames_per_second, (width, height))
        counter = 0
        start = time.time()

        while True:
            counter += 1
            ret, frame = capture.read()
            if not ret:
                break
            # Apply segmentation mask
            segmask, output = self.segmentImage(
                frame, show_bboxes=show_bboxes,
                segment_target_classes=segment_target_classes,
                process_frame=True, mask_points_values=mask_points_values)
            print("No. of frames:", counter)
            output = cv2.resize(output, (width, height),
                                interpolation=cv2.INTER_AREA)
            if output_video_name is not None:
                save_video.write(output)

        end = time.time()
        print(f"Processed {counter} frames in {end - start:.1f} seconds")
        capture.release()
        if frames_per_second is not None:
            save_video.release()
        return segmask, output

    def process_camera(self, cam, show_bboxes=False, segment_target_classes=None,
                       mask_points_values=False, output_video_name=None,
                       frames_per_second=None, show_frames=None, frame_name=None,
                       verbose=None, check_fps=False):
        capture = cam
        if output_video_name is not None:
            width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
            save_video = cv2.VideoWriter(output_video_name,
                                         cv2.VideoWriter_fourcc(*'DIVX'),
                                         frames_per_second, (width, height))
        counter = 0
        start = datetime.now()

        while True:
            ret, frame = capture.read()
            if ret:
                segmask, output = self.segmentImage(
                    frame, show_bboxes=show_bboxes,
                    segment_target_classes=segment_target_classes,
                    process_frame=True, mask_points_values=mask_points_values)
                counter += 1
                if show_frames == True:
                    if frame_name is not None:
                        cv2.imshow(frame_name, output)
                        if cv2.waitKey(25) & 0xFF == ord('q'):
                            break
                if output_video_name is not None:
                    output = cv2.resize(output, (width, height),
                                        interpolation=cv2.INTER_AREA)
                    save_video.write(output)
            elif counter == 30:
                break

        end = datetime.now()
        timetaken = (end - start).total_seconds()
        if check_fps == True:
            fps = counter / timetaken
            print(f"{fps:.3f} frames per second")
        if verbose is not None:
            print(f"Processed {counter} frames in {timetaken:.1f} seconds")

        capture.release()
        if output_video_name is not None:
            save_video.release()
        return segmask, output
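
# ---------------------------------------------------------------------------
# Usage sketch for the extended instance_segmentation above, filtering the
# output to selected COCO classes. File names are hypothetical placeholders.
#
#   segment_image = instance_segmentation()
#   segment_image.load_model("mask_rcnn_coco.h5")
#   target_classes = segment_image.select_target_classes(person=True, car=True)
#   segmask, output = segment_image.segmentImage(
#       "street.jpg", show_bboxes=True,
#       segment_target_classes=target_classes,
#       extract_segmented_objects=True,
#       output_image_name="out.jpg")
# ---------------------------------------------------------------------------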