import sys

import numpy as np
import skimage.color
import skimage.io
from PIL import Image

if __name__ == '__main__':
    config = Waldoconfig(predict=True)
    config.display()
    model = MaskRCNN(mode="inference", config=config, model_dir=config.MODEL_DIR)

    weights_path = sys.argv[1]
    print("weights_path: ", weights_path)
    model.load_weights(weights_path, by_name=True)

    image = skimage.io.imread(sys.argv[2])
    masks = model.detect([image], verbose=1)[0]["masks"]
    print("Masks:", masks)

    # Grayscale copy of the image, promoted back to 3 channels
    gray = skimage.color.gray2rgb(skimage.color.rgb2gray(image)) * 255
    # True wherever at least one instance mask covers the pixel
    mask_filter = (np.sum(masks, -1, keepdims=True) >= 1)
    # Check the number of detected instances (the last mask axis),
    # not the image height
    if masks.shape[-1] > 0:
        # Keep original color inside the masks, grayscale everywhere else
        waldo = np.where(mask_filter, image, gray).astype(np.uint8)
        img = Image.fromarray(waldo, 'RGB')
        img.show()
    else:
        print("Can't find Waldo. Hmm..")
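# The np.where compositing above relies on broadcasting the single-channel
# mask against the 3-channel image. A minimal sketch, using toy arrays rather
# than real detections, of how that broadcast behaves:
import numpy as np

toy_image = np.arange(12, dtype=np.uint8).reshape(2, 2, 3)
# Boolean mask with keepdims-style shape (2, 2, 1): first row kept, second not
toy_mask = np.array([[[True], [True]], [[False], [False]]])
toy_gray = np.zeros_like(toy_image)

# The (2, 2, 1) mask broadcasts across the channel axis
composite = np.where(toy_mask, toy_image, toy_gray)
print(composite[0])  # original pixels (masked row)
print(composite[1])  # zeros (unmasked row)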
class MaskRCNNDetectObject:
    def __init__(self):
        self.fetch_resources()
        self.model_init = False
        # Define operator-specific parameters here
        self.user_config = self.get_operator_config()
        self.config = MaskRCNNConfig()
        self.label = [
            'BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
            'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
            'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
            'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
            'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
            'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
            'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
            'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
            'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
            'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
            'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
            'scissors', 'teddy bear', 'hair drier', 'toothbrush'
        ]
        self.model_path = os.path.join(temp_directory(), COCO_MODEL_PATH)

        # Initialize the model
        try:
            self.graph = tf.Graph()
            with self.graph.as_default():
                with tf.device(self.device_str):
                    self.session = tf.Session(config=self.user_config)
                    KTF.set_session(self.session)
                    self.rcnn = MaskRCNN(mode="inference",
                                         model_dir=MODEL_DIR,
                                         config=self.config)
            self.graph = KTF.get_graph()
            self.session = KTF.get_session()
            # Warm up the model with a dummy batch
            with self.session.as_default():
                self.bulk_execute(np.zeros((1, 300, 300, 3)))
        except Exception as e:
            logging.error("unexpected error while building the graph",
                          exc_info=True)
            raise e

    def get_operator_config(self):
        try:
            self.device_str = os.environ.get("device_id", "/cpu:0")
            config = tf.ConfigProto(allow_soft_placement=True)
            config.gpu_options.allow_growth = True
            gpu_mem_limit = float(os.environ.get("gpu_mem_limit", 0.3))
            config.gpu_options.per_process_gpu_memory_fraction = gpu_mem_limit
            # Print device placement debug info if requested
            if os.environ.get("log_device_placement", False):
                config.log_device_placement = True
            logging.info("device id %s, gpu memory limit: %f",
                         self.device_str, gpu_mem_limit)
        except Exception as e:
            logging.error("unexpected error while reading the config",
                          exc_info=True)
            raise e
        logging.info("Model device str: %s, session config: %s",
                     self.device_str, config)
        return config

    def fetch_resources(self):
        # download_temp_file(COCO_MODEL_URL, COCO_MODEL_PATH)
        pass

    def load_model(self):
        self.rcnn.load_weights(self.model_path, by_name=True)
        self.model_init = True

    def get_bboxes(self, boxes, scores, classes):
        bboxes = [[
            BoundingBox(x1=box[1], y1=box[0], x2=box[3], y2=box[2],
                        score=score, label=self.label[int(cls)])
            for (box, score, cls) in zip(boxes.tolist(), scores.tolist(),
                                         classes.tolist())
        ]]
        return bboxes

    @staticmethod
    def get_obj_image(images, bboxes):
        obj_images = []
        for i, frame_bboxes in enumerate(bboxes):
            frame_object = []
            for j, bbox in enumerate(frame_bboxes):
                # Crop the detected object out of the frame
                tmp = images[i][int(bbox.y1):int(bbox.y2),
                                int(bbox.x1):int(bbox.x2)]
                frame_object.append(cv2base64(tmp))
            obj_images.append(frame_object)
        return obj_images

    def execute(self, image):
        with self.graph.as_default():
            with tf.device(self.device_str):
                with self.session.as_default():
                    if not self.model_init:
                        self.load_model()
                    results = self.rcnn.detect([image])
                    bboxes = self.get_bboxes(results[0]["rois"],
                                             results[0]["scores"],
                                             results[0]["class_ids"])
                    bboxes[0].sort(key=lambda x: -x.score)
                    objects_image = self.get_obj_image([image], bboxes)
                    return objects_image[0]

    def bulk_execute(self, images):
        objs = []
        for image in images:
            objs.append(self.execute(image))
        return objs

    @property
    def name(self):
        return "mask_rcnn"

    @property
    def type(self):
        return "processor"

    @property
    def input(self):
        return "image"

    @property
    def output(self):
        return "images"

    @property
    def dimension(self):
        return "-1"

    @property
    def metric_type(self):
        return "-1"
from matplotlib import pyplot
from matplotlib.patches import Rectangle
import cv2


class TestConfig(Config):
    NAME = "test"
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    NUM_CLASSES = 1 + 80


rcnn = MaskRCNN(mode='inference', model_dir='./', config=TestConfig())
rcnn.load_weights('mask_rcnn_coco.h5', by_name=True)

cap = cv2.VideoCapture(0)
while True:
    _, img = cap.read()
    results = rcnn.detect([img], verbose=0)
    # Mask R-CNN returns boxes as (y1, x1, y2, x2), not (x, y, w, h)
    for y1, x1, y2, x2 in results[0]['rois']:
        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 4)
    cv2.imshow('result', img)
    # Press Esc (key code 27) to quit
    if cv2.waitKey(4) == 27:
        break

cap.release()
cv2.destroyAllWindows()
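# One caveat worth noting for the loop above: OpenCV delivers frames in BGR
# order, while the COCO-trained Matterport weights expect RGB, so detections
# are usually better if the frame is flipped before detect(). A minimal
# sketch of that conversion (assuming the rcnn model defined above):
import cv2

cap = cv2.VideoCapture(0)
_, frame = cap.read()
cap.release()

# Reverse the channel axis: BGR -> RGB (a view, no copy)
rgb_frame = frame[:, :, ::-1]
# results = rcnn.detect([rgb_frame], verbose=0)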
# Create a Mask-RCNN model in inference mode and load pre-trained weights
model = MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=MaskRCNNConfig())
model.load_weights(weights, by_name=True)

input_video = 'input_video.MOV'
capture = cv2.VideoCapture(input_video)

fps = 25.0
width = int(capture.get(3))
height = int(capture.get(4))
fcc = cv2.VideoWriter_fourcc('D', 'I', 'V', 'X')
out = cv2.VideoWriter("new_video.avi", fcc, fps, (width, height))

while True:
    ret, frame = capture.read()
    if not ret:
        # Stop cleanly at the end of the video instead of crashing on None
        break
    results = model.detect([frame], verbose=0)
    r = results[0]
    masked_frame = segment_people(frame, r['masks'], r['class_ids'])
    cv2.imshow('video', masked_frame)
    # Record the processed frame
    out.write(masked_frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

capture.release()
out.release()
cv2.destroyAllWindows()
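# segment_people is not defined in the snippet above; a minimal sketch under
# the assumption that it keeps only pixels covered by a "person" mask
# (COCO class id 1) and blacks out everything else:
import numpy as np

def segment_people(frame, masks, class_ids):
    """Hypothetical helper: keep only pixels covered by a 'person' mask."""
    # masks: (H, W, N) boolean stack; class_ids: (N,) COCO ids, person == 1
    person_masks = masks[:, :, class_ids == 1]
    if person_masks.shape[-1] == 0:
        return np.zeros_like(frame)
    combined = person_masks.any(axis=-1, keepdims=True)
    return np.where(combined, frame, 0).astype(frame.dtype)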
def retrieve_inference_to_json(model: MaskRCNN, ds, image_id: str,
                               json_dir: str) -> None:
    """Retrieve the inference result from the model and store it in a JSON file."""
    # Load image from dataset
    original_image = ds.load_image(image_id)
    _, window, scale, padding, _ = resize_image(original_image, mode="pad64")
    # No rescaling applied
    assert scale == 1

    # Retrieve predictions
    height, width = original_image.shape[:2]
    result = model.detect([original_image], verbose=0)[0]
    filtered_result = filter_result_by_score(result, 0.9)

    # Dict object to dump to json
    dump = {}
    dump["image"] = {
        "file_name": 'img/' + ds.image_info[image_id]['id'] + '.jpg',
        "id": int(image_id),
        "height": int(height),
        "width": int(width),
    }
    dump["annotations"] = []

    assert filtered_result['rois'].shape[0] == \
        filtered_result['masks'].shape[-1] == \
        filtered_result['class_ids'].shape[0]

    # Encode annotations into json
    for obj_id in range(filtered_result['rois'].shape[0]):
        roi = filtered_result['rois'][obj_id, :]
        mask = filtered_result['masks'][..., obj_id]
        class_id = filtered_result['class_ids'][obj_id]
        y1, x1, y2, x2 = int(roi[0]), int(roi[1]), int(roi[2]), int(roi[3])
        contours, _ = cv2.findContours(mask.astype(np.uint8),
                                       cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)
        cnt = contours[0]
        polygon = []  # 1d flattened list of [x, y] coordinates
        for pt_id in range(cnt.shape[0]):
            polygon.append(int(cnt[pt_id, 0, 0]))
            polygon.append(int(cnt[pt_id, 0, 1]))
        obj = {'id': int(obj_id),
               'segmentation': [polygon],
               'area': float(cv2.contourArea(cnt)),
               # COCO bbox format: x, y, w, h
               'bbox': [x1, y1, x2 - x1, y2 - y1],
               'image_id': int(image_id),
               'category_id': int(class_id),
               'iscrowd': 0}
        dump["annotations"].append(obj)

    json_path = get_inference_result_path(ds.image_info[image_id]['id'],
                                          json_dir)
    with open(json_path, 'w') as f:
        json.dump(dump, f)
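# filter_result_by_score is referenced above but not defined in the excerpt;
# a minimal sketch under the assumption that it drops every detection below
# the score threshold from all four result arrays:
def filter_result_by_score(result, min_score):
    """Hypothetical helper: keep only detections scoring >= min_score."""
    keep = result['scores'] >= min_score
    return {
        'rois': result['rois'][keep],
        'class_ids': result['class_ids'][keep],
        'scores': result['scores'][keep],
        # Masks are stacked on the last axis, so index there
        'masks': result['masks'][..., keep],
    }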
# data, address = s.recvfrom(2048)
# x = data.decode()
# print(x)

while True:
    image_1 = cv2.VideoCapture(0)
    image_2 = cv2.VideoCapture(1)
    ret1, image1 = image_1.read()
    ret2, image2 = image_2.read()
    image_1.release()
    image_2.release()

    # Convert BGR (OpenCV) to RGB for the model
    img1 = image1[:, :, ::-1]
    img2 = image2[:, :, ::-1]
    results1 = model.detect([img1], verbose=0)
    results2 = model.detect([img2], verbose=0)
    r1 = results1[0]
    r2 = results2[0]

    # Show the frame of video on the screen
    # cv2.imshow('Video', )
    # visualize.display_instances(img1, r['rois'], r['masks'], r['class_ids'],
    #                             class_names, r['scores'])

    if r1['class_ids'] is not None:
        print("============")
    # If both cameras detected at least one common class, report it
    if list(set(r1['class_ids']).intersection(set(r2['class_ids']))):
        print("============")
        print("Something detected")  # was: "有东西"
    image = apply_mask(image, mask, color)
    image = cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
    image = cv2.putText(image, caption, (x1, y1),
                        cv2.FONT_HERSHEY_COMPLEX, 0.7, color, 2)
    return image


# In[ ]:

capture = cv2.VideoCapture('video1.mp4')  # a pre-recorded video is used here

# These two lines can be removed if you don't have a 1080p camera.
capture.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

while True:
    ret, frame = capture.read()
    if not ret:
        # Stop cleanly at the end of the video
        break
    results = rcnn.detect([frame], verbose=0)
    r = results[0]
    frame = display_instances(frame, r['rois'], r['masks'],
                              r['class_ids'], class_names, r['scores'])
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

capture.release()
cv2.destroyAllWindows()

# In[ ]:
    image_id, use_mini_mask=False)
log("original_image", original_image)
log("image_meta", image_meta)
log("gt_class_id", gt_class_id)
log("gt_bbox", gt_bbox)
log("gt_mask", gt_mask)

visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id,
                            dataset_train.class_names, figsize=(8, 8))

results = model.detect([original_image], verbose=1)
r = results[0]
visualize.display_instances(original_image, r['rois'], r['masks'],
                            r['class_ids'], dataset_val.class_names,
                            r['scores'], ax=get_ax())

## Evaluation

# Compute VOC-Style mAP @ IoU=0.5
# Running on 10 images. Increase for better accuracy.
image_ids = np.random.choice(dataset_val.image_ids, 10)
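# The snippet stops right after sampling the evaluation images; the loop that
# actually computes the mAP is not shown. A sketch of that loop, following the
# pattern the Matterport train_shapes notebook uses (load_image_gt,
# utils.compute_ap, and the inference_config are assumed from that notebook):
APs = []
for image_id in image_ids:
    # Load ground truth for this image
    image, image_meta, gt_class_id, gt_bbox, gt_mask = \
        modellib.load_image_gt(dataset_val, inference_config,
                               image_id, use_mini_mask=False)
    # Run detection on the single image
    results = model.detect([image], verbose=0)
    r = results[0]
    # Compute AP at the default IoU threshold of 0.5 and accumulate
    AP, precisions, recalls, overlaps = \
        utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
                         r["rois"], r["class_ids"], r["scores"], r["masks"])
    APs.append(AP)

print("mAP: ", np.mean(APs))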
def main():
    argv = sys.argv
    if "--" not in argv:
        argv = []  # as if no args are passed
    else:
        argv = argv[argv.index("--") + 1:]  # get all args after "--"

    # When --help or no args are given, print this help
    usage_text = "python real_test.py -- [options]"
    parser = argparse.ArgumentParser(description=usage_text)
    parser.add_argument(
        "-isynth", "--input_syndir",
        dest="synth_path", type=str, required=True,
        help="Input the synthetic image directory",
    )
    parser.add_argument(
        "-iweight", "--input_weightfile",
        dest="weight_path", type=str, required=True,
        help="Input the weight file",
    )
    parser.add_argument(
        "-iresults", "--input_resultsdir",
        dest="results_path", type=str, required=True,
        help="Input the results directory",
    )

    args = parser.parse_args(argv)
    if not argv:
        parser.print_help()
        return
    if not args.synth_path or not args.weight_path or not args.results_path:
        print("Error: argument not given, aborting.")
        parser.print_help()
        return

    rcnn = MaskRCNN(mode='inference', model_dir='./', config=TestConfig())
    rcnn.load_weights(args.weight_path, by_name=True)

    # Renamed walk variables so the detection result r below doesn't
    # shadow the walk root
    for root, dirs, files in os.walk(args.synth_path):
        for filename in files:
            # Load image
            img = load_img(os.path.join(args.synth_path, filename))
            img = img_to_array(img)
            # Make prediction
            results = rcnn.detect([img], verbose=0)
            # Visualize the results
            r = results[0]
            draw_image_with_boxes(os.path.join(args.synth_path, filename),
                                  r['rois'], r['class_ids'], r['scores'],
                                  filename, args.results_path)
            file_name = filename.split('.')[0]
            # Generate detected-data txt files for mAP calculation
            generate_predicted_txt(file_name, r['rois'], r['class_ids'],
                                   r['scores'])
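# generate_predicted_txt is not shown in this excerpt; a minimal sketch,
# assuming the common mAP-tool convention of one
# "class score left top right bottom" line per detection and a hypothetical
# detection-results/ output directory:
import os

def generate_predicted_txt(file_name, rois, class_ids, scores,
                           out_dir="detection-results"):
    """Hypothetical helper: write detections in the usual mAP txt format."""
    os.makedirs(out_dir, exist_ok=True)
    with open(os.path.join(out_dir, file_name + ".txt"), "w") as f:
        for (y1, x1, y2, x2), cls, score in zip(rois, class_ids, scores):
            # One detection per line: class score left top right bottom
            f.write("%d %.6f %d %d %d %d\n" % (cls, score, x1, y1, x2, y2))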
def worker1():
    # Video file or camera to process - set this to 0 to use your webcam
    # instead of a video file
    VIDEO_SOURCE = "analyze/input/rldc.mp4"

    # Physical capacity of the area
    TOTAL_PARKING_CAPACITY = 5

    # Create a Mask-RCNN model in inference mode
    model = MaskRCNN(mode="inference", model_dir=MODEL_DIR,
                     config=MaskRCNNConfig())

    # Load pre-trained model
    model.load_weights(COCO_MODEL_PATH, by_name=True)

    # Location of parking spaces
    parked_car_boxes = None

    # Load the video file we want to run detection on
    video_capture = cv2.VideoCapture(VIDEO_SOURCE)
    cv2.waitKey(33)
    frame_counter = 1
    has_space = False

    # Loop over each frame of video
    while video_capture.isOpened():
        success, frame = video_capture.read()
        if not success:
            break

        parking_areas = np.array([[304, 556, 359, 650]])
        overlaps = parking_areas

        if frame_counter % 100 == 0:
            print("----------------------------------------------")
            print("Start detecting cars ....")

            # Capture frame-by-frame
            start_time = time.time()

            # Convert the image from BGR color (which OpenCV uses) to RGB color
            rgb_image = frame[:, :, ::-1]

            # Run the image through the Mask R-CNN model to get results.
            results = model.detect([rgb_image], verbose=0)

            # Mask R-CNN assumes we are running detection on multiple images.
            # We only passed in one image to detect, so only grab the first result.
            r = results[0]

            # The r variable will now have the results of detection:
            # - r['rois'] are the bounding box of each detected object
            # - r['class_ids'] are the class id (type) of each detected object
            # - r['scores'] are the confidence scores for each detection
            # - r['masks'] are the object masks for each detected object
            #   (which gives you the object outline)

            # Filter the results to only grab the car / truck bounding boxes
            car_boxes = get_car_boxes(r['rois'], r['class_ids'])
            print("car_boxes")
            # print(car_boxes)
            print("Cars found in frame of video:")

            # Draw each box on the frame
            i = 1
            for box in car_boxes:
                print("Car ", i, ":", box)
                y1, x1, y2, x2 = box

                # Draw the box (note: putText's fourth argument must be a
                # font face, not cv2.LINE_AA)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 1)
                cv2.putText(frame, str(i), (x1, y1),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255))
                cv2.circle(frame, (x1, y1), 5, (0, 0, 255), -1)
                i = i + 1

            # See how much the cars overlap with the known parking spaces
            print("parking_areas")
            print(parking_areas)
            overlaps = mrcnn.utils.compute_overlaps(car_boxes, parking_areas)
            print(len(overlaps.tolist()))
            print("Checking overlaps .... frame %d" % frame_counter)
            print(overlaps)
            # print(overlaps)
            print(overlaps < 0.5)

            result = space_Violation(overlaps)
            if result < 2:
                print("Free Parking Spaces")
                has_space = True
                cv2.putText(frame,
                            "Parking Spaces Available : %s"
                            % str(TOTAL_PARKING_CAPACITY - result),
                            (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
                            (100, 255, 0))
                # Add Time Stamp
                time_stamp = time.strftime("%Y/%m/%d %H:%M:%S %Z",
                                           time.localtime())
                cv2.putText(frame, time_stamp, (950, 710),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0, 0), 1,
                            cv2.LINE_AA)
            else:
                has_space = False
                cv2.putText(frame, "Don't Have Parking Spaces", (10, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255))
                # Add Time Stamp
                time_stamp = time.strftime("%Y/%m/%d %H:%M:%S %Z",
                                           time.localtime())
                cv2.putText(frame, time_stamp, (950, 710),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0, 0), 1,
                            cv2.LINE_AA)

            # cv2.imwrite("analyze/output/frame%d.jpg" % frame_counter, frame),
            # for debug
            cv2.imwrite("analyze/output/frame-analyzed-1.jpg", frame)

            # Show the frame of video on the screen, for debug
            # cv2.imshow('Video', frame)

            # Add a sleep for demo
            # time.sleep(5)

            feed1 = Settings()
            feed1.device.state.update(
                {'NORTH': TOTAL_PARKING_CAPACITY - result})
            print("-----STATE--------")
            print(feed1.device.state)

            if has_space:
                # print("Free Parking Spaces")
                # TODO - Push to DB
                cv2.putText(frame, "Free Parking Spaces", (10, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
            else:
                # TODO - Push to DB
                cv2.putText(frame, "Don't Have Free Parking Spaces", (10, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255))

            # Show the frame of video on the screen, for debug
            # cv2.imshow('Video', frame)

        frame_counter = frame_counter + 1

        # Hit 'q' to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Clean up everything when finished
    video_capture.release()
    cv2.destroyAllWindows()
    def search_parking_space(self):
        # Filter a list of Mask R-CNN detection results to get only
        # the detected cars / trucks
        def get_car_boxes(boxes, class_ids):
            car_boxes = []
            for i, box in enumerate(boxes):
                # If the detected object isn't a car / truck, skip it
                if class_ids[i] in [3, 8, 6]:
                    car_boxes.append(box)
            return np.array(car_boxes)

        # Root directory of the project
        ROOT_DIR = Path(".")

        # Directory to save logs and trained model
        MODEL_DIR = os.path.join(ROOT_DIR, "logs")

        # Local path to trained weights file
        COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")

        # Download COCO trained weights from Releases if needed
        if not os.path.exists(COCO_MODEL_PATH):
            mrcnn.utils.download_trained_weights(COCO_MODEL_PATH)

        # Directory of images to run detection on
        IMAGE_DIR = os.path.join(ROOT_DIR, "images")

        # Video file or camera to process - set this to 0 to use your webcam
        # instead of a video file
        VIDEO_SOURCE = "test_images/parking.mp4"

        # Create a Mask-RCNN model in inference mode
        model = MaskRCNN(mode="inference", model_dir=MODEL_DIR,
                         config=MaskRCNNConfig())

        # Load pre-trained model
        model.load_weights(COCO_MODEL_PATH, by_name=True)

        # Location of parking spaces
        parked_car_boxes = None

        # How many frames of video we've seen in a row with a parking space open
        free_space_frames = 0

        # Have we sent an SMS alert yet?
        sms_sent = False

        # Load the video file we want to run detection on
        video_capture = cv2.VideoCapture(VIDEO_SOURCE)

        # Free parking space slot
        free_space_slot = 0

        # Loop over each frame of video
        while video_capture.isOpened():
            success, frame = video_capture.read()
            if not success:
                break

            # Convert the image from BGR color (which OpenCV uses) to RGB color
            rgb_image = frame[:, :, ::-1]

            # Run the image through the Mask R-CNN model to get results.
            results = model.detect([rgb_image], verbose=0)

            # Mask R-CNN assumes we are running detection on multiple images.
            # We only passed in one image to detect, so only grab the first result.
            r = results[0]

            # The r variable will now have the results of detection:
            # - r['rois'] are the bounding box of each detected object
            # - r['class_ids'] are the class id (type) of each detected object
            # - r['scores'] are the confidence scores for each detection
            # - r['masks'] are the object masks for each detected object
            #   (which gives you the object outline)

            if parked_car_boxes is None:
                # This is the first frame of video - assume all the cars
                # detected are in parking spaces. Save the location of each
                # car as a parking space box and go to the next frame of video.
                parked_car_boxes = get_car_boxes(r['rois'], r['class_ids'])
            else:
                # We already know where the parking spaces are. Check if any
                # are currently unoccupied.

                # Get where cars are currently located in the frame
                car_boxes = get_car_boxes(r['rois'], r['class_ids'])

                # See how much those cars overlap with the known parking spaces
                overlaps = mrcnn.utils.compute_overlaps(parked_car_boxes,
                                                        car_boxes)

                # Assume no spaces are free until we find one that is free
                free_space = False

                # Loop through each known parking space box
                c = 0
                for parking_area, overlap_areas in zip(parked_car_boxes,
                                                       overlaps):
                    c += 1

                    # For this parking space, find the max amount it was
                    # covered by any car that was detected in our image
                    # (doesn't really matter which car)
                    max_IoU_overlap = np.max(overlap_areas)

                    # Get the top-left and bottom-right coordinates of the
                    # parking area
                    y1, x1, y2, x2 = parking_area

                    # Check if the parking space is occupied by seeing if any
                    # car overlaps it by more than 0.15 using IoU
                    if max_IoU_overlap < 0.15:
                        # Parking space not occupied! Draw a green box around it
                        cv2.rectangle(frame, (x1, y1), (x2, y2),
                                      (0, 255, 0), 3)
                        cv2.putText(frame, str(c), (x1, y1),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.65,
                                    (0, 255, 2), 2)

                        # Capture the current frame and update the GUI
                        _, frame = video_capture.read()
                        cv2.rectangle(frame, (x1, y1), (x2, y2),
                                      (0, 255, 0), 3)
                        cv2.imwrite('result.jpg', frame)
                        self.lbl_img.setPixmap(QtGui.QPixmap("result.jpg"))
                        self.lbl_slot.setText(str(c))

                        # Clean up everything when finished (note: releasing
                        # here makes the next read fail and ends the loop)
                        video_capture.release()
                        cv2.destroyAllWindows()

                        # Flag that we have seen at least one open space
                        free_space = True
                        free_space_slot = c
                    else:
                        # Parking space is still occupied - draw a red box
                        # around it
                        cv2.putText(frame, str(c), (x1, y1),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.65,
                                    (0, 255, 2), 2)
                        cv2.rectangle(frame, (x1, y1), (x2, y2),
                                      (0, 0, 255), 1)

                        # Write the IoU measurement inside the box
                        font = cv2.FONT_HERSHEY_DUPLEX
                        cv2.putText(frame, f"{max_IoU_overlap:0.2}",
                                    (x1 + 6, y2 - 6), font, 0.3,
                                    (255, 255, 255))

                # If at least one space was free, start counting frames.
                # This is so we don't alert based on one frame of a spot being
                # open; it helps prevent the script being triggered by one bad
                # detection.
                if free_space:
                    free_space_frames += 1
                else:
                    # If no spots are free, reset the count
                    free_space_frames = 0

                # If a space has been free for several frames, we are pretty
                # sure it is really free!
                if free_space_frames > 2:
                    # If we haven't sent an SMS yet, send it!
                    if not sms_sent:
                        print("SENDING SMS!!!")
                        print(free_space_slot)
                        sms_sent = True
                        text_message = ("Hello, parking spot number "
                                        + str(free_space_slot) + " available")
                        message = client.messages.create(
                            body=text_message,
                            from_='********************',
                            to='********************')

            # Show the frame of video on the screen
            cv2.imshow('Video', frame)

            # Hit 'q' to quit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        # Clean up everything when finished
        video_capture.release()
        cv2.destroyAllWindows()
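# mrcnn.utils.compute_overlaps returns a matrix of pairwise IoU values
# (one row per parking space, one column per detected car). A minimal sketch
# of the underlying computation for two boxes in the same (y1, x1, y2, x2)
# convention:
import numpy as np

def iou(box_a, box_b):
    """IoU of two boxes given as (y1, x1, y2, x2)."""
    y1 = max(box_a[0], box_b[0])
    x1 = max(box_a[1], box_b[1])
    y2 = min(box_a[2], box_b[2])
    x2 = min(box_a[3], box_b[3])
    intersection = max(0, y2 - y1) * max(0, x2 - x1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - intersection
    return intersection / union if union > 0 else 0.0

print(iou(np.array([0, 0, 10, 10]), np.array([5, 5, 15, 15])))  # ~0.143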
# model weights input
path_weights_file = 'models/mask_rcnn_coco.h5'
rcnn_model.load_weights(path_weights_file, by_name=True)

# single image input
path_to_image = sys.argv[1]
img = load_img(path_to_image)
# transition to array
img = img_to_array(img)
print('Image shape:', img.shape)

# make inference
results = rcnn_model.detect([img], verbose=0)
# The output is a list of dictionaries, one dict per input image, e.g.:
# {'rois': array([[ 30, 54, 360, 586]], dtype=int32),
#  'class_ids': array([21], dtype=int32),
#  'scores': array([0.9999379], dtype=float32),
#  'masks': huge_boolean_array_here ... }
result_params = results[0]

# show photo with bounding boxes, masks, class labels and scores
display_instances(img, result_params['rois'], result_params['masks'],
                  result_params['class_ids'], class_names,
                  result_params['scores'])
import os

import cv2
from mrcnn.config import coco_config
from mrcnn.model import MaskRCNN, apply_magic
from mrcnn.utils import download_trained_weights

ORIGINAL_IMAGE = 'demo/demo.jpg'
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")

if not os.path.isfile(COCO_MODEL_PATH):
    download_trained_weights(COCO_MODEL_PATH)

model = MaskRCNN(mode="inference", config=coco_config)
model.load_weights(COCO_MODEL_PATH, by_name=True)

# Use OpenCV to read the image (cv2.imread returns BGR)
image = cv2.imread(ORIGINAL_IMAGE)

# Use cvtColor to convert the BGR image to a grayscale image
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

results = model.detect([image])
r = results[0]
apply_magic(image, gray_image, r['rois'], r['masks'], r['class_ids'])
cv2.imwrite('save_image.jpg', image)
class MRCNNPreprocessing(Preprocessing):

    class MRCNNPreprocConfig(Config):
        NAME = "preproc"
        # 1 reduces training time but gives an error
        # https://github.com/matterport/Mask_RCNN/issues/521
        IMAGES_PER_GPU = 1
        DETECTION_MIN_CONFIDENCE = 0.6

        def __init__(self, classes_ids):
            Config.NUM_CLASSES = len(classes_ids)
            super().__init__()

    def __init__(self, mrcnn_classes_file, mrcnn_weights, max_frames,
                 output_size) -> None:
        super().__init__(output_size)
        # Mask-RCNN model
        self.max_frames = max_frames
        classes_ids = utils.load_class_ids(mrcnn_classes_file)
        config = MRCNNPreprocessing.MRCNNPreprocConfig(classes_ids)
        # Create model object in inference mode.
        self.model = MaskRCNN(mode="inference", model_dir="./log",
                              config=config)
        # Load weights trained on MS-COCO
        self.model.load_weights(mrcnn_weights, by_name=True, exclude=[])

    def process_image_mrcnn(self, image):
        results = self.model.detect([image], verbose=0)
        # print("Detecting took %s ms" % (datetime.now() - time))
        # time = datetime.now()
        r = results[0]
        ids = r['class_ids']
        maschere = r["masks"]
        return ids, maschere

    def apply_masks_to_image(self, image, masks):
        # Black out every pixel that no instance mask covers
        for r in range(min(masks.shape[0], image.shape[0])):
            for c in range(min(masks.shape[1], image.shape[1])):
                if not np.any(masks[r, c]):
                    image[r][c] = (0, 0, 0)

    def _extract_frames(self, video, length, width, height, fps):
        interval = max(1, length // self.max_frames)
        count = 0
        skipping = 0
        while count < self.max_frames:
            success, image = video.read()
            if not success:
                break
            # image = cv2.resize(image, (OUTPUT_SIZE,
            #     int(image.shape[1] * OUTPUT_SIZE / image.shape[0])))
            # time = datetime.now()
            image = self._resize_to_required(image)
            ids, maschere = self.process_image_mrcnn(image)
            if skipping > 40:
                break
            if len(ids) == 0:
                skipping += 1
                print("Skipping frame %s" % skipping)
                video.set(cv2.CAP_PROP_POS_FRAMES,
                          (count * interval) + (skipping * int(fps / 2)))
                continue
            skipping = 0
            print("Taking frame %s" % count)
            # Apply mask to original image
            self.apply_masks_to_image(image, maschere)
            yield ids, image
            count += 1
            video.set(cv2.CAP_PROP_POS_FRAMES, (count * interval))
class PersonSegmentation:
    """Interface for interacting with a Mask R-CNN model pretrained on the
    COCO dataset."""

    def __init__(self, mode="inference", model_dir="logs",
                 config=MaskRCNNConfig()):
        self.model = MaskRCNN(mode=mode, model_dir=model_dir, config=config)
        self.model.load_weights(DATASET_FILE, by_name=True)
        self.CLASS_NAMES = [
            'BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
            'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
            'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
            'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
            'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
            'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
            'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
            'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
            'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
            'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
            'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
            'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
            'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
            'scissors', 'teddy bear', 'hair drier', 'toothbrush'
        ]
        self.detections = None
        self.person_masks = None
        self.person_boxes = None

    def detect(self, image):
        """Perform instance segmentation on an image. OpenCV stores images
        in BGR mode, so we have to convert to RGB first."""
        self.detections = self.model.detect([image[:, :, ::-1]], verbose=1)[0]
        self.person_masks = np.array(
            self.detections['masks'])[:, :, self.detections['class_ids'] == 1]
        self.person_boxes = np.array(
            self.detections['rois'])[self.detections['class_ids'] == 1]
        self.num_person = self.person_masks.shape[2]

    def visualize_detections(self, image, person_only=True):
        """Draw bounding boxes and masks for detected objects on the image.
        If person_only is true, draw only "person" objects. Returns a new
        image.
        """
        COLORS = mrcnn.visualize.random_colors(len(self.CLASS_NAMES))
        masks = self.detections['masks']
        boxes = self.detections['rois']
        class_ids = self.detections['class_ids']
        scores = self.detections['scores']

        for i in range(boxes.shape[0]):
            class_id = class_ids[i]
            if person_only and class_id != 1:
                continue
            y1, x1, y2, x2 = boxes[i]
            label = self.CLASS_NAMES[class_id]
            font = cv.FONT_HERSHEY_DUPLEX
            color = tuple([int(c) for c in np.array(COLORS[class_id]) * 255])
            text = "{}: {:.3f}".format(label, scores[i])
            size = 0.5
            width = 2
            mask = masks[:, :, i]
            image = mrcnn.visualize.apply_mask(image, mask, color, alpha=0.6)
            cv.rectangle(image, (x1, y1), (x2, y2), color, width)
            cv.putText(image, text, (x1, y1 - 20), font, size, color, width)
        return image
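# A short usage sketch of this class, assuming MaskRCNNConfig and
# DATASET_FILE point at a COCO-trained weights file as above; the input
# file name is hypothetical:
import cv2 as cv

segmenter = PersonSegmentation()
image = cv.imread("people.jpg")           # BGR, as the class expects
segmenter.detect(image)                   # fills person_masks / person_boxes
print("persons found:", segmenter.num_person)
annotated = segmenter.visualize_detections(image.copy(), person_only=True)
cv.imwrite("people_annotated.jpg", annotated)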
def visualize_detections(image, masks, boxes, class_ids, scores):
    bgr_image = image.copy()
    font = cv2.FONT_HERSHEY_DUPLEX
    size = 1
    width = 1
    color = (255, 255, 255)
    for i in range(boxes.shape[0]):
        y1, x1, y2, x2 = boxes[i]
        # Only draw detections of the "person" class (COCO id 1)
        if int(class_ids[i]) != 1:
            continue
        text = "Human: {:.3f}".format(scores[i])
        cv2.rectangle(bgr_image, (x1, y1), (x2, y2), color, width)
        cv2.putText(bgr_image, text, (x1, y1 - 20), font, size, color, width)
    return bgr_image


IMAGE_DIR = os.path.join(os.getcwd(), "images")
files = os.listdir(IMAGE_DIR)

# Skip outputs from previous runs (the 'done_' prefix) and the originals
# they were produced from; this replaces an in-place list mutation that
# removed items while iterating
already_done = {f[5:] for f in files if f.startswith('done_')}
for filename in files:
    if filename.startswith('done_') or filename in already_done:
        continue
    image = cv2.imread(os.path.join(IMAGE_DIR, filename))
    rgb_image = image[:, :, ::-1]
    detections = model.detect([rgb_image])[0]
    output_image = visualize_detections(image, detections['masks'],
                                        detections['rois'],
                                        detections['class_ids'],
                                        detections['scores'])
    cv2.imwrite(os.path.join(IMAGE_DIR, 'done_' + filename), output_image)
###############################################################################
# Detection
###############################################################################

steel_config = InferenceConfig()
model = MaskRCNN(mode="inference", config=steel_config,
                 model_dir=str(MODEL_DIR))

# Run the detection pipeline
#   images: List of images, potentially of different sizes.
# Returns a list of dicts, one dict per image. The dict contains:
#   rois      : [N, (y1, x1, y2, x2)] detection bounding boxes
#   class_ids : [N] int class IDs
#   scores    : [N] float probability scores for the class IDs
#   masks     : [H, W, N] instance binary masks
results = model.detect(images=[im], verbose=1)
r = results[0]

###############################################################################
# Visualization
###############################################################################

visualize.display_instances(image=im, boxes=r['rois'], masks=r['masks'],
                            class_ids=r['class_ids'], scores=r['scores'],
                            class_names=CLASS_NAMES)
import time
import warnings

import firebase_admin
import skimage.io
from firebase_admin import credentials
from fastai.basic_train import load_learner  # fastai v1
from flask import Flask
from mrcnn import visualize

from src.config import *
from src.dataset import *

class_names = ['BG', 'cow']
cfg = CattlePredictionConfig()

# define the model
model = MaskRCNN(mode='inference', model_dir='models/', config=cfg)
print("[INFO] Loading Weights...")
model.load_weights('models/mask_rcnn_cattle_config_0004.h5', by_name=True)

image_name = "0.jpg"
image = skimage.io.imread(image_name)
results = model.detect([image], verbose=1)

cred = credentials.Certificate(
    "sih2020-e29b2-firebase-adminsdk-abixl-1367d4ad1b.json")
firebase_admin.initialize_app(
    cred, {'databaseURL': 'https://sih2020-e29b2.firebaseio.com'})

learn = load_learner('data')
warnings.filterwarnings("ignore")
app = Flask(__name__)


def getCowCount(filename):
    global model
    image = skimage.io.imread(filename)
    start_time = time.time()
class Analisador:
    def __init__(self):
        """Initializes the object responsible for checking the parking spaces."""
        # Root directory of the project
        ROOT_DIR = Path(".")

        # Directory to save logs and trained model
        MODEL_DIR = os.path.join(ROOT_DIR, "logs")

        # Local path to trained weights file
        COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")

        # Download COCO trained weights from Releases if needed
        if not os.path.exists(COCO_MODEL_PATH):
            mrcnn.utils.download_trained_weights(COCO_MODEL_PATH)

        # Directory of images to run detection on
        IMAGE_DIR = os.path.join(ROOT_DIR, "images")

        # Video file or camera to process - set this to 0 to use your webcam
        # instead of a video file
        VIDEO_SOURCE = "test_images/parkingsd.gif"

        # Create a Mask-RCNN model in inference mode
        self.model = MaskRCNN(mode="inference", model_dir=MODEL_DIR,
                              config=MaskRCNNConfig())

        # Load pre-trained model
        self.model.load_weights(COCO_MODEL_PATH, by_name=True)

        # Load the video file we want to run detection on
        self.video_capture = cv2.VideoCapture(VIDEO_SOURCE)

        # How many frames of video we've seen in a row with a parking space open
        self.free_space_frames = 0

    # Filter a list of Mask R-CNN detection results to get only the detected
    # cars / trucks
    def get_car_boxes(self, boxes, class_ids):
        car_boxes = []
        for i, box in enumerate(boxes):
            # If the detected object isn't a car / truck, skip it
            if class_ids[i] in [3, 8, 6]:
                car_boxes.append(box)
        return np.array(car_boxes)

    def analisar(self) -> int:
        vagas_econtradas = 0
        inicio_verificacao = datetime.now()

        # Location of parking spaces
        parked_car_boxes = None

        # Have we sent an SMS alert yet?
        sms_sent = False

        # Loop over each frame of video, for at most two minutes
        while (datetime.now() - inicio_verificacao).total_seconds() <= 120:
            success, frame = self.video_capture.read()
            if not success:
                break

            # Convert the image from BGR color (which OpenCV uses) to RGB color
            rgb_image = frame[:, :, ::-1]

            # Run the image through the Mask R-CNN model to get results.
            results = self.model.detect([rgb_image], verbose=0)

            # Mask R-CNN assumes we are running detection on multiple images.
            # We only passed in one image to detect, so only grab the first result.
            r = results[0]

            # The r variable will now have the results of detection:
            # - r['rois'] are the bounding box of each detected object
            # - r['class_ids'] are the class id (type) of each detected object
            # - r['scores'] are the confidence scores for each detection
            # - r['masks'] are the object masks for each detected object
            #   (which gives you the object outline)

            if parked_car_boxes is None:
                # This is the first frame of video - assume all the cars
                # detected are in parking spaces. Save the location of each
                # car as a parking space box and go to the next frame of video.
                parked_car_boxes = self.get_car_boxes(r['rois'],
                                                      r['class_ids'])
            else:
                # We already know where the parking spaces are. Check if any
                # are currently unoccupied.
                # Get where cars are currently located in the frame
                car_boxes = self.get_car_boxes(r['rois'], r['class_ids'])

                # See how much those cars overlap with the known parking spaces
                overlaps = mrcnn.utils.compute_overlaps(parked_car_boxes,
                                                        car_boxes)

                # Assume no spaces are free until we find one that is free
                free_space = False

                # Loop through each known parking space box
                for parking_area, overlap_areas in zip(parked_car_boxes,
                                                       overlaps):
                    # For this parking space, find the max amount it was
                    # covered by any car that was detected in our image
                    # (doesn't really matter which car)
                    max_IoU_overlap = np.max(overlap_areas)

                    # Get the top-left and bottom-right coordinates of the
                    # parking area
                    y1, x1, y2, x2 = parking_area

                    # Check if the parking space is occupied by seeing if any
                    # car overlaps it by more than 0.15 using IoU
                    if max_IoU_overlap < 0.15:
                        # Parking space not occupied! Draw a green box around it
                        cv2.rectangle(frame, (x1, y1), (x2, y2),
                                      (0, 255, 0), 3)
                        # Flag that we have seen at least one open space
                        free_space = True
                    else:
                        # Parking space is still occupied - draw a red box
                        # around it
                        cv2.rectangle(frame, (x1, y1), (x2, y2),
                                      (0, 0, 255), 1)

                    # Write the IoU measurement inside the box
                    font = cv2.FONT_HERSHEY_DUPLEX
                    cv2.putText(frame, f"{max_IoU_overlap:0.2}",
                                (x1 + 6, y2 - 6), font, 0.3, (255, 255, 255))

                # If at least one space was free, start counting frames.
                # This is so we don't alert based on one frame of a spot being
                # open; it helps prevent the script being triggered by one bad
                # detection.
                if free_space:
                    self.free_space_frames += 1
                else:
                    # If no spots are free, reset the count
                    self.free_space_frames = 0

                # If a space has been free for several frames, we are pretty
                # sure it is really free!
                if self.free_space_frames > 10:
                    # Write SPACE AVAILABLE!! at the top of the screen
                    font = cv2.FONT_HERSHEY_DUPLEX
                    cv2.putText(frame, "SPACE AVAILABLE!", (10, 150), font,
                                3.0, (0, 255, 0), 2, cv2.FILLED)

                    # If we haven't sent an SMS yet, send it!
                    if not sms_sent:
                        print("Parking space found!!!")  # was: "Vaga encontrada!!!"
                        vagas_econtradas += 1

            # Show the frame of video on the screen
            cv2.imshow('Video', frame)

            # Hit 'q' to quit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        self.close_app()
        return vagas_econtradas

    def close_app(self):
        # Clean up everything when finished
        self.video_capture.release()
        cv2.destroyAllWindows()
class Mask(object):
    """Mask R-CNN"""
    CLASS_NAMES: List[str] = []
    COLORS: List[str] = []
    model: MaskRCNN = None

    def __init__(self):
        self.CLASS_NAMES = classes
        self.COLORS = extra.getRandomColors(self.CLASS_NAMES)
        model = getMaskConfig(float(config.detectionMinConfidence))
        self.model = MaskRCNN(mode="inference", model_dir=config.LOGS_DIR,
                              config=model)
        self.model.load_weights(config.DATASET_DIR, by_name=True)

    @timeChecker.checkElapsedTime(3, 2, 1, "Mask detecting")
    def pipeline(self, inputPath: str, outputPath: str = None):
        """Almost main: run detection on one image and write the result."""
        if outputPath:
            dirs.createDirs(os.path.split(outputPath)[0])
            filename = os.path.split(outputPath)[1]
        else:
            filename = os.path.split(inputPath)[1]
        cameraId = filename.split('_')[0]

        img = Image(inputPath, int(cameraId), outputPath=outputPath)
        binaryImage = img.read()

        rowDetections = self._detectByMaskCNN(img)
        detections = _parseR(self._humanizeTypes(rowDetections))
        img.addDetections(detections)

        signedImg = self._visualize_detections(img, rowDetections['masks'],
                                               drawMask=False)
        img.write(outputPath, signedImg)
        return img

    def _visualize_detections(self, image: Image, masks,
                              drawMask=False) -> np.ndarray:
        """
        Input: the original image and the mask output of the Mask R-CNN
        network.
        Output: the image with the found objects outlined and captioned.
        """
        bgr_image = image.read()
        font = cv2.FONT_HERSHEY_DUPLEX
        fontScale = 0.8
        thickness = 2

        for i, currentObject in enumerate(image.objects):
            if currentObject.type not in config.availableObjects:
                continue
            y1, x1, y2, x2 = currentObject.coordinates
            lineInClassName = self.CLASS_NAMES.index(currentObject.type)
            color = [
                int(c) for c in np.array(self.COLORS[lineInClassName]) * 255
            ]
            text = "{}: {:.1f}".format(currentObject.type,
                                       currentObject.scores * 100)
            if drawMask:
                mask = masks[:, :, i]  # take a slice
                bgr_image = mrcnn.visualize.apply_mask(
                    bgr_image, mask, color, alpha=0.6)  # draw the mask
            cv2.rectangle(bgr_image, (x1, y1), (x2, y2), color, thickness)
            cv2.putText(bgr_image, text, (x1, y1 - 20), font, fontScale,
                        color, thickness)
        return bgr_image.astype(np.uint8)

    def _detectByMaskCNN(self, image: Image):
        """
        Input: image - the result of cv2.imread(<filename>)
        Output: r - dictionary of found objects (r['masks'], r['rois'],
        r['class_ids'], r['scores'])
        """
        rgbImage = image.getRGBImage()
        r = self.model.detect([rgbImage], verbose=1)[0]  # all the magic is here
        # TODO: check what happens when a stream of images is passed here
        # instead of a single one
        return r

    def _humanizeTypes(self, r: dict) -> dict:
        typesList = [
            self.CLASS_NAMES[objectClass] for objectClass in r['class_ids']
        ]
        r.update({'class_ids': typesList})
        return r
def worker3():
    # Video file or camera to process - set this to 0 to use your webcam
    # instead of a video file
    # VIDEO_SOURCE = "analyze/input/rldc4.mp4"
    FFMPEG_BIN = "ffmpeg"
    STREAM_NAME = "livevideo"

    # Credentials redacted - load them from the environment or an AWS
    # profile instead of hard-coding them in source
    session = boto3.Session(
        aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
        region_name='eu-west-1')
    kvs = session.client("kinesisvideo")

    # Grab the endpoint from GetDataEndpoint
    endpoint = kvs.get_data_endpoint(
        APIName="GET_HLS_STREAMING_SESSION_URL",
        StreamName=STREAM_NAME)['DataEndpoint']

    # Grab the HLS Stream URL from the endpoint
    kvam = session.client("kinesis-video-archived-media",
                          endpoint_url=endpoint)
    VIDEO_URL = kvam.get_hls_streaming_session_url(
        StreamName=STREAM_NAME,
        PlaybackMode="LIVE")['HLSStreamingSessionURL']

    # Physical capacity of the area
    TOTAL_PARKING_CAPACITY = 4

    # Create a Mask-RCNN model in inference mode
    model = MaskRCNN(mode="inference", model_dir=MODEL_DIR,
                     config=MaskRCNNConfig())

    # Load pre-trained model
    model.load_weights(COCO_MODEL_PATH, by_name=True)

    # Location of parking spaces
    parked_car_boxes = None

    # Load the video file we want to run detection on
    video_capture = cv2.VideoCapture(VIDEO_URL)
    cv2.waitKey(33)
    frame_counter = 1
    has_space = False

    # Loop over each frame of video
    while video_capture.isOpened():
        success, frame = video_capture.read()
        if not success:
            break

        parking_areas = np.array([])
        overlaps = parking_areas

        if frame_counter % 100 == 0:
            print("----------------------------------------------")
            print("Start detecting cars ....")

            # Capture frame-by-frame
            start_time = time.time()

            # Convert the image from BGR color (which OpenCV uses) to RGB color
            rgb_image = frame[:, :, ::-1]

            # Run the image through the Mask R-CNN model to get results.
            results = model.detect([rgb_image], verbose=0)

            # Mask R-CNN assumes we are running detection on multiple images.
            # We only passed in one image to detect, so only grab the first result.
            r = results[0]

            # The r variable will now have the results of detection:
            # - r['rois'] are the bounding box of each detected object
            # - r['class_ids'] are the class id (type) of each detected object
            # - r['scores'] are the confidence scores for each detection
            # - r['masks'] are the object masks for each detected object
            #   (which gives you the object outline)

            # Filter the results to only grab the car / truck bounding boxes
            car_boxes = get_car_boxes(r['rois'], r['class_ids'])
            print("car_boxes")
            # print(car_boxes)
            print("Cars found in frame of video:")

            # Draw each box on the frame
            i = 1
            for box in car_boxes:
                print("Car ", i, ":", box)
                y1, x1, y2, x2 = box

                # Draw the box (note: putText's fourth argument must be a
                # font face, not cv2.LINE_AA)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 1)
                cv2.putText(frame, str(i), (x1, y1),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255))
                cv2.circle(frame, (x1, y1), 5, (0, 0, 255), -1)
                i = i + 1

            # See how much the cars overlap with the known parking spaces
            print("parking_areas")
            print(parking_areas)
            # overlaps = mrcnn.utils.compute_overlaps(car_boxes, parking_areas)
            # print(len(overlaps.tolist()))
            print("Checking overlaps .... frame %d" % frame_counter)
            # print(overlaps)
            # print(overlaps < 0.5)

            result = space_Violation(overlaps)
            if result < 2:
                print("Free Parking Spaces")
                has_space = True
                cv2.putText(frame,
                            "Parking Spaces Available : %s"
                            % str(TOTAL_PARKING_CAPACITY - result),
                            (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
                            (100, 255, 0))
                # Add Time Stamp
                time_stamp = time.strftime("%Y/%m/%d %H:%M:%S %Z",
                                           time.localtime())
                cv2.putText(frame, time_stamp, (950, 710),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0, 0), 1,
                            cv2.LINE_AA)
            else:
                has_space = False
                cv2.putText(frame, "Don't Have Parking Spaces", (10, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255))
                # Add Time Stamp
                time_stamp = time.strftime("%Y/%m/%d %H:%M:%S %Z",
                                           time.localtime())
                cv2.putText(frame, time_stamp, (950, 710),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0, 0), 1,
                            cv2.LINE_AA)

            # cv2.imwrite("analyze/output/frame%d.jpg" % frame_counter, frame),
            # for debug
            cv2.imwrite("analyze/output/frame-analyzed-3.jpg", frame)

            # Show the frame of video on the screen, for debug
            # cv2.imshow('Video', frame)

            # Add a sleep for demo
            # time.sleep(5)

            feed4 = Settings()
            feed4.device.state.update(
                {'RLDC': TOTAL_PARKING_CAPACITY - result})
            print("-----STATE--------")
            print(feed4.device.state)

            if has_space:
                # print("Free Parking Spaces")
                # TODO - Push to DB
                cv2.putText(frame, "Free Parking Spaces", (10, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
            else:
                # TODO - Push to DB
                cv2.putText(frame, "Don't Have Free Parking Spaces", (10, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255))

            # Show the frame of video on the screen, for debug
            # cv2.imshow('Video', frame)

        frame_counter = frame_counter + 1

        # Hit 'q' to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Clean up everything when finished
    video_capture.release()
    cv2.destroyAllWindows()
                type=str,
                help="Model weights for Mask R-CNN model.")
ap.add_argument("-o", "--object-detection", action="store_true",
                help="Perform object detection using Mask R-CNN model.")
args = vars(ap.parse_args())

# Define and load model
rcnn = MaskRCNN(mode='inference', model_dir='./', config=TestConfig())
rcnn.load_weights(args["model"], by_name=True)

img = load_img(args["image"])
img_pixels = img_to_array(img)
results = rcnn.detect([img_pixels], verbose=0)
r = results[0]

if args["object_detection"]:
    print("[INFO] Performing object detection using display_instances...")
    # The 81 classes (background + 80) that the COCO model knows about
    class_names = load_coco_classes('data/coco_classes.txt')
    display_instances(img_pixels, r['rois'], r['masks'], r['class_ids'],
                      class_names, r['scores'])
else:
    draw_image_with_boxes(img, r['rois'])
    print('[INFO] Saving image with bounding boxes')
    img.save(os.path.join('out', args["image"]))
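# load_coco_classes is not defined in the excerpt; a minimal sketch, assuming
# data/coco_classes.txt holds one class name per line with 'BG' first:
def load_coco_classes(path):
    """Hypothetical helper: read one class name per line, 'BG' first."""
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]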
cfg = PredictionConfig()
model = MaskRCNN(mode='inference', model_dir='./', config=cfg)

# Path to the model weights - adjust this for your system
model_path = 'weights/weight.h5'
model.load_weights(model_path, by_name=True)

from PIL import Image

# image = Image.open('modifies_image.jpeg').convert('RGB')
# img_resized = image.resize((600, 600))
# image = np.array(img_resized)

image = pyplot.imread('l_light_08_missing_hole_05_1_600.jpg')
scaled_image = mold_image(image, cfg)
sample = expand_dims(scaled_image, 0)
yhat = model.detect(sample, verbose=0)

pyplot.imshow(image)
pyplot.title('Predicted')
ax = pyplot.gca()
for box in yhat[0]['rois']:
    y1, x1, y2, x2 = box
    width, height = x2 - x1, y2 - y1
    rect = Rectangle((x1, y1), width, height, fill=False, color='red')
    ax.add_patch(rect)
pyplot.show()

from mrcnn import visualize

visualize.display_weight_stats(model)
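# PredictionConfig is used above but defined elsewhere; a typical sketch of
# such a Config subclass for a single-class detector like this PCB-defect
# example (the name and class count are assumptions):
from mrcnn.config import Config

class PredictionConfig(Config):
    """Minimal inference config sketch; NUM_CLASSES is an assumption."""
    NAME = "pcb_cfg"
    NUM_CLASSES = 1 + 1   # background + one defect class (assumed)
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1    # predict one image at a time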
print('Train: %d' % len(train_set.image_ids))

# load the test dataset
test_set = KangarooDataset()
test_set.load_dataset('kangaroo', is_train=False)
test_set.prepare()
print('Test: %d' % len(test_set.image_ids))

# create config
cfg = PredictionConfig()
# define the model
model = MaskRCNN(mode='inference', model_dir='./', config=cfg)
# load model weights
model_path = 'mask_rcnn_kangaroo_cfg_0005.h5'
model.load_weights(model_path, by_name=True)

# ------------------------------------------------------
# Compare actual and predicted
# # plot predictions for train dataset
# plot_actual_vs_predicted(train_set, model, cfg)
# # plot predictions for test dataset
# plot_actual_vs_predicted(test_set, model, cfg)
# ----------------------------------------------------------

# load photograph
img = load_img('kangaroo117.jpg')
img = img_to_array(img)
# make prediction
results = model.detect([img], verbose=0)
# get dictionary for first prediction
r = results[0]
# show photo with bounding boxes, masks, class labels and scores
display_instances(img, r['rois'], r['masks'], r['class_ids'],
                  class_names, r['scores'])
class Rpicam_ai_node:
    def __init__(self):
        # input: None
        # output: None
        # Description: Used for initializing all default values for hardware
        super().__init__()
        rclpy.init(args=None)
        self.node = rclpy.create_node('rpicam_ai_node',
                                      allow_undeclared_parameters=True)
        self.wb_mode = self.node.get_parameter_or(
            self.node.get_namespace() + 'wbmode', 1)
        self.exposure_compensation = self.node.get_parameter_or(
            self.node.get_namespace() + 'exposurecompensation', 0)
        self.rotateflip = self.node.get_parameter_or(
            self.node.get_namespace() + 'camera_rotate_flip', 0)  # flip_method
        self.image_width = self.node.get_parameter_or(
            self.node.get_namespace() + 'camera_image_width', 720)
        self.image_height = self.node.get_parameter_or(
            self.node.get_namespace() + 'camera_image_height', 720)
        self.saturation = self.node.get_parameter_or(
            self.node.get_namespace() + 'saturation', 1)

        # Model input and output node names
        self.input_names = ['input_image']
        self.output_names = ['rpn_bbox/concat']

        self.initialize_publisher()
        self.initialize_model()

        # Initializing the camera
        self.cap = cv2.VideoCapture(
            self.gstreamer_pipeline(self.wb_mode, self.saturation,
                                    self.image_width, self.image_height,
                                    720, 720, 21, self.rotateflip),
            cv2.CAP_GSTREAMER)
        rclpy.spin(self.node)

    def start(self):
        if not self.cap.isOpened():
            self.cap.open(
                self.gstreamer_pipeline(self.wb_mode, self.saturation,
                                        self.image_width, self.image_height,
                                        720, 720, 21, self.rotateflip),
                cv2.CAP_GSTREAMER)
        # Thread.isAlive() was removed in Python 3.9; use is_alive()
        if not hasattr(self, 'thread') or not self.thread.is_alive():
            self.thread = threading.Thread(target=self.take_pictures)
            self.thread.start()

    def stop(self):
        if hasattr(self, 'cap'):
            self.cap.release()
        if hasattr(self, 'thread'):
            self.thread.join()

    def destroy_node(self):
        super().destroy_node()

    def initialize_publisher(self):
        # Publish the object location to the "objectcoordinates" topic
        self.publisher = self.node.create_publisher(Float32MultiArray,
                                                    'objectcoordinates', 10)
        # Command channel that decides whether to take another picture
        self.sub = self.node.create_subscription(String, 'takePicAgain',
                                                 self.pictureCallback, 10)
        self.manualSub = self.node.create_subscription(
            String, 'manualWeedLoc', self.manualWeedLocation, 10)

    def pictureCallback(self, msg):
        # input: Data published by the 'takePicAgain' topic
        # output: None
        # Description: Used for controlling when to take a picture
        self.inp = msg.data
        self.initialize_capture_queue()

    # Nvidia-specific function for opening the camera
    def gstreamer_pipeline(self, wb_mode, saturation, capture_width,
                           capture_height, display_width, display_height,
                           framerate, flip_method):
        return ('nvarguscamerasrc wbmode=(int)%d, saturation=(float)%f ! '
                'video/x-raw(memory:NVMM), '
                'width=(int)%d, height=(int)%d, '
                'format=(string)NV12, framerate=(fraction)%d/1 ! '
                'nvvidconv flip-method=%d ! '
                'video/x-raw, width=(int)%d, height=(int)%d, '
                'format=(string)BGRx ! '
                'videoconvert ! '
                'video/x-raw, format=(string)BGR ! appsink'
                % (wb_mode, saturation, capture_width, capture_height,
                   framerate, flip_method, display_width, display_height))

    def initialize_model(self):
        # create config
        self.cfg = PredictionConfig()
        # define the model
        self.model = MaskRCNN(mode='inference', model_dir='./',
                              config=self.cfg)
        # load model weights
        model_path = PRETRAINED_WEIGHT_PATH
        self.model.load_weights(model_path, by_name=True)
        self.node.get_logger().info('MODEL IS READY NOW')

    def initialize_capture_queue(self):
        # Condition for taking the picture again
        if self.inp == 'y':
            # Start the camera
            self.take_pictures()
        else:
            # Stop the camera
            self.stop()

    def weedLocation(self, image):
        # input: An input image provided by the take_pictures() function
        # output: The number of weeds and their locations
        # Description: Converts the detected pixel locations of weeds into
        # real distances via a measurement factor
        HEIGHT = -500.0  # Ground height; needs to be decided physically

        # Convert the image into one sample
        sample = expand_dims(image, 0)
        # cv2.imwrite("sample1.jpg", image)
        weedLoc = []
        weedCount = 0
        # At HEIGHT -490 mm (from above), the camera captures 240 mm across
        # a 512x512-pixel image, so each pixel covers 240/512 mm
        xFactor = 0.4688
        yFactor = 0.4688

        # Make prediction
        yhat = self.model.detect(sample, verbose=0)[0]
        boxCount = 0
        for classId in yhat['class_ids']:
            # classId 2 is 'weed'; classId 1 is 'crop'
            if classId == 2:
                box = yhat['rois'][boxCount]
                # Get coordinates
                y1, x1, y2, x2 = box
                # Calculate the width and height of the box
                width, height = x2 - x1, y2 - y1
                # Create the shape
                rect = Rectangle((x1, y1), width, height, fill=False,
                                 color='red')
                center = [(x1 + (width / 2)), (y1 + (height / 2))]
                # Shifted origin: to co-align the camera and delta-robot
                # centers, the camera origin (256, 256) is shifted to the
                # delta robot origin (256, 259):(x, y), assuming the camera
                # is fixed
                cX1 = 260 - center[1]        # 259: y coordinate
                cY1 = -(center[0] - 257)     # 256: x coordinate
                cordis = tuple([(cX1 * xFactor), (cY1 * yFactor), HEIGHT])
                weedLoc.append(cordis)
                cv2.circle(image, (int(center[0]), int(center[1])), 3,
                           (0, 255, 255), -1)
                cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
            # Advance the box index for every detection, not just weeds
            boxCount += 1

        weedCount = len(weedLoc)
        if weedCount != 0:
            # Output image storage path
            flnm = OUTPUT_IMAGE_PATH
            cv2.imwrite(flnm, image)
            weedLoc = np.reshape(weedLoc, (1, weedCount * 3))
        else:
            data = tuple([0.0, 0.0, -280.0])
            weedLoc.append(data)
            weedLoc = np.reshape(weedLoc, (1, 3))
        return weedCount, weedLoc

    def manualWeedLocation(self, msg):
        # input: A weed location in the form G100A-20B-430C -> (x, y, z),
        # provided by the 'manualWeedLoc' topic
        # output: None
        # Description: Used for testing whether the delta arm goes to a
        # manually provided location
        cordData = msg.data
        cordA = cordData.find("A")
        cordB = cordData.find("B")
        position1 = float(cordData[1:cordA])
        position2 = float(cordData[cordA + 1:cordB])
        position3 = float(cordData[cordB + 1:-1])
        weedLoc = []
        weedCount = 1
        if weedCount != 0:
            data = tuple([position1, position2, position3])
            weedLoc.append(data)
            weedLoc = np.reshape(weedLoc, (1, weedCount * 3))
        else:
            data = tuple([0.0, 0.0, -280.0])
            weedLoc.append(data)
            weedLoc = np.reshape(weedLoc, (1, 3))
        objCnt = weedCount
        objCord = weedLoc

        # Pack the object location data into the publishing format
        if objCnt != 0:
            # Reshape into a single column
            objCord = np.reshape(objCord, (objCnt * 3,))
            objCord = np.ndarray.tolist(objCord)
        else:
            objCord = np.reshape(objCord, (3,))
            objCord = np.ndarray.tolist(objCord)

        msg = Float32MultiArray()
        msg.layout.dim.append(MultiArrayDimension())
        msg.layout.dim.append(MultiArrayDimension())
        msg.layout.dim[0].label = "row"
        msg.layout.dim[1].label = "col"
        msg.layout.dim[0].size = objCnt
        msg.layout.dim[1].size = 1
        msg.layout.dim[0].stride = 3
        msg.layout.dim[1].stride = 1
        msg.layout.data_offset = 0
        msg.data = objCord
        self.publisher.publish(msg)

    def take_pictures_test(self):
        # input: None
        # output: None
        # Description: Used for validating that the AI works with the camera
        # Get the bird's-eye view of the image
        # persImg = perspective(cv_image)
        persImg = skimage.io.imread(INPUT_TEST_IMAGE_PATH)
        objCnt, objCord = self.weedLocation(persImg)

        # Pack the object location data into the publishing format
        if objCnt != 0:
            # Reshape into a single column
            objCord = np.reshape(objCord, (objCnt * 3,))
            objCord = np.ndarray.tolist(objCord)
        else:
            objCord = np.reshape(objCord, (3,))
            objCord = np.ndarray.tolist(objCord)

        msg = Float32MultiArray()
        msg.layout.dim.append(MultiArrayDimension())
        msg.layout.dim.append(MultiArrayDimension())
        msg.layout.dim[0].label = "row"
        msg.layout.dim[1].label = "col"
        msg.layout.dim[0].size = objCnt
        msg.layout.dim[1].size = 1
        msg.layout.dim[0].stride = 3
        msg.layout.dim[1].stride = 1
        msg.layout.data_offset = 0
        msg.data = objCord
        self.publisher.publish(msg)

    def take_pictures(self):
        # input: None
        # output: None
        # Description: Used for capturing a single frame from the camera
        if self.cap.isOpened():
            cnt = 0
            # To avoid a shadowed picture, take the picture after a delay
            while True:
                ret_val, cv_image = self.cap.read()
                time.sleep(1)
                if cnt == 1:
                    break
                cnt = cnt + 1
            if ret_val:
                # Get the bird's-eye view of the image
                persImg = perspective(cv_image)
                # Predict the object locations
                objCnt, objCord = self.weedLocation(persImg)

                # Pack the object location data into the publishing format
                if objCnt != 0:
                    # Reshape into a single column
                    objCord = np.reshape(objCord, (objCnt * 3,))
                    objCord = np.ndarray.tolist(objCord)
                else:
                    objCord = np.reshape(objCord, (3,))
                    objCord = np.ndarray.tolist(objCord)

                msg = Float32MultiArray()
                msg.layout.dim.append(MultiArrayDimension())
                msg.layout.dim.append(MultiArrayDimension())
                msg.layout.dim[0].label = "row"
                msg.layout.dim[1].label = "col"
                msg.layout.dim[0].size = objCnt
                msg.layout.dim[1].size = 1
                msg.layout.dim[0].stride = 3
                msg.layout.dim[1].stride = 1
                msg.layout.data_offset = 0
                msg.data = objCord
                if objCnt != 0:
                    self.publisher.publish(msg)
            else:
                self.node.get_logger().info('Not able to read Image!!!!!!')
        else:
            print('Unable to open camera')
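# The same Float32MultiArray packing block appears three times in the node
# above; a small helper along these lines (name hypothetical, assuming the
# node's numpy and std_msgs.msg imports) would remove the duplication:
def pack_locations(obj_cnt, obj_cord):
    """Hypothetical helper: flatten (x, y, z) rows into a Float32MultiArray."""
    flat = np.reshape(obj_cord, (obj_cnt * 3,) if obj_cnt else (3,)).tolist()
    msg = Float32MultiArray()
    msg.layout.dim.append(MultiArrayDimension())
    msg.layout.dim.append(MultiArrayDimension())
    msg.layout.dim[0].label, msg.layout.dim[1].label = "row", "col"
    msg.layout.dim[0].size, msg.layout.dim[1].size = obj_cnt, 1
    msg.layout.dim[0].stride, msg.layout.dim[1].stride = 3, 1
    msg.layout.data_offset = 0
    msg.data = flat
    return msg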
def worker4():
    # Video source to process - set this to 0 to use your webcam instead of a stream
    VIDEO_SOURCE = "http://109.236.111.203:90/mjpg/video.mjpg"

    # Physical capacity of the area
    TOTAL_PARKING_CAPACITY = 120

    # Create a Mask R-CNN model in inference mode
    model = MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=MaskRCNNConfig())

    # Load the pre-trained weights
    model.load_weights(COCO_MODEL_PATH, by_name=True)

    # Locations of parking spaces as [y1, x1, y2, x2] boxes (constant, so
    # defined once outside the frame loop)
    parking_areas = np.array([
        [390, 713, 444, 800], [371, 658, 421, 761], [366, 393, 421, 520],
        [278, 538, 314, 605], [352, 630, 392, 718], [260, 518, 293, 573],
        [262, 260, 300, 344], [405, 261, 450, 420], [323, 270, 371, 375],
        [299, 572, 338, 667], [295, 269, 341, 389], [234, 173, 266, 209],
        [413, 412, 448, 520], [274, 353, 308, 445], [241, 345, 272, 413],
        [205, 437, 225, 484], [213, 458, 235, 503], [335, 617, 376, 685],
        [196, 444, 210, 487], [138, 192, 161, 219], [222, 475, 259, 553],
        [169, 399, 187, 434], [357, 373, 395, 466], [158, 382, 169, 410],
        [182, 417, 200, 456], [162, 406, 179, 442], [151, 382, 164, 414],
        [320, 616, 340, 669], [227, 328, 258, 403], [189, 433, 205, 477],
        [174, 470, 193, 529], [328, 367, 373, 494], [120, 447, 131, 466],
        [305, 572, 332, 598], [291, 356, 333, 469], [272, 558, 285, 600],
        [389, 260, 422, 384], [145, 377, 158, 407], [185, 413, 198, 433],
        [125, 446, 137, 465]])

    # Load the video stream we want to run detection on
    video_capture = cv2.VideoCapture(VIDEO_SOURCE)
    cv2.waitKey(33)

    frame_counter = 1
    has_space = False

    # Loop over each frame of video
    while video_capture.isOpened():
        success, frame = video_capture.read()
        if not success:
            break

        # Only run detection on every 100th frame
        if frame_counter % 100 == 0:
            print("----------------------------------------------")
            print("Start detecting cars ....")
            start_time = time.time()

            # Convert the image from BGR color (which OpenCV uses) to RGB color
            rgb_image = frame[:, :, ::-1]

            # Run the image through the Mask R-CNN model to get results.
            # Mask R-CNN assumes we are running detection on multiple images;
            # we only passed in one image, so grab the first result:
            # - r['rois'] are the bounding boxes of each detected object
            # - r['class_ids'] are the class ids (types) of each detected object
            # - r['scores'] are the confidence scores for each detection
            # - r['masks'] are the object masks for each detected object
            #   (which give you the object outlines)
            results = model.detect([rgb_image], verbose=0)
            r = results[0]

            # Filter the results down to the car / truck bounding boxes
            car_boxes = get_car_boxes(r['rois'], r['class_ids'])
            print("car_boxes")
            # print(car_boxes)
            print("Cars found in frame of video:")

            # Draw each box on the frame
            for i, box in enumerate(car_boxes, start=1):
                print("Car ", i, ":", box)
                y1, x1, y2, x2 = box
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 1)
                cv2.putText(frame, str(i), (x1, y1),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255))
                cv2.circle(frame, (x1, y1), 5, (0, 0, 255), -1)

            # See how much the detected cars overlap with the known parking spaces
            print("parking_areas")
            print(parking_areas)
            overlaps = mrcnn.utils.compute_overlaps(car_boxes, parking_areas)
            print(len(overlaps.tolist()))
            print("Checking overlaps .... frame %d" % frame_counter)
            print(overlaps)
            print(overlaps < 0.5)

            result = space_Violation(overlaps)
            if result < TOTAL_PARKING_CAPACITY:
                print("Free Parking Spaces")
                has_space = True
                cv2.putText(frame,
                            "Parking Spaces Available : %s" % str(TOTAL_PARKING_CAPACITY - result),
                            (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (100, 255, 0))
            else:
                has_space = False
                cv2.putText(frame, "Don't Have Parking Spaces", (10, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255))

            # Add a time stamp
            time_stamp = time.strftime("%Y/%m/%d %H:%M:%S %Z", time.localtime())
            cv2.putText(frame, time_stamp, (950, 710),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 1, cv2.LINE_AA)

            # cv2.imwrite("analyze/output/frame%d.jpg" % frame_counter, frame)  # for debug
            cv2.imwrite("analyze/output/frame-analyzed-4.jpg", frame)

            # Show the frame of video on the screen, for debug
            # cv2.imshow('Video', frame)
            # Add a sleep for demo
            # time.sleep(5)

            feed2 = Settings()
            feed2.device.state.update({'Russia': TOTAL_PARKING_CAPACITY - result})
            print("-----STATE--------")
            print(feed2.device.state)

            if has_space:
                # TODO - Push to DB
                cv2.putText(frame, "Free Parking Spaces", (10, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
            else:
                # TODO - Push to DB
                cv2.putText(frame, "Don't Have Free Parking Spaces", (10, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255))

        frame_counter = frame_counter + 1

        # Hit 'q' to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Clean up everything when finished
    video_capture.release()
    cv2.destroyAllWindows()
validation_data = ShapesDataset()
validation_data.load_shapes(1, config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1])
validation_data.prepare()

img = validation_data.load_image(0)

model = MaskRCNN(mode="inference", config=config, model_dir=MODEL_DIR)
# Exclude the head layers so the COCO weights load onto a model with a
# different number of classes
model.load_weights(COCO_MODEL_PATH, by_name=True,
                   exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                            "mrcnn_bbox", "mrcnn_mask"])
results = model.detect([img])[0]

# Reference results from an earlier run, used for regression checks
with open(os.path.join(RESULTS_DIR, 'test_results.pickle'), 'rb') as handle:
    old_results = pickle.load(handle)


class TestTraining(unittest.TestCase):

    def test_weights(self):
        # Snapshot the first trainable weight tensor of every layer before training
        weights_before = [
            i.trainable_weights[0].eval(sess)
            for i in model.get_trainable_layers()
        ]
        model.train(training_data, validation_data,
                    learning_rate=config.LEARNING_RATE / 10,
                    epochs=1, layers="all")
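        # The original test body is truncated at this point. A plausible
        # continuation (an assumption, not the source's code) snapshots the
        # weights again and asserts that one epoch of training changed them:
        weights_after = [
            i.trainable_weights[0].eval(sess)
            for i in model.get_trainable_layers()
        ]
        for before, after in zip(weights_before, weights_after):
            self.assertFalse(np.array_equal(before, after))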
def main():
    argv = sys.argv
    if "--" not in argv:
        argv = []  # as if no args were passed
    else:
        argv = argv[argv.index("--") + 1:]  # get all args after "--"

    # When --help or no args are given, print this help
    usage_text = "python synth_test.py -- [options]"
    parser = argparse.ArgumentParser(description=usage_text)
    parser.add_argument(
        "-isynth", "--input_syndir", dest="synth_path", type=str, required=True,
        help="Input the synthetic image directory")
    parser.add_argument(
        "-ijson", "--input_jsonfile", dest="json_path", type=str, required=True,
        help="Input the directory containing the VIA annotation JSON file")
    parser.add_argument(
        "-iweight", "--input_weightfile", dest="weight_path", type=str, required=True,
        help="Input the weight file")
    parser.add_argument(
        "-ianota", "--input_anotafile", dest="anota_path", type=str, required=True,
        help="Input the CSV annotation file")

    if not argv:
        parser.print_help()
        return

    args = parser.parse_args(argv)
    if (not args.synth_path or not args.json_path or
            not args.weight_path or not args.anota_path):
        print("Error: argument not given, aborting.")
        parser.print_help()
        return

    # Change model path here
    rcnn = MaskRCNN(mode='inference', model_dir='./', config=TestConfig())
    # Load weights into the model
    rcnn.load_weights(args.weight_path, by_name=True)

    # The annotation file is the same for every image, so load it once
    with open(os.path.join(args.json_path, "via_region_data.json")) as jf:
        annotations = json.load(jf)

    for root, dirs, files in os.walk(args.synth_path):
        for file in files:
            filename = file
            filesize = os.stat(os.path.join(args.synth_path, filename)).st_size
            key = filename + str(filesize)

            N = int(filename.split('.')[0])
            with open(args.anota_path, 'r') as the_file:
                reader = csv.reader(the_file)
                line = next((x for i, x in enumerate(reader) if i == N), None)

            image = cv2.imread(os.path.join(args.synth_path, filename))

            # Get coordinates of signs for ground-truth txt preparation
            marked_image, coord_list = draw_rect(image, args.anota_path, filename)

            ground_truth_signs = []
            for region_num in sorted(annotations[key]['regions']):
                # print("Sign_name: %s, Id: %d" %
                #       (annotations[key]['regions'][region_num]['region_attributes']['name'],
                #        SIGN_DICT[annotations[key]['regions'][region_num]['region_attributes']['name']]))
                ground_truth_signs.append(
                    annotations[key]['regions'][region_num]['region_attributes']['name'])

            # Load the photograph and make a prediction
            img = load_img(os.path.join(args.synth_path, filename))
            img = img_to_array(img)
            results = rcnn.detect([img], verbose=0)
            r = results[0]
            # Visualize the results
            # draw_image_with_boxes(os.path.join(args.synth_path, filename), r['rois'],
            #                       r['class_ids'], r['scores'], ground_truth_signs,
            #                       coord_list, filename)

            file_name = filename.split('.')[0]
            # Generate detection txt files for mAP calculation
            generate_predicted_txt(file_name, r['rois'], r['class_ids'], r['scores'])
            # Generate ground-truth txt files for mAP calculation
            generate_ground_truth_txt(file_name, ground_truth_signs, coord_list)
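# generate_predicted_txt and generate_ground_truth_txt are defined elsewhere.
# A minimal sketch of the detection-side writer, assuming the common
# "<label> <confidence> <left> <top> <right> <bottom>" one-line-per-detection
# format used by popular mAP tools; the output directory and the SIGN_LABELS
# id-to-name mapping are assumptions, not confirmed by this script.
def generate_predicted_txt(file_name, rois, class_ids, scores,
                           out_dir="detection-results"):
    os.makedirs(out_dir, exist_ok=True)
    with open(os.path.join(out_dir, file_name + ".txt"), "w") as f:
        # Mask R-CNN rois are [y1, x1, y2, x2]; reorder to left/top/right/bottom
        for (y1, x1, y2, x2), cls, score in zip(rois, class_ids, scores):
            label = SIGN_LABELS[int(cls)]  # hypothetical id-to-name mapping
            f.write("%s %.6f %d %d %d %d\n" % (label, score, x1, y1, x2, y2))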
# FPS calculation and timing to frames: process one frame every `seconds`
# seconds (e.g. at 30 fps with seconds = 3, every 90th frame is processed)
seconds = 3
fps = video_capture.get(cv2.CAP_PROP_FPS)  # frames per second of the source
multiplier = int(fps * seconds)

while video_capture.isOpened():
    success, frame = video_capture.read()
    if success:
        frameId = int(round(video_capture.get(cv2.CAP_PROP_POS_FRAMES)))
        if frameId % multiplier == 0:
            # rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            rgb_image = imgpros.img_to_array(frame)

            # Run the image through the Mask R-CNN model to get results
            t1 = time.time()
            results = model.detect([rgb_image], verbose=0)
            t2 = time.time()
            print('Time model running = ' + str(t2 - t1))

            # Mask R-CNN assumes we are running detection on multiple images.
            # We only passed in one image to detect, so only grab the first result.
            r = results[0]

            parked_car_boxes = get_car_boxes(r['rois'], r['class_ids'])
            print(parked_car_boxes)
            print(pinned_car_boxes)

            overlaps = mrcnn.utils.compute_overlaps(pinned_car_boxes, parked_car_boxes)
            # overlaps = overlaps.transpose()

            free_space = False
            # for parking_area, overlap_areas in zip(parked_car_boxes, overlaps):
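# get_car_boxes is used here and in worker4 but defined elsewhere. A minimal
# sketch under the assumption that it keeps only the COCO vehicle classes
# (3 = car, 6 = bus, 8 = truck in the 81-class indexing this model uses);
# the exact class filter is a guess, not the confirmed implementation.
def get_car_boxes(boxes, class_ids):
    car_boxes = []
    for i, box in enumerate(boxes):
        # Keep the box only when the detection is a vehicle class
        if class_ids[i] in [3, 6, 8]:
            car_boxes.append(box)
    return np.array(car_boxes)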
# Recreate the model in inference mode
model = MaskRCNN(mode='inference', config=inference_config, model_dir='./')

# Get path to saved weights
model_path = 'mask_rcnn_clouds_config_0001.h5'

# Load trained weights
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)

# Test on a random validation image
image_id = np.random.choice(dataset_val.image_ids)
original_image, image_meta, gt_class_id, gt_bbox, gt_mask = \
    load_image_gt(dataset_val, inference_config, image_id, use_mini_mask=False)

log("original_image", original_image)
log("image_meta", image_meta)
log("gt_class_id", gt_class_id)
log("gt_bbox", gt_bbox)
log("gt_mask", gt_mask)

visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id,
                            dataset_train.class_names, figsize=(8, 8))

# Convert the image into a one-sample batch
sample = np.expand_dims(mold_image(original_image, inference_config), 0)
model_prediction = model.detect(sample, verbose=1)

# Evaluate model performance on the training and validation datasets
print('Training Set Dice Coefficient {:.2f}'.format(
    evaluate_model(dataset_train, model, inference_config)))
print('Validation Set Dice Coefficient {:.2f}'.format(
    evaluate_model(dataset_val, model, inference_config)))
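# evaluate_model is defined elsewhere. A minimal sketch of the per-image Dice
# coefficient it reports, assuming it compares a predicted binary mask against
# the ground-truth mask; the function name dice_coefficient and the exact
# reduction over the dataset are assumptions, but the formula itself is the
# standard Dice = 2|A ∩ B| / (|A| + |B|).
def dice_coefficient(pred_mask, gt_mask, eps=1e-7):
    pred = np.asarray(pred_mask, dtype=bool).ravel()
    gt = np.asarray(gt_mask, dtype=bool).ravel()
    intersection = np.logical_and(pred, gt).sum()
    # eps guards against division by zero when both masks are empty
    return (2.0 * intersection) / (pred.sum() + gt.sum() + eps)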