def main():
    model = create_model(IMAGE_SIZE, ALPHA)
    model.load_weights(WEIGHTS_FILE)

    print("IoU on training data")
    dataset_iou(TRAIN_OUTPUT_FILE, model)

    print("\nIoU on validation data")
    dataset_iou(VALIDATION_OUTPUT_FILE, model)

    print("\nTrying out unscaled image")
    # Pick the first PNG in the training folder.
    for k in os.listdir(TRAIN_FOLDER):
        if "png" in k:
            break

    path = os.path.join(TRAIN_FOLDER, k)
    pred = predict_image(path, model)

    # Scale the prediction from model coordinates back to the original image size.
    height, width, _ = cv2.imread(path).shape
    scaled = np.array([pred[0] * width / IMAGE_SIZE,
                       pred[1] * height / IMAGE_SIZE,
                       pred[2] * width / IMAGE_SIZE,
                       pred[3] * height / IMAGE_SIZE])

    show_image(path, np.array([0]), np.floor(scaled).astype(int))
    print("\nDone")
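# `predict_image` is defined elsewhere in the project; below is a minimal
# sketch of a plausible implementation, assuming it resizes the image to the
# model's input size and returns the raw 4-element box prediction. The real
# helper may preprocess differently.
def predict_image(path, model):
    image = cv2.resize(cv2.imread(path), (IMAGE_SIZE, IMAGE_SIZE))
    return model.predict(np.array([image]))[0]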
def main():
    model = create_model(IMAGE_SIZE, ALPHA)
    model.load_weights(WEIGHTS_FILE)

    ious = []
    xmls = sorted(glob.glob("{}/*xml".format(DATASET_FOLDER)))
    for i, xml_file in enumerate(xmls):
        print("{}/{}".format(i + 1, len(xmls)), end="\r")

        tree = ET.parse(xml_file)
        path = os.path.join(DATASET_FOLDER, tree.findtext("./filename"))

        height = int(tree.findtext("./size/height"))
        width = int(tree.findtext("./size/width"))
        xmin = int(tree.findtext("./object/bndbox/xmin"))
        ymin = int(tree.findtext("./object/bndbox/ymin"))
        xmax = int(tree.findtext("./object/bndbox/xmax"))
        ymax = int(tree.findtext("./object/bndbox/ymax"))

        # Hard-coded fix: the annotated box for this image is wrong.
        if "Abyssinian_1.jpg" in path:
            xmin -= 160
            xmax -= 160

        # Scale the ground-truth box to the model's input resolution.
        box1 = [(xmin / width) * IMAGE_SIZE, (ymin / height) * IMAGE_SIZE,
                (xmax / width) * IMAGE_SIZE, (ymax / height) * IMAGE_SIZE]

        image = cv2.resize(cv2.imread(path), (IMAGE_SIZE, IMAGE_SIZE))
        region = model.predict(x=np.array([image]))[0]

        # The model outputs a center point plus width/height; convert to corners.
        x, y, w, h = region
        xmin = x - w / 2
        ymin = y - h / 2
        xmax = xmin + w
        ymax = ymin + h
        box2 = [xmin, ymin, xmax, ymax]

        iou_ = iou(box1, box2)
        ious.append(iou_)

        if DEBUG:
            print("IoU for {} is {}".format(path, iou_))
            cv2.rectangle(image, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255), 1)
            cv2.rectangle(image, (int(box1[0]), int(box1[1])), (int(box1[2]), int(box1[3])), (0, 255, 0), 1)
            cv2.imshow("image", image)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

    np.set_printoptions(suppress=True)
    print("\nAvg IoU: {}".format(np.mean(ious)))
    print("Highest IoU: {}".format(np.max(ious)))
    print("Lowest IoU: {}".format(np.min(ious)))
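# `iou` is assumed to be the standard intersection-over-union for two
# [xmin, ymin, xmax, ymax] boxes; a minimal sketch follows (the project's own
# implementation may handle degenerate boxes differently).
def iou(box1, box2):
    # Corners of the intersection rectangle.
    xa, ya = max(box1[0], box2[0]), max(box1[1], box2[1])
    xb, yb = min(box1[2], box2[2]), min(box1[3], box2[3])
    intersection = max(0, xb - xa) * max(0, yb - ya)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    return intersection / (area1 + area2 - intersection)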
def main():
    model = create_model(IMAGE_SIZE, ALPHA)
    model.load_weights('model-0.98.h5')

    # Create the output folder once, up front (mkdir is idempotent here).
    pathlib.Path(OUTPUT_TEST_PATH).mkdir(parents=True, exist_ok=True)

    image_paths = sorted(glob.glob('{}/*png'.format(TEST_SET_FOLDER)))
    for i, image_path in enumerate(image_paths):
        image = cv2.resize(cv2.imread(image_path), (IMAGE_SIZE, IMAGE_SIZE))
        x, y, w, h = model.predict(x=np.array([image]))[0]

        # Convert the predicted center/size box to corner points and draw it.
        cv2.rectangle(image,
                      (int(x - w / 2), int(y - h / 2)),
                      (int(x + w / 2), int(y + h / 2)),
                      (0, 0, 255), 1)
        cv2.imwrite(f'{OUTPUT_TEST_PATH}/image{i}.png', image)
def __init__(self, environment_name, weight_file_name, image_size, ALPHA,
             grasp_activated, number_of_elements_to_be_output):
    self._image_size = image_size
    self._number_of_elements_to_be_output = number_of_elements_to_be_output

    self.init_rviz_markers()

    # Init the service to move Fetch; needed because MoveIt doesn't work in Python 3.
    self._grasp_activated = grasp_activated
    if self._grasp_activated:
        self.fetch_move_client = MoveFetchClient()
        self.fetch_move_client.go_to_safe_arm_pose()

    # Init the camera RGB object.
    self.rgb_camera_object = RGBCamera("/dynamic_objects/camera/raw_image")

    # These are the models that we will generate information about.
    self.model_to_track_name = "demo_spam1"
    self.table_to_track_name = "demo_table1"
    model_to_track_list = [self.model_to_track_name, self.table_to_track_name]
    self.gz_model_obj = GazeboModel(model_to_track_list)

    # We start the model in Keras.
    self.model = create_model(self._image_size, ALPHA,
                              self._number_of_elements_to_be_output)

    # Get the file path for my_dcnn_training_pkg.
    rospack = rospkg.RosPack()
    path_to_package = rospack.get_path('my_dcnn_training_pkg')
    models_weight_checkpoints_folder = os.path.join(path_to_package, "bk")
    model_file_path = os.path.join(models_weight_checkpoints_folder, weight_file_name)
    print(model_file_path)
    self.model.load_weights(model_file_path)

    self.testing_unscaled_img_folder = os.path.join(
        path_to_package, "testing/dataset_gen/images")
    self.testing_unscaled_anotations_folder = os.path.join(
        path_to_package, "testing/dataset_gen_annotations")

    # We reset the environment to a random state.
    print("Starting Service to Reset World Randomly....")
    self.dynamic_world_service_call = rospy.ServiceProxy('/dynamic_world_service', Empty)
    self.change_env_request = EmptyRequest()
    self.dynamic_world_service_call(self.change_env_request)
    print("Starting Service to Reset World Randomly....DONE")
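# Hypothetical instantiation; the enclosing class name (TestDCNN here) and
# every argument value are placeholders, not taken from the original source:
# tester = TestDCNN(environment_name="dynamic_objects",
#                   weight_file_name="model.hdf5",
#                   image_size=96,
#                   ALPHA=0.25,
#                   grasp_activated=False,
#                   number_of_elements_to_be_output=4)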
def load_model(model_class, model_weights_filename, embeddings_filename, max_len=50):
    model_params = {
        'max_len': max_len,
        'rnn_size': 300,
        'hidden_size': 300,
        'rnn_cell': 'LSTM',
        'regularization': 0.000001,
        'dropout': 0.5,
        'trainable_embeddings': False,
    }

    # Load embeddings.
    W_emb = load_pickle(embeddings_filename)
    model_params['W_emb'] = W_emb
    logging.info('Embeddings restored: %s', os.path.basename(embeddings_filename))

    # Create the model.
    model = create_model(model_class, model_params)

    # Load weights.
    model.load_weights(model_weights_filename)
    logging.info('Weights restored: %s', os.path.basename(model_weights_filename))

    return model
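# Hypothetical usage; RNNModel, the weight file, and the embeddings pickle are
# placeholders for whatever the project actually provides:
# model = load_model(RNNModel,
#                    'checkpoints/model_weights.h5',
#                    'data/embeddings.pkl',
#                    max_len=50)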
def start_fall_detector_FDD(video_path, annotation_path):
    '''
    Uses the annotated frames from the FDD dataset for the spatial stream
    '''
    model = create_model("weights/weights.hdf5")

    video = cv.VideoCapture(video_path)
    if not video.isOpened():
        print("Cannot open video {}".format(video_path))
        return

    annotation_file = open(annotation_path, "r")
    annotation_file.readline()  # Skip line 1
    annotation_file.readline()  # Skip line 2

    cv.namedWindow("Capture")
    cv.namedWindow("Cropped")
    cv.namedWindow("MHI")
    cv.moveWindow("Capture", 100, 100)
    cv.moveWindow("Cropped", 500, 100)
    cv.moveWindow("MHI", 800, 100)

    fps = int(video.get(cv.CAP_PROP_FPS))
    # Sample roughly 10 frames per second: round fps/10 to the nearest integer, minimum 1.
    interval = int(max(1, math.ceil(fps / 10) if (fps / 10 - math.floor(fps / 10)) >= 0.5 else math.floor(fps / 10)))
    ms_per_frame = 1000 / fps  # milliseconds

    count = interval
    prev_mhi = [np.zeros((IMAGE_SIZE, IMAGE_SIZE), np.float32) for i in range(interval)]
    prev_timestamp = [i * ms_per_frame for i in range(interval)]
    prev_frames = [None] * interval
    for i in range(interval):
        ret, frame = video.read()
        frame = cv.resize(frame, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv.INTER_AREA)
        frame = cv.GaussianBlur(frame, GAUSSIAN_KERNEL, 0)
        prev_frames[i] = frame.copy()

    fall_frames_seen = 0  # Number of consecutive fall frames seen so far
    fall_detected = False
    MIN_NUM_FALL_FRAME = int(fps / 10)  # Require several consecutive fall frames to avoid flickery classifications

    while True:
        start_time = timer()
        ret, orig_frame = video.read()
        if not ret:
            break

        # Crop out the annotated bounding box.
        annotations = annotation_file.readline().strip().split(",")
        x_start = int(annotations[2])
        y_start = int(annotations[3])
        x_end = int(annotations[4])
        y_end = int(annotations[5])
        if x_start <= 0 and y_start <= 0 and x_end <= 0 and y_end <= 0:
            # No annotated box for this frame: fall back to the full original
            # frame (shape is (height, width, channels)).
            x_start = 0
            y_start = 0
            x_end = orig_frame.shape[1]
            y_end = orig_frame.shape[0]

        cropped = orig_frame[y_start:y_end, x_start:x_end].copy()
        try:
            cropped = cv.resize(cropped, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv.INTER_LINEAR)
        except cv.error:
            # Degenerate box: fall back to resizing the whole frame.
            cropped = cv.resize(orig_frame, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv.INTER_LINEAR)

        labelled_frame = orig_frame.copy()
        cv.rectangle(labelled_frame, (x_start, y_start), (x_end, y_end),
                     color=green, lineType=line_type)

        # Create the MHI (motion history image).
        prev_ind = count % interval
        prev_timestamp[prev_ind] += interval * ms_per_frame
        count += 1

        frame = cv.resize(orig_frame, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv.INTER_AREA)
        frame = cv.GaussianBlur(frame, GAUSSIAN_KERNEL, 0)
        frame_diff = cv.absdiff(frame, prev_frames[prev_ind])
        gray_diff = cv.cvtColor(frame_diff, cv.COLOR_BGR2GRAY)
        _, motion_mask = cv.threshold(gray_diff, THRESHOLD, 1, cv.THRESH_BINARY)
        cv.motempl.updateMotionHistory(motion_mask, prev_mhi[prev_ind],
                                       prev_timestamp[prev_ind], MHI_DURATION)
        mhi = np.uint8(np.clip((prev_mhi[prev_ind] - (prev_timestamp[prev_ind] - MHI_DURATION)) / MHI_DURATION, 0, 1) * 255)
        prev_frames[prev_ind] = frame.copy()

        # Prepare input.
        spatial_input = cropped.copy().astype(np.float32)
        spatial_input = cv.cvtColor(spatial_input, cv.COLOR_BGR2RGB)
        spatial_input = np.array([spatial_input])

        temporal_input = mhi.copy().astype(np.float32)
        temporal_input = cv.cvtColor(temporal_input, cv.COLOR_GRAY2RGB)
        temporal_input = np.array([temporal_input])

        spatial_input = preprocess_input(spatial_input)
        temporal_input = preprocess_input(temporal_input)

        # Make prediction.
        prediction = np.round(model.predict([spatial_input, temporal_input]))[0]
        is_fall = prediction == 1
        if is_fall:
            fall_frames_seen = min(fall_frames_seen + 1, MIN_NUM_FALL_FRAME)
        else:
            fall_frames_seen = max(fall_frames_seen - 1, 0)
        if fall_frames_seen == MIN_NUM_FALL_FRAME:
            fall_detected = True
        elif fall_frames_seen == 0:
            fall_detected = False

        cv.putText(labelled_frame,
                   "Status: {}".format("Fall detected!" if fall_detected else "No fall"),
                   (5, 20), fontFace=font, fontScale=0.5,
                   color=red if fall_detected else green, lineType=line_type)

        # Show images.
        cv.imshow("Capture", labelled_frame)
        cv.imshow("Cropped", cropped)
        cv.imshow("MHI", mhi)

        # Compensate for the time spent processing this frame.
        wait_time = int(max(1, ms_per_frame - (timer() - start_time) * 1000))
        if cv.waitKey(wait_time) == 27:  # Esc quits
            break

    video.release()
    cv.destroyAllWindows()
    annotation_file.close()
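if __name__ == "__main__":
    # Hypothetical invocation; the paths are placeholders for wherever the FDD
    # videos and their per-frame annotation files are stored.
    start_fall_detector_FDD("data/fdd/video_01.avi",
                            "data/fdd/video_01_annotation.txt")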
def start_fall_detector_realtime(input_path=0):
    '''
    Capture RGB and MHI in real time and feed them into the model
    '''
    model = create_model(WEIGHTS_PATH)

    cap = cv.VideoCapture(input_path)
    if not cap.isOpened():
        print("Cannot open video/webcam {}".format(input_path))
        return

    MHI_DURATION_SHORT = 300  # Used for putting a bounding box on recent motion

    fps = int(cap.get(cv.CAP_PROP_FPS))
    cap_width = cap.get(cv.CAP_PROP_FRAME_WIDTH)
    cap_height = cap.get(cv.CAP_PROP_FRAME_HEIGHT)
    # Sample roughly 10 frames per second: round fps/10 to the nearest integer, minimum 1.
    interval = int(max(1, math.ceil(fps / 10) if (fps / 10 - math.floor(fps / 10)) >= 0.5 else math.floor(fps / 10)))
    ms_per_frame = 1000 / fps  # milliseconds

    count = interval
    prev_mhi = [np.zeros((IMAGE_SIZE, IMAGE_SIZE), np.float32) for i in range(interval)]
    # Use a comprehension so every slot gets its own buffer; a `* interval`
    # repeat would alias a single shared array.
    prev_mhi_short = [np.zeros((IMAGE_SIZE, IMAGE_SIZE), np.float32) for i in range(interval)]
    prev_timestamp = [i * ms_per_frame for i in range(interval)]
    prev_frames = [None] * interval
    for i in range(interval):
        ret, frame = cap.read()
        frame = cv.resize(frame, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv.INTER_AREA)
        frame = cv.GaussianBlur(frame, GAUSSIAN_KERNEL, 0)
        prev_frames[i] = frame.copy()

    fall_frames_seen = 0  # Number of consecutive fall frames seen so far
    fall_detected = False
    MIN_NUM_FALL_FRAME = int(fps / 10)  # Require several consecutive fall frames to avoid flickery classifications

    cv.namedWindow("Capture")
    cv.namedWindow("Cropped")
    cv.namedWindow("MHI")
    cv.moveWindow("Capture", 100, 100)
    cv.moveWindow("Cropped", 500, 100)
    cv.moveWindow("MHI", 800, 100)

    while True:
        start_time = timer()
        ret, orig_frame = cap.read()
        if not ret:
            break

        # Create the MHIs (long for classification, short for localization).
        prev_ind = count % interval
        prev_timestamp[prev_ind] += interval * ms_per_frame
        count += 1

        frame = cv.resize(orig_frame, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv.INTER_AREA)
        frame = cv.GaussianBlur(frame, GAUSSIAN_KERNEL, 0)
        frame_diff = cv.absdiff(frame, prev_frames[prev_ind])
        gray_diff = cv.cvtColor(frame_diff, cv.COLOR_BGR2GRAY)
        _, motion_mask = cv.threshold(gray_diff, THRESHOLD, 1, cv.THRESH_BINARY)
        prev_frames[prev_ind] = frame.copy()

        cv.motempl.updateMotionHistory(motion_mask, prev_mhi[prev_ind],
                                       prev_timestamp[prev_ind], MHI_DURATION)
        cv.motempl.updateMotionHistory(motion_mask, prev_mhi_short[prev_ind],
                                       prev_timestamp[prev_ind], MHI_DURATION_SHORT)
        mhi = np.uint8(np.clip((prev_mhi[prev_ind] - (prev_timestamp[prev_ind] - MHI_DURATION)) / MHI_DURATION, 0, 1) * 255)
        mhi_short = np.uint8(np.clip((prev_mhi_short[prev_ind] - (prev_timestamp[prev_ind] - MHI_DURATION_SHORT)) / MHI_DURATION_SHORT, 0, 1) * 255)

        # Crop out the region of recent motion.
        x_start = y_start = IMAGE_SIZE
        x_end = y_end = 0
        contours, _ = cv.findContours(mhi_short, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
        if len(contours) > 0:
            # Take the union of all contour bounding boxes.
            for c in contours:
                contour = cv.approxPolyDP(c, 3, True)
                x, y, w, h = cv.boundingRect(contour)
                x_start = min(x_start, x)
                y_start = min(y_start, y)
                x_end = max(x_end, x + w)
                y_end = max(y_end, y + h)
        else:
            x_start = y_start = 0
            x_end = y_end = IMAGE_SIZE

        # Scale the box from MHI coordinates back to capture resolution.
        x_start = int(np.round(x_start / IMAGE_SIZE * cap_width))
        y_start = int(np.round(y_start / IMAGE_SIZE * cap_height))
        x_end = int(np.round(x_end / IMAGE_SIZE * cap_width))
        y_end = int(np.round(y_end / IMAGE_SIZE * cap_height))

        labelled_frame = orig_frame.copy()
        cv.rectangle(labelled_frame, (x_start, y_start), (x_end, y_end),
                     color=green, lineType=line_type)

        cropped = orig_frame[y_start:y_end, x_start:x_end].copy()
        try:
            cropped = cv.resize(cropped, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv.INTER_LINEAR)
        except cv.error:
            # Degenerate box: fall back to resizing the whole frame.
            cropped = cv.resize(orig_frame, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv.INTER_LINEAR)

        # Prepare input.
        spatial_input = cropped.copy().astype(np.float32)
        spatial_input = cv.cvtColor(spatial_input, cv.COLOR_BGR2RGB)
        spatial_input = np.array([spatial_input])

        temporal_input = mhi.copy().astype(np.float32)
        temporal_input = cv.cvtColor(temporal_input, cv.COLOR_GRAY2RGB)
        temporal_input = np.array([temporal_input])

        spatial_input = preprocess_input(spatial_input)
        temporal_input = preprocess_input(temporal_input)

        # Make prediction.
        prediction = np.round(model.predict([spatial_input, temporal_input]))[0]
        is_fall = prediction == 1
        if is_fall:
            fall_frames_seen = min(fall_frames_seen + 1, MIN_NUM_FALL_FRAME)
        else:
            fall_frames_seen = max(fall_frames_seen - 1, 0)
        if fall_frames_seen == MIN_NUM_FALL_FRAME:
            fall_detected = True
        elif fall_frames_seen == 0:
            fall_detected = False

        cv.putText(labelled_frame,
                   "Status: {}".format("Fall detected!" if fall_detected else "No fall"),
                   (5, 20), fontFace=font, fontScale=0.5,
                   color=red if fall_detected else green, lineType=line_type)

        # Show images.
        cv.imshow("Capture", labelled_frame)
        cv.imshow("Cropped", cropped)
        cv.imshow("MHI", mhi)

        # Compensate for the time spent processing this frame.
        wait_time = int(max(1, ms_per_frame - (timer() - start_time) * 1000))
        if cv.waitKey(wait_time) == 27:  # Esc quits
            break

    cap.release()
    cv.destroyAllWindows()
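if __name__ == "__main__":
    # 0 (the default) opens the webcam; pass a file path instead to run on a
    # recorded video.
    start_fall_detector_realtime()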