Example #1
def main():
    model = create_model(IMAGE_SIZE, ALPHA)
    model.load_weights(WEIGHTS_FILE)

    print("IoU on training data")
    dataset_iou(TRAIN_OUTPUT_FILE, model)

    print("\nIoU on validation data")
    dataset_iou(VALIDATION_OUTPUT_FILE, model)

    print("\nTrying out unscaled image")
    # Pick the first PNG in the training folder
    k = next(f for f in os.listdir(TRAIN_FOLDER) if f.endswith(".png"))
    path = os.path.join(TRAIN_FOLDER, k)
    pred = predict_image(path, model)

    height, width, _ = cv2.imread(path).shape
    scaled = np.array([
        pred[0] * width / IMAGE_SIZE, pred[1] * height / IMAGE_SIZE,
        pred[2] * width / IMAGE_SIZE, pred[3] * height / IMAGE_SIZE
    ])
    show_image(path, np.array([0]), np.floor(scaled).astype(int))

    print("\nDone")
Example #2
def main():
    model = create_model(IMAGE_SIZE, ALPHA)
    model.load_weights(WEIGHTS_FILE)

    ious = []
    xmls = sorted(glob.glob("{}/*xml".format(DATASET_FOLDER)))
    for i, xml_file in enumerate(xmls):
        print("{}/{}".format(i + 1, len(xmls)), end="\r")
        tree = ET.parse(xml_file)

        path = os.path.join(DATASET_FOLDER, tree.findtext("./filename"))
        height = int(tree.findtext("./size/height"))
        width = int(tree.findtext("./size/width"))
        xmin = int(tree.findtext("./object/bndbox/xmin"))
        ymin = int(tree.findtext("./object/bndbox/ymin"))
        xmax = int(tree.findtext("./object/bndbox/xmax"))
        ymax = int(tree.findtext("./object/bndbox/ymax"))

        # hardcoded fix, the box is wrong
        if "Abyssinian_1.jpg" in path:
            xmin -= 160
            xmax -= 160

        box1 = [(xmin / width) * IMAGE_SIZE, (ymin / height) * IMAGE_SIZE,
                (xmax / width) * IMAGE_SIZE, (ymax / height) * IMAGE_SIZE]

        image = cv2.resize(cv2.imread(path), (IMAGE_SIZE, IMAGE_SIZE))
        region = model.predict(x=np.array([image]))[0]
        x, y, w, h = region

        xmin = x - w / 2
        ymin = y - h / 2
        xmax = xmin + w
        ymax = ymin + h

        box2 = [xmin, ymin, xmax, ymax]

        iou_ = iou(box1, box2)
        ious.append(iou_)

        if DEBUG:
            print("IoU for {} is {}".format(path, iou_))
            cv2.rectangle(image, (int(xmin), int(ymin)),
                          (int(xmax), int(ymax)), (0, 0, 255), 1)
            cv2.rectangle(image, (int(box1[0]), int(box1[1])),
                          (int(box1[2]), int(box1[3])), (0, 255, 0), 1)
            cv2.imshow("image", image)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

    np.set_printoptions(suppress=True)
    print("\nAvg IoU: {}".format(np.mean(ious)))
    print("Highest IoU: {}".format(np.max(ious)))
    print("Lowest IoU: {}".format(np.min(ious)))
Example #3
def main():
    model = create_model(IMAGE_SIZE, ALPHA)
    model.load_weights('model-0.98.h5')

    # Create the output directory once, outside the loop
    pathlib.Path(OUTPUT_TEST_PATH).mkdir(parents=True, exist_ok=True)

    image_paths = sorted(glob.glob('{}/*png'.format(TEST_SET_FOLDER)))
    for i, image_path in enumerate(image_paths):
        image = cv2.resize(cv2.imread(image_path), (IMAGE_SIZE, IMAGE_SIZE))
        x, y, w, h = model.predict(x=np.array([image]))[0]
        cv2.rectangle(image, (int(x - w / 2), int(y - h / 2)),
                      (int(x + w / 2), int(y + h / 2)), (0, 0, 255), 1)
        cv2.imwrite(f'{OUTPUT_TEST_PATH}/image{i}.png', image)
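
Examples #1 through #3 build the network with create_model(IMAGE_SIZE, ALPHA), whose definition is not included. A plausible minimal sketch, assuming a MobileNet backbone where ALPHA is the width multiplier and a four-unit regression head producing the (x, y, w, h) box the examples consume; the real definition may differ:

from tensorflow.keras.applications import MobileNet
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

def create_model(image_size, alpha):
    # Backbone without its classifier head; trained weights are loaded
    # afterwards via model.load_weights, so no pretrained weights here
    base = MobileNet(input_shape=(image_size, image_size, 3),
                     include_top=False, alpha=alpha, weights=None)
    x = GlobalAveragePooling2D()(base.output)
    # Four regression outputs: x, y, w, h
    x = Dense(4)(x)
    return Model(inputs=base.input, outputs=x)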
Example #4
    def __init__(self, environment_name, weight_file_name, image_size, ALPHA,
                 grasp_activated, number_of_elements_to_be_output):

        self._image_size = image_size
        self._number_of_elements_to_be_output = number_of_elements_to_be_output

        self.init_rviz_markers()

        # Init service to move Fetch; needed because MoveIt doesn't work in Python 3
        self._grasp_activated = grasp_activated
        if self._grasp_activated:
            self.fetch_move_client = MoveFetchClient()
            self.fetch_move_client.go_to_safe_arm_pose()

        # Init camera RGB object
        self.rgb_camera_object = RGBCamera("/dynamic_objects/camera/raw_image")

        # These are the models we will generate information about.
        self.model_to_track_name = "demo_spam1"
        self.table_to_track_name = "demo_table1"

        model_to_track_list = [
            self.model_to_track_name, self.table_to_track_name
        ]
        self.gz_model_obj = GazeboModel(model_to_track_list)

        # We start the model in Keras
        self.model = create_model(self._image_size, ALPHA,
                                  self._number_of_elements_to_be_output)

        rospack = rospkg.RosPack()
        # get the file path for the my_dcnn_training_pkg package
        path_to_package = rospack.get_path('my_dcnn_training_pkg')
        models_weight_checkpoints_folder = os.path.join(path_to_package, "bk")
        model_file_path = os.path.join(models_weight_checkpoints_folder,
                                       weight_file_name)

        print(model_file_path)

        self.model.load_weights(model_file_path)

        self.testing_unscaled_img_folder = os.path.join(
            path_to_package, "testing/dataset_gen/images")
        self.testing_unscaled_anotations_folder = os.path.join(
            path_to_package, "testing/dataset_gen_annotations")

        # We reset the environment to a random state
        print("Starting Service to Reset World Randomly....")
        self.dynamic_world_service_call = rospy.ServiceProxy(
            '/dynamic_world_service', Empty)
        self.change_env_request = EmptyRequest()
        self.dynamic_world_service_call(self.change_env_request)
        print("Starting Service to Reset World Randomly....DONE")
Example #5
def load_model(model_class, model_weights_filename, embeddings_filename, max_len=50):
    model_params = {
        'max_len': max_len,
        'rnn_size': 300,
        'hidden_size': 300,
        'rnn_cell': 'LSTM',
        'regularization': 0.000001,
        'dropout': 0.5,
        'trainable_embeddings': False,
    }

    # load embeddings
    W_emb = load_pickle(embeddings_filename)
    model_params['W_emb'] = W_emb
    logging.info('Embeddings restored: %s', os.path.basename(embeddings_filename))

    # create model
    model = create_model(model_class, model_params)

    # load weights
    model.load_weights(model_weights_filename)
    logging.info('Weights restored: %s', os.path.basename(model_weights_filename))

    return model
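
load_pickle is not shown in this example; a minimal sketch, assuming a plain pickle file:

import pickle

def load_pickle(filename):
    # Deserialize a single object from a pickle file
    with open(filename, 'rb') as f:
        return pickle.load(f)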
Example #6
def start_fall_detector_FDD(video_path, annotation_path):
    ''' Uses the annotated frames from the FDD dataset for spatial stream '''
    model = create_model("weights/weights.hdf5")
    video = cv.VideoCapture(video_path)
    if not video.isOpened():
        print("Cannot open video {}".format(video_path))
        return

    annotation_file = open(annotation_path, "r")
    annotation_file.readline() # Skip line 1
    annotation_file.readline() # Skip line 2

    cv.namedWindow("Capture")
    cv.namedWindow("Cropped")
    cv.namedWindow("MHI")
    cv.moveWindow("Capture", 100, 100)
    cv.moveWindow("Cropped", 500, 100)
    cv.moveWindow("MHI", 800, 100)

    fps = int(video.get(cv.CAP_PROP_FPS))
    # Sample roughly every fps/10 frames, rounding half up, at least every frame
    interval = max(1, math.floor(fps / 10 + 0.5))
    ms_per_frame = 1000 / fps # milliseconds
    count = interval

    prev_mhi = [np.zeros((IMAGE_SIZE, IMAGE_SIZE), np.float32) for i in range(interval)]
    prev_timestamp = [i * ms_per_frame for i in range(interval)]
    prev_frames = [None] * interval
    for i in range(interval):
        ret, frame = video.read()
        frame = cv.resize(frame, (IMAGE_SIZE, IMAGE_SIZE), interpolation = cv.INTER_AREA)
        frame = cv.GaussianBlur(frame, GAUSSIAN_KERNEL, 0)
        prev_frames[i] = frame.copy()

    fall_frames_seen = 0 # Number of consecutive fall frames seen so far
    fall_detected = False
    MIN_NUM_FALL_FRAME = int(fps/10) # Need at least some number of frames to avoid flickery classifications

    while True:
        start_time = timer()
        ret, orig_frame = video.read()
        if not ret:
            break

        # Crop out bounding box
        annotations = annotation_file.readline().strip().split(",")
        x_start = int(annotations[2])
        y_start = int(annotations[3])
        x_end = int(annotations[4])
        y_end = int(annotations[5])
        if x_start <= 0 and y_start <= 0 and x_end <= 0 and y_end <= 0:
            # No box annotated: fall back to the full frame.
            # orig_frame.shape is (height, width, channels), so width is shape[1]
            x_start = 0
            y_start = 0
            x_end = orig_frame.shape[1]
            y_end = orig_frame.shape[0]

        cropped = orig_frame[y_start:y_end, x_start:x_end].copy()
        try:
            cropped = cv.resize(cropped, (IMAGE_SIZE, IMAGE_SIZE), interpolation = cv.INTER_LINEAR)
        except cv.error:  # empty crop from a degenerate box
            cropped = cv.resize(orig_frame, (IMAGE_SIZE, IMAGE_SIZE), interpolation = cv.INTER_LINEAR)

        labelled_frame = orig_frame.copy()
        cv.rectangle(
            labelled_frame, (x_start, y_start), (x_end, y_end),
            color = green, lineType = line_type
        )

        # Create MHI
        prev_ind = count % interval
        prev_timestamp[prev_ind] += interval * ms_per_frame
        count += 1

        frame = cv.resize(orig_frame, (IMAGE_SIZE, IMAGE_SIZE), interpolation = cv.INTER_AREA)
        frame = cv.GaussianBlur(frame, GAUSSIAN_KERNEL, 0)
        frame_diff = cv.absdiff(frame, prev_frames[prev_ind])
        gray_diff = cv.cvtColor(frame_diff, cv.COLOR_BGR2GRAY)
        _, motion_mask = cv.threshold(gray_diff, THRESHOLD, 1, cv.THRESH_BINARY)

        cv.motempl.updateMotionHistory(motion_mask, prev_mhi[prev_ind], prev_timestamp[prev_ind], MHI_DURATION)
        mhi = np.uint8(np.clip((prev_mhi[prev_ind] - (prev_timestamp[prev_ind] - MHI_DURATION))/MHI_DURATION, 0, 1) * 255)
        prev_frames[prev_ind] = frame.copy()

        # Prepare input
        spatial_input = cropped.copy().astype(np.float32)
        spatial_input = cv.cvtColor(spatial_input, cv.COLOR_BGR2RGB)
        spatial_input = np.array([spatial_input])
        temporal_input = mhi.copy().astype(np.float32)
        temporal_input = cv.cvtColor(temporal_input, cv.COLOR_GRAY2RGB)
        temporal_input = np.array([temporal_input])
        # preprocess_input may return a new array rather than modify in place,
        # so keep the returned values
        spatial_input = preprocess_input(spatial_input)
        temporal_input = preprocess_input(temporal_input)

        # Make prediction
        prediction = np.round(model.predict([spatial_input, temporal_input]))[0]
        is_fall = prediction == 1
        if is_fall:
            fall_frames_seen = min(fall_frames_seen + 1, MIN_NUM_FALL_FRAME)
        else:
            fall_frames_seen = max(fall_frames_seen - 1, 0)

        if fall_frames_seen == MIN_NUM_FALL_FRAME:
            fall_detected = True
        elif fall_frames_seen == 0:
            fall_detected = False

        cv.putText(
            labelled_frame, "Status: {}".format("Fall detected!" if fall_detected else "No fall"), (5, 20),
            fontFace = font, fontScale = 0.5, color = red if fall_detected else green, lineType = line_type
        )

        # Show images
        cv.imshow("Capture", labelled_frame)
        cv.imshow("Cropped", cropped)
        cv.imshow("MHI", mhi)

        # Compensate for elapsed time used to process frame
        wait_time = int(max(1, ms_per_frame - (timer() - start_time) * 1000))
        if cv.waitKey(wait_time) == 27:
            break

    video.release()
    cv.destroyAllWindows()
    annotation_file.close()
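
A typical invocation, with illustrative paths to a video/annotation pair from the FDD dataset (the exact layout depends on where the dataset is unpacked):

if __name__ == "__main__":
    start_fall_detector_FDD("FDD/Videos/video (1).avi",
                            "FDD/Annotation_files/video (1).txt")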
Example #7
def start_fall_detector_realtime(input_path = 0):
    ''' Capture RGB and MHI in real time and feed into model '''
    model = create_model(WEIGHTS_PATH)
    cap = cv.VideoCapture(input_path)
    if not cap.isOpened():
        print("Cannot open video/webcam {}".format(input_path))
        return

    MHI_DURATION_SHORT = 300 # Used for putting a bounding box on recent motion

    fps = int(cap.get(cv.CAP_PROP_FPS))
    cap_width = cap.get(cv.CAP_PROP_FRAME_WIDTH)
    cap_height = cap.get(cv.CAP_PROP_FRAME_HEIGHT)
    # Sample roughly every fps/10 frames, rounding half up, at least every frame
    interval = max(1, math.floor(fps / 10 + 0.5))
    ms_per_frame = 1000 / fps # milliseconds
    count = interval

    prev_mhi = [np.zeros((IMAGE_SIZE, IMAGE_SIZE), np.float32) for i in range(interval)]
    # Use a comprehension so each slot gets its own buffer; multiplying a
    # one-element list by interval would alias a single shared array
    prev_mhi_short = [np.zeros((IMAGE_SIZE, IMAGE_SIZE), np.float32) for i in range(interval)]
    prev_timestamp = [i * ms_per_frame for i in range(interval)]
    prev_frames = [None] * interval
    for i in range(interval):
        ret, frame = cap.read()
        frame = cv.resize(frame, (IMAGE_SIZE, IMAGE_SIZE), interpolation = cv.INTER_AREA)
        frame = cv.GaussianBlur(frame, GAUSSIAN_KERNEL, 0)
        prev_frames[i] = frame.copy()

    fall_frames_seen = 0 # Number of consecutive fall frames seen so far
    fall_detected = False
    MIN_NUM_FALL_FRAME = int(fps/10) # Need at least some number of frames to avoid flickery classifications

    cv.namedWindow("Capture")
    cv.namedWindow("Cropped")
    cv.namedWindow("MHI")
    cv.moveWindow("Capture", 100, 100)
    cv.moveWindow("Cropped", 500, 100)
    cv.moveWindow("MHI", 800, 100)

    while True:
        start_time = timer()
        ret, orig_frame = cap.read()
        if not ret:
            break

        # Create MHI
        prev_ind = count % interval
        prev_timestamp[prev_ind] += interval * ms_per_frame
        count += 1

        frame = cv.resize(orig_frame, (IMAGE_SIZE, IMAGE_SIZE), interpolation = cv.INTER_AREA)
        frame = cv.GaussianBlur(frame, GAUSSIAN_KERNEL, 0)
        frame_diff = cv.absdiff(frame, prev_frames[prev_ind])
        gray_diff = cv.cvtColor(frame_diff, cv.COLOR_BGR2GRAY)
        _, motion_mask = cv.threshold(gray_diff, THRESHOLD, 1, cv.THRESH_BINARY)
        prev_frames[prev_ind] = frame.copy()

        cv.motempl.updateMotionHistory(motion_mask, prev_mhi[prev_ind], prev_timestamp[prev_ind], MHI_DURATION)
        cv.motempl.updateMotionHistory(motion_mask, prev_mhi_short[prev_ind], prev_timestamp[prev_ind], MHI_DURATION_SHORT)
        mhi = np.uint8(np.clip((prev_mhi[prev_ind] - (prev_timestamp[prev_ind] - MHI_DURATION))/MHI_DURATION, 0, 1) * 255)
        mhi_short = np.uint8(np.clip((prev_mhi_short[prev_ind] - (prev_timestamp[prev_ind] - MHI_DURATION_SHORT))/MHI_DURATION_SHORT, 0, 1) * 255)

        # Crop out motion
        x_start = y_start = IMAGE_SIZE
        x_end = y_end = 0
        contours, _ = cv.findContours(mhi_short, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
        if (len(contours) > 0):
            for c in contours:
                contour = cv.approxPolyDP(c, 3, True)
                x, y, w, h = cv.boundingRect(contour)
                if x < x_start:
                    x_start = x
                if y < y_start:
                    y_start = y
                if x + w > x_end:
                    x_end = x + w
                if y + h > y_end:
                    y_end = y + h
        else:
            x_start = y_start = 0
            x_end = y_end = IMAGE_SIZE

        x_start = int(np.round(x_start / IMAGE_SIZE * cap_width))
        y_start = int(np.round(y_start / IMAGE_SIZE * cap_height))
        x_end = int(np.round(x_end / IMAGE_SIZE * cap_width))
        y_end = int(np.round(y_end / IMAGE_SIZE * cap_height))
        labelled_frame = orig_frame.copy()
        cv.rectangle(
            labelled_frame, (x_start, y_start), (x_end, y_end),
            color = green, lineType = line_type
        )
        cropped = orig_frame[y_start:y_end, x_start:x_end].copy()
        try:
            cropped = cv.resize(cropped, (IMAGE_SIZE, IMAGE_SIZE), interpolation = cv.INTER_LINEAR)
        except cv.error:  # empty crop from a degenerate motion box
            cropped = cv.resize(orig_frame, (IMAGE_SIZE, IMAGE_SIZE), interpolation = cv.INTER_LINEAR)

        # Prepare input
        spatial_input = cropped.copy().astype(np.float32)
        spatial_input = cv.cvtColor(spatial_input, cv.COLOR_BGR2RGB)
        spatial_input = np.array([spatial_input])
        temporal_input = mhi.copy().astype(np.float32)
        temporal_input = cv.cvtColor(temporal_input, cv.COLOR_GRAY2RGB)
        temporal_input = np.array([temporal_input])
        # preprocess_input may return a new array rather than modify in place,
        # so keep the returned values
        spatial_input = preprocess_input(spatial_input)
        temporal_input = preprocess_input(temporal_input)

        # Make prediction
        prediction = np.round(model.predict([spatial_input, temporal_input]))[0]
        is_fall = prediction == 1
        if is_fall:
            fall_frames_seen = min(fall_frames_seen + 1, MIN_NUM_FALL_FRAME)
        else:
            fall_frames_seen = max(fall_frames_seen - 1, 0)

        if fall_frames_seen == MIN_NUM_FALL_FRAME:
            fall_detected = True
        elif fall_frames_seen == 0:
            fall_detected = False

        cv.putText(
            labelled_frame, "Status: {}".format("Fall detected!" if fall_detected else "No fall"), (5, 20),
            fontFace = font, fontScale = 0.5, color = red if fall_detected else green, lineType = line_type
        )

        # Show images
        cv.imshow("Capture", labelled_frame)
        cv.imshow("Cropped", cropped)
        cv.imshow("MHI", mhi)

        # Compensate for elapsed time used to process frame
        wait_time = int(max(1, ms_per_frame - (timer() - start_time) * 1000))
        if cv.waitKey(wait_time) == 27:
            break

    cap.release()
    cv.destroyAllWindows()
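
Examples #6 and #7 depend on module-level imports and constants defined elsewhere in the script. A plausible set of definitions, where the specific values are assumptions and only the names are fixed by the code above:

import math
import cv2 as cv          # cv.motempl requires the opencv-contrib-python package
import numpy as np
from timeit import default_timer as timer

IMAGE_SIZE = 224            # Network input resolution (assumed)
GAUSSIAN_KERNEL = (3, 3)    # Blur kernel applied before frame differencing
THRESHOLD = 32              # Motion-mask binarization threshold
MHI_DURATION = 1000         # Motion-history window in milliseconds
WEIGHTS_PATH = "weights/weights.hdf5"

# Drawing settings for cv.rectangle and cv.putText
green = (0, 255, 0)
red = (0, 0, 255)
font = cv.FONT_HERSHEY_SIMPLEX
line_type = cv.LINE_AA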