def get_traffic_stop_det_objs(traffic_stops, camera_transform, depth_frame,
                              frame_width, frame_height, fov):
    """ Get traffic stop lane markings that are within the camera frame.

    Args:
        traffic_stops: List of traffic stop actors in the world.
        camera_transform: Camera transform in world coordinates.
        fov: Camera field of view.

    Returns:
        List of DetectedObjects.
    """
    det_objs = []
    bgr_intrinsic = create_intrinsic_matrix(frame_width, frame_height, fov)
    for transform, bbox in traffic_stops:
        bbox2d = _get_stop_markings_bbox(bbox, depth_frame, camera_transform,
                                         bgr_intrinsic, frame_width,
                                         frame_height)
        if bbox2d:
            det_objs.append(DetectedObject(bbox2d, 1.0, 'stop marking'))
    return det_objs

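# Illustrative sketch (not part of the original module): one common way a
# helper such as create_intrinsic_matrix is implemented for a pinhole camera
# is shown below. The focal length is derived from the horizontal field of
# view (in degrees) and the principal point is assumed to be the image
# center. This is an assumption about the helper's behavior, not its source.
import numpy as np


def _example_intrinsic_matrix(width, height, fov):
    # Focal length in pixels for a horizontal field of view given in degrees.
    focal_length = width / (2.0 * np.tan(np.radians(fov) / 2.0))
    return np.array([[focal_length, 0.0, width / 2.0],
                     [0.0, focal_length, height / 2.0],
                     [0.0, 0.0, 1.0]])
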
def track(self, frame):
    # Each track in self.tracker.trackers has the format
    # ([xmin, ymin, xmax, ymax], id).
    tracked_objects = []
    for track in self.tracker.trackers:
        coords = track.predict()[0].tolist()
        # Change to the (xmin, xmax, ymin, ymax) format.
        coords = (coords[0], coords[2], coords[1], coords[3])
        tracked_objects.append(DetectedObject(coords, "", 0, track.id))
    return True, tracked_objects

def track(self, frame):
    self._tracker = SiamRPN_track(self._tracker, frame)
    target_pos = self._tracker['target_pos']
    target_sz = self._tracker['target_sz']
    bbox = (int(target_pos[0] - target_sz[0] / 2.0),
            int(target_pos[0] + target_sz[0] / 2.0),
            int(target_pos[1] - target_sz[1] / 2.0),
            int(target_pos[1] + target_sz[1] / 2.0))
    return DetectedObject(bbox, "", 0)

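# Illustrative sketch (not part of the original tracker): SiamRPN reports the
# target as a center position and a size; the code above converts that into
# the (xmin, xmax, ymin, ymax) corner order used by DetectedObject. The
# helper below shows the same conversion on its own; its name is
# hypothetical.
def _center_size_to_corners(target_pos, target_sz):
    x, y = target_pos
    w, h = target_sz
    return (int(x - w / 2.0), int(x + w / 2.0),
            int(y - h / 2.0), int(y + h / 2.0))


# For example, a target centered at (100, 60) with size (40, 20) becomes
# (80, 120, 50, 70).
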
def get_traffic_light_det_objs_legacy(traffic_lights, vehicle_transform,
                                      camera_transform, depth_frame,
                                      frame_width, frame_height, fov,
                                      segmented_frame):
    """ Get the traffic lights that are within the camera frame.

    Note: this method works with Carla 0.8.4.

    Args:
        traffic_lights: List of traffic lights in the world.
        vehicle_transform: Ego-vehicle transform in world coordinates.
        camera_transform: Camera transform in world coordinates.
        fov: Camera field of view.
        segmented_frame: Segmented frame.
    """
    # Get 3d world positions for all traffic signs (some of which are
    # traffic lights).
    traffic_signs_frame = get_traffic_sign_pixels(segmented_frame)
    bboxes = get_bounding_boxes_from_segmented(traffic_signs_frame)

    # Get the positions of the bounding box centers.
    x_mids = [(bbox[0] + bbox[1]) / 2 for bbox in bboxes]
    y_mids = [(bbox[2] + bbox[3]) / 2 for bbox in bboxes]
    pos_3d = batch_get_3d_world_position_with_depth_map(
        x_mids, y_mids, depth_frame, frame_width, frame_height, fov,
        camera_transform)
    pos_and_bboxes = zip(pos_3d, bboxes)

    # Map traffic lights to bounding boxes based on 3d world position.
    tl_bboxes = match_bboxes_with_traffic_lights(vehicle_transform,
                                                 pos_and_bboxes,
                                                 traffic_lights)
    det_objs = []
    for bbox, color in tl_bboxes:
        if color == TrafficLightColor.GREEN:
            det_objs.append(DetectedObject(bbox, 1.0, 'green traffic light'))
        elif color == TrafficLightColor.YELLOW:
            det_objs.append(DetectedObject(bbox, 1.0, 'yellow traffic light'))
        elif color == TrafficLightColor.RED:
            det_objs.append(DetectedObject(bbox, 1.0, 'red traffic light'))
        else:
            det_objs.append(DetectedObject(bbox, 1.0, 'off traffic light'))
    return det_objs

def get_traffic_light_det_objs(traffic_lights, camera_transform, depth_array,
                               segmented_image, frame_width, frame_height,
                               town_name, fov=90):
    """ Get the traffic lights that are within the camera frame.

    Note: This method should be used with Carla 0.9.*
    """
    # Create the extrinsic and intrinsic matrices for the given camera.
    extrinsic_matrix = camera_transform.matrix
    intrinsic_matrix = create_intrinsic_matrix(frame_width, frame_height, fov)

    # Iterate over all the traffic lights, and figure out which ones are
    # facing us and are visible in the camera view.
    detected = []
    for light in traffic_lights:
        if not is_traffic_light_visible(camera_transform, light, town_name):
            continue
        bboxes = get_traffic_lights_bbox_state(camera_transform, [light],
                                               town_name)

        # Convert the returned bounding boxes to 2D and check if the
        # light is occluded. If not, add it to the detected object list.
        for box, color in bboxes:
            bounding_box = locations_3d_to_view(box, extrinsic_matrix,
                                                intrinsic_matrix)
            bounding_box = [(bb.x, bb.y, bb.z) for bb in bounding_box]
            thresholded_coordinates = get_bounding_box_in_camera_view(
                bounding_box, frame_width, frame_height)
            if not thresholded_coordinates:
                continue
            xmin, xmax, ymin, ymax = thresholded_coordinates

            # Crop the segmented and depth image to the given bounding box.
            cropped_image = segmented_image[ymin:ymax, xmin:xmax]
            cropped_depth = depth_array[ymin:ymax, xmin:xmax]

            if cropped_image.size > 0:
                # Label 12 marks traffic signs/lights in the Carla
                # segmentation; require at least 20% of the crop to carry
                # that label before accepting the detection.
                masked_image = np.zeros_like(cropped_image)
                masked_image[np.where(cropped_image == 12)] = 1
                if np.sum(masked_image) >= 0.20 * masked_image.size:
                    # Reject the light if the depth of the visible pixels
                    # disagrees with the projected bounding box (occlusion),
                    # or if the light is more than 150m away.
                    masked_depth = cropped_depth[np.where(masked_image == 1)]
                    mean_depth = np.mean(masked_depth) * 1000
                    if (abs(mean_depth - bounding_box[0][-1]) <= 2
                            and mean_depth < 150):
                        detected.append(
                            DetectedObject((xmin, xmax, ymin, ymax), 1.0,
                                           color.get_label()))
    return detected

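# Illustrative sketch (not part of the original module) of the occlusion test
# used above: a candidate crop is accepted only if enough of its pixels carry
# the traffic-light segmentation label and if the depth of those pixels
# agrees with the depth of the projected bounding box. The thresholds mirror
# the constants above, but the function name and defaults are hypothetical.
import numpy as np


def _example_is_light_visible(cropped_seg, cropped_depth, projected_depth_m,
                              label=12, min_coverage=0.20, max_depth_m=150,
                              depth_tolerance_m=2):
    if cropped_seg.size == 0:
        return False
    mask = cropped_seg == label
    # Require enough of the crop to be traffic-light pixels.
    if np.sum(mask) < min_coverage * mask.size:
        return False
    # The normalized depth frame is scaled by 1000 to obtain meters,
    # as in the code above.
    mean_depth_m = np.mean(cropped_depth[mask]) * 1000
    # Reject occluded (depth mismatch) or distant lights.
    return (abs(mean_depth_m - projected_depth_m) <= depth_tolerance_m
            and mean_depth_m < max_depth_m)
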
def __get_output_bboxes(self, results):
    """ Transform from CenterNet output format to our format."""
    output = []
    for category in range(1, self._opt.num_classes + 1):
        for bbox in results[category]:
            confidence = bbox[4]
            if confidence >= self._flags.detector_min_score_threshold:
                corners = (int(bbox[0]), int(bbox[2]), int(bbox[1]),
                           int(bbox[3]))
                output.append(
                    DetectedObject(corners, confidence,
                                   self._coco_labels[category]))
    return output

def __convert_to_detected_objs(self, boxes, scores, labels, height, width):
    index = 0
    detected_objects = []
    while index < len(boxes) and index < len(scores):
        if scores[index] >= self._flags.detector_min_score_threshold:
            ymin = int(boxes[index][0] * height)
            xmin = int(boxes[index][1] * width)
            ymax = int(boxes[index][2] * height)
            xmax = int(boxes[index][3] * width)
            corners = (xmin, xmax, ymin, ymax)
            detected_objects.append(
                DetectedObject(corners, scores[index], labels[index]))
        index += 1
    return detected_objects

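# Illustrative sketch (not part of the original class): detectors following
# the TensorFlow object-detection convention return boxes as normalized
# [ymin, xmin, ymax, xmax] values in [0, 1]; the loop above scales them to
# pixel coordinates and reorders them to (xmin, xmax, ymin, ymax). The helper
# name below is hypothetical.
def _example_normalized_box_to_corners(box, height, width):
    ymin = int(box[0] * height)
    xmin = int(box[1] * width)
    ymax = int(box[2] * height)
    xmax = int(box[3] * width)
    return (xmin, xmax, ymin, ymax)


# For example, on an 800x600 (width x height) frame the box
# [0.5, 0.25, 0.75, 0.5] yields the corners (200, 400, 300, 450).
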
def track(self, frame, confidence_scores=None, bboxes=None):
    if bboxes:
        detections = self.convert_detections_for_deepsort_alg(bboxes)
        self.tracker, detections_class = self._deepsort.run_deep_sort(
            frame, confidence_scores, detections)
    if self.tracker:
        tracked_objects = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # Converts the (x, y, w, h) bbox to a tlbr bbox
            # (top-left and bottom-right coordinates).
            bbox = track.to_tlbr()
            # Converts to the (xmin, xmax, ymin, ymax) format.
            corners = (bbox[0], bbox[2], bbox[1], bbox[3])
            tracked_objects.append(
                DetectedObject(corners, "", 0, track.track_id))
        return True, tracked_objects
    return False, []

def __get_vehicles(self, vehicles, vehicle_transform, depth_array):
    """ Transforms vehicles into detected objects.

    Args:
        vehicles: List of Vehicle objects.
        vehicle_transform: Ego-vehicle transform.
        depth_array: Depth frame taken at the time when the vehicles were
            collected.
    """
    det_objs = []
    for vehicle in vehicles:
        bbox = get_2d_bbox_from_3d_box(depth_array, vehicle_transform,
                                       vehicle.transform,
                                       vehicle.bounding_box,
                                       self._bgr_transform,
                                       self._bgr_intrinsic,
                                       self._bgr_img_size, 3.0, 3.0)
        if bbox is not None:
            det_objs.append(DetectedObject(bbox, 1.0, 'vehicle'))
    return det_objs

def __get_pedestrians(self, pedestrians, vehicle_transform, depth_array,
                      segmented_image):
    """ Transforms pedestrians into detected objects.

    Args:
        pedestrians: List of Pedestrian objects.
        vehicle_transform: Ego-vehicle transform.
        depth_array: Depth frame taken at the time when the pedestrians
            were collected.
        segmented_image: The segmentation frame taken at the time when
            the pedestrians were collected.
    """
    det_objs = []
    for pedestrian in pedestrians:
        bbox = get_2d_bbox_from_3d_box(
            vehicle_transform, pedestrian.transform,
            pedestrian.bounding_box, self._bgr_transform,
            self._bgr_intrinsic, self._bgr_img_size, depth_array,
            segmented_image, 4)
        if bbox is not None:
            det_objs.append(
                DetectedObject(bbox, 1.0, 'pedestrian', pedestrian.id))
    return det_objs

def get_traffic_light_det_objs(traffic_lights, camera_transform, depth_array,
                               frame_width, frame_height, town_name, fov=90):
    """ Get the traffic lights that are within the camera frame.

    Note: This method should be used with Carla 0.9.*
    """
    # Get the location of the bounding boxes for these lights.
    bbox_state = get_traffic_lights_bbox_state(camera_transform,
                                               traffic_lights, town_name)

    # Convert the bounding boxes to a camera view.
    extrinsic_matrix = camera_transform.matrix
    intrinsic_matrix = create_intrinsic_matrix(frame_width, frame_height, fov)
    det_objs = []
    for box, color in bbox_state:
        bounding_box = []
        for location in box:
            bounding_box.append(
                location_3d_to_view(location, extrinsic_matrix,
                                    intrinsic_matrix))

        # Check if they are in front and visible.
        z_values = [loc.z > 0 for loc in bounding_box]
        if not any(z_values):
            continue

        # They are in the front, now find if they are visible in the view.
        x_min = x_max = int(bounding_box[0].x)
        y_min = y_max = int(bounding_box[0].y)
        for i in range(1, 4):
            x_min = min(x_min, int(bounding_box[i].x))
            x_max = max(x_max, int(bounding_box[i].x))
            y_min = min(y_min, int(bounding_box[i].y))
            y_max = max(y_max, int(bounding_box[i].y))
        x_bounds = (x_min >= 0 and x_min < frame_width
                    and x_max >= 0 and x_max < frame_width)
        y_bounds = (y_min >= 0 and y_min < frame_height
                    and y_max >= 0 and y_max < frame_height)
        if (x_bounds and y_bounds and x_max - x_min >= 3
                and y_max - y_min > 6):
            # Use integer division so the results can index the depth array.
            middle_x = (x_min + x_max) // 2
            middle_y = (y_min + y_max) // 2
            depth = depth_array[middle_y][middle_x] * 1000
            # Ignore the traffic light if it is occluded or far away.
            if abs(depth - bounding_box[0].z) > 2 or depth > 150:
                continue
            if color == TrafficLightColor.GREEN:
                label = 'green traffic light'
            elif color == TrafficLightColor.YELLOW:
                label = 'yellow traffic light'
            elif color == TrafficLightColor.RED:
                label = 'red traffic light'
            else:
                label = 'off traffic light'
            det_objs.append(
                DetectedObject((x_min, x_max, y_min, y_max), 1.0, label))
    return det_objs
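
# Illustrative sketch (not part of the original module) of what
# location_3d_to_view is presumed to do: transform a world-space point into
# the camera frame with the inverse of the camera's extrinsic matrix, remap
# the Unreal-style axes (x forward, y right, z up) to camera axes, and
# project with the intrinsic matrix. The axis remapping and the helper name
# are assumptions, not the library's actual code.
import numpy as np


def _example_location_3d_to_view(world_point, extrinsic_matrix,
                                 intrinsic_matrix):
    # Homogeneous world coordinates -> camera coordinates.
    point = np.append(np.asarray(world_point, dtype=float), 1.0)
    camera_point = np.linalg.inv(extrinsic_matrix).dot(point)
    # Remap to (right, down, forward) so that z is the view depth.
    view_point = np.array([camera_point[1], -camera_point[2],
                           camera_point[0]])
    # Perspective projection; x and y are pixel coordinates, z is depth.
    pixel = intrinsic_matrix.dot(view_point)
    return (pixel[0] / pixel[2], pixel[1] / pixel[2], pixel[2])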