def parse_vehicles(self, vehicles, ego_vehicle_id): # vehicles is a dictionary that maps each vehicle's id to # a dictionary of information about that vehicle. Each such dictionary # contains four items: the vehicle's id, position, orientation, and # bounding_box (represented as four points in GPS coordinates). vehicles_list = [] for veh_dict in vehicles.values(): vehicle_id = veh_dict['id'] location = pylot.utils.Location.from_gps(*veh_dict['position']) roll, pitch, yaw = veh_dict['orientation'] rotation = pylot.utils.Rotation(pitch, yaw, roll) if vehicle_id == ego_vehicle_id: # Can compare against canbus output to check that # transformations are working. self._logger.debug( 'Ego vehicle location with ground_obstacles: {}'.format( location)) else: vehicles_list.append( Obstacle( None, # We currently don't use bounding box 1.0, # confidence 'vehicle', vehicle_id, pylot.utils.Transform(location, rotation))) return vehicles_list
def parse_static_obstacles(self, static_obstacles): # Each static obstacle has an id and position. static_obstacles_list = [] for static_obstacle_dict in static_obstacles.values(): static_obstacle_id = static_obstacle_dict['id'] location = pylot.utils.Location.from_gps( *static_obstacle_dict['position']) static_obstacles_list.append( Obstacle( None, # bounding box 1.0, # confidence 'static_obstacle', static_obstacle_id, pylot.utils.Transform(location, pylot.utils.Rotation()))) return static_obstacles_list
def track(self, frame): """ Tracks obstacles in a frame. Args: frame (:py:class:`~pylot.perception.camera_frame.CameraFrame`): Frame to track in. """ self._tracker = SiamRPN_track(self._tracker, frame.frame) target_pos = self._tracker['target_pos'] target_sz = self._tracker['target_sz'] self.obstacle.bounding_box = BoundingBox2D( int(target_pos[0] - target_sz[0] / 2.0), int(target_pos[0] + target_sz[0] / 2.0), int(target_pos[1] - target_sz[1] / 2.0), int(target_pos[1] + target_sz[1] / 2.0)) return Obstacle(self.obstacle.bounding_box, self.obstacle.confidence, self.obstacle.label, self.obstacle.id)
def parse_people(self, people): # Similar to vehicles, each entry of people is a dictionary that # contains four items, the person's id, position, orientation, # and bounding box. people_list = [] for person_dict in people.values(): person_id = person_dict['id'] location = pylot.utils.Location.from_gps(*person_dict['position']) roll, pitch, yaw = person_dict['orientation'] rotation = pylot.utils.Rotation(pitch, yaw, roll) people_list.append( Obstacle( None, # bounding box 1.0, # confidence 'person', person_id, pylot.utils.Transform(location, rotation))) return people_list
def track(self, frame, obstacles=[]): """ Tracks obstacles in a frame. Args: frame (:py:class:`~pylot.perception.camera_frame.CameraFrame`): Frame to track in. """ # If obstacles, run deep sort to update tracker with detections. # Otherwise, step each confirmed track one step forward. if obstacles: bboxes, labels, confidence_scores, ids = [], [], [], [] for obstacle in obstacles: bboxes.append(obstacle.bounding_box.as_width_height_bbox()) labels.append(obstacle.label) confidence_scores.append(obstacle.confidence) ids.append(obstacle.id) self._deepsort.run_deep_sort(frame.frame, confidence_scores, bboxes, labels, ids) else: for track in self._deepsort.tracker.tracks: if track.is_confirmed(): track.predict(self._deepsort.tracker.kf) tracked_obstacles = [] for track in self._deepsort.tracker.tracks: if track.is_confirmed(): # Converts x, y, w, h bbox to tlbr bbox (top left and bottom # right coords). bbox = track.to_tlbr() # Converts to xmin, xmax, ymin, ymax format. xmin = int(bbox[0]) xmax = int(bbox[2]) ymin = int(bbox[1]) ymax = int(bbox[3]) if xmin < xmax and ymin < ymax: bbox = BoundingBox2D(xmin, xmax, ymin, ymax) tracked_obstacles.append( Obstacle(bbox, 0, track.label, track.track_id)) else: self._logger.error( "Tracker found invalid bounding box {} {} {} {}". format(xmin, xmax, ymin, ymax)) return True, tracked_obstacles
def on_frame_msg(self, msg, obstacle_tracking_stream): """Invoked when a FrameMessage is received on the camera stream.""" self._logger.debug('@{}: {} received frame'.format( msg.timestamp, self.config.name)) assert msg.frame.encoding == 'BGR', 'Expects BGR frames' image_np = msg.frame.as_bgr_numpy_array() results = self.run_model(image_np) obstacles = [] for res in results: track_id = res['tracking_id'] bbox = res['bbox'] score = res['score'] (label_id, ) = res['class'] - 1, if label_id > 80: continue label = self.trained_dataset.class_name[label_id] if label in ['Pedestrian', 'pedestrian']: label = 'person' elif label == 'Car': label = 'car' elif label == 'Cyclist': label == 'bicycle' if label in OBSTACLE_LABELS: bounding_box_2D = BoundingBox2D(bbox[0], bbox[2], bbox[1], bbox[3]) bounding_box_3D = None if 'dim' in res and 'loc' in res and 'rot_y' in res: bounding_box_3D = BoundingBox3D.from_dimensions( res['dim'], res['loc'], res['rot_y']) obstacles.append( Obstacle(bounding_box_3D, score, label, track_id, bounding_box_2D=bounding_box_2D)) obstacle_tracking_stream.send( ObstaclesMessage(msg.timestamp, obstacles, 0))
def track(self, frame): """ Tracks obstacles in a frame. Args: frame (:py:class:`~pylot.perception.camera_frame.CameraFrame`): Frame to track in. """ # each track in tracks has format ([xmin, ymin, xmax, ymax], id) obstacles = [] for track in self.tracker.trackers: coords = track.predict()[0].tolist() # changing to xmin, xmax, ymin, ymax format xmin = int(coords[0]) xmax = int(coords[2]) ymin = int(coords[1]) ymax = int(coords[3]) if xmin < xmax and ymin < ymax: bbox = BoundingBox2D(xmin, xmax, ymin, ymax) obstacles.append(Obstacle(bbox, 0, track.label, track.id)) else: self._logger.error( "Tracker found invalid bounding box {} {} {} {}".format( xmin, xmax, ymin, ymax)) return True, obstacles
def on_msg_camera_stream(self, msg, obstacles_stream): """Invoked whenever a frame message is received on the stream. Args: msg (:py:class:`~pylot.perception.messages.FrameMessage`): Message received. obstacles_stream (:py:class:`erdos.WriteStream`): Stream on which the operator sends :py:class:`~pylot.perception.messages.ObstaclesMessage` messages. """ self._logger.debug('@{}: {} received message'.format( msg.timestamp, self.config.name)) start_time = time.time() # The models expect BGR images. assert msg.frame.encoding == 'BGR', 'Expects BGR frames' num_detections, res_boxes, res_scores, res_classes = self.__run_model( msg.frame.frame) obstacles = [] for i in range(0, num_detections): if res_classes[i] in self._coco_labels: if (res_scores[i] >= self._flags.obstacle_detection_min_score_threshold): if (self._coco_labels[res_classes[i]] in OBSTACLE_LABELS): obstacles.append( Obstacle(BoundingBox2D( int(res_boxes[i][1] * msg.frame.camera_setup.width), int(res_boxes[i][3] * msg.frame.camera_setup.width), int(res_boxes[i][0] * msg.frame.camera_setup.height), int(res_boxes[i][2] * msg.frame.camera_setup.height)), res_scores[i], self._coco_labels[res_classes[i]], id=self._unique_id)) self._unique_id += 1 else: self._logger.warning( 'Ignoring non essential detection {}'.format( self._coco_labels[res_classes[i]])) else: self._logger.warning('Filtering unknown class: {}'.format( res_classes[i])) self._logger.debug('@{}: {} obstacles: {}'.format( msg.timestamp, self.config.name, obstacles)) # Get runtime in ms. runtime = (time.time() - start_time) * 1000 # Send out obstacles. obstacles_stream.send( ObstaclesMessage(msg.timestamp, obstacles, runtime)) obstacles_stream.send(erdos.WatermarkMessage(msg.timestamp)) if self._flags.log_detector_output: msg.frame.annotate_with_bounding_boxes(msg.timestamp, obstacles, None, self._bbox_colors) msg.frame.save(msg.timestamp.coordinates[0], self._flags.data_path, 'detector-{}'.format(self.config.name))
def on_watermark(self, timestamp, obstacles_stream): """Invoked whenever a frame message is received on the stream. Args: msg (:py:class:`~pylot.perception.messages.FrameMessage`): Message received. obstacles_stream (:py:class:`erdos.WriteStream`): Stream on which the operator sends :py:class:`~pylot.perception.messages.ObstaclesMessage` messages. """ if timestamp.is_top: return start_time = time.time() ttd_msg = self._ttd_msgs.popleft() frame_msg = self._frame_msgs.popleft() ttd = ttd_msg.data self.update_model_choice(ttd) frame = frame_msg.frame inputs = frame.as_rgb_numpy_array() detector_start_time = time.time() outputs_np = self._tf_session.run( self._signitures['prediction'], feed_dict={self._signitures['image_arrays']: [inputs]})[0] detector_end_time = time.time() self._logger.debug("@{}: detector runtime {}".format( timestamp, (detector_end_time - detector_start_time) * 1000)) obstacles = [] camera_setup = frame.camera_setup for _, ymin, xmin, ymax, xmax, score, _class in outputs_np: xmin, ymin, xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax) if _class in self._coco_labels: if (score >= self._flags.obstacle_detection_min_score_threshold and self._coco_labels[_class] in OBSTACLE_LABELS): xmin, xmax = max(0, xmin), min(xmax, camera_setup.width) ymin, ymax = max(0, ymin), min(ymax, camera_setup.height) if xmin < xmax and ymin < ymax: obstacles.append( Obstacle(BoundingBox2D(xmin, xmax, ymin, ymax), score, self._coco_labels[_class], id=self._unique_id)) self._unique_id += 1 self._csv_logger.info( "{},{},detection,{},{:4f}".format( pylot.utils.time_epoch_ms(), timestamp.coordinates[0], self._coco_labels[_class], score)) else: self._logger.debug( 'Filtering unknown class: {}'.format(_class)) if self._flags.log_detector_output: frame.annotate_with_bounding_boxes(timestamp, obstacles, None, self._bbox_colors) frame.save(timestamp.coordinates[0], self._flags.data_path, 'detector-{}'.format(self.config.name)) # end_time = time.time() obstacles_stream.send(ObstaclesMessage(timestamp, obstacles, 0)) operator_time_total_end = time.time() self._logger.debug("@{}: total time spent: {}".format( timestamp, (operator_time_total_end - start_time) * 1000))