def train_model(self, csv_file, root_dir, num_epochs, batch_size, silent=False):
    """Trains the model.

    This method also visualizes the training of the network. It shows the
    progress in terms of step, batch, and epoch number, the loss, and the
    plot from the batch. It also outputs a plot of the loss over time.

    Args:
        csv_file (string): File address of the csv_file for training.
        root_dir (string): Root directory of the data for training.
        num_epochs (int): Number of epochs to train for.
        batch_size (int): Number of objects in each batch.
        silent (bool): Whether to show the plot or not. True hides the plot.
    """
    # Start by making the tkinter parts
    root = tk.Tk()
    root.title("DriveNet Training")
    root.geometry("350x130")

    # Create a timer and counter to calculate the processing rate
    timer = Timer()
    counter = 0

    # Plot save location
    plot_loc = path.join(
        path.split(self.save_dir)[0],
        strftime("%Y_%m_%d_%H-%M-%S", gmtime()) + '-loss_data.csv')

    # Configure the grid and geometry
    # -----------------------
    # | step      | rate    |
    # | epoch     | loss    |
    # |    status message   |
    # -----------------------
    root.columnconfigure(0, minsize=175)
    root.columnconfigure(1, minsize=175)

    # Prepare tk variables with default values
    step_var = tk.StringVar(master=root, value="Step: 0/0")
    rate_var = tk.StringVar(master=root, value="Rate: 0 steps/s")
    epoch_var = tk.StringVar(master=root, value="Epoch: 1/{}".format(num_epochs))
    loss_var = tk.StringVar(master=root, value="Loss: 0")
    time_var = tk.StringVar(master=root, value="Time left: 0:00")
    status = tk.StringVar(master=root, value="Preparing dataset")

    # Prepare tk labels to be put on the grid
    tk.Label(root, textvariable=step_var).grid(row=0, column=0, sticky="W", padx=5, pady=5)
    tk.Label(root, textvariable=rate_var).grid(row=0, column=1, sticky="W", padx=5, pady=5)
    tk.Label(root, textvariable=epoch_var).grid(row=1, column=0, sticky="W", padx=5, pady=5)
    tk.Label(root, textvariable=loss_var).grid(row=1, column=1, sticky="W", padx=5, pady=5)
    tk.Label(root, textvariable=status).grid(row=3, column=0, columnspan=2, sticky="SW", padx=5, pady=5)
    tk.Label(root, textvariable=time_var).grid(row=2, column=0, sticky="W", padx=5, pady=5)

    # Update root so it actually shows something
    root.update_idletasks()
    root.update()

    # Open file for loss data plot
    loss_file = open(plot_loc, 'a')

    # Prepare the dataset and its corresponding dataloader
    complete_data = DrivingSimDataset(csv_file, root_dir)
    train_loader = DataLoader(dataset=complete_data, batch_size=batch_size, shuffle=True)

    status.set("Data sets loaded")
    root.update_idletasks()
    root.update()

    status.set("Training")
    root.update_idletasks()
    root.update()

    self.load_state_dict()
    if not path.isfile(self.save_dir):
        status.set("Weights do not exist. Running with random weights.")

    for epoch in range(num_epochs):
        status.set("Training: all")
        root.update_idletasks()
        root.update()

        # Former different loaders are now one single loader for all data
        total_step = len(train_loader)

        for data in enumerate(train_loader):
            # Run the forward pass.
            # data[0] is the iteration, data[1] is the data.
            images = data[1]['image']
            sec = data[1]['sec']

            # Put the segmented and normal images into one tensor
            images = cat((images, sec), dim=1)
            vehicle_info = (data[1]["vehicle_commands"], data[1]["cmd"])

            # Prep the target by turning it into a CUDA compatible format
            car_data = vehicle_info[0].to(self.device, non_blocking=True)

            images = torch.unsqueeze(images, 0)
            self.run_model(images.to(self.device, non_blocking=True),
                           vehicle_info, batch_size)

            # Calculate the loss
            if self.out is None:
                raise ValueError("forward() has not been run properly.")
            loss = self.criterion(self.out, car_data)

            # Zero grad, then backprop and perform Adam optimization
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the displayed data
            step_var.set("Step: {0}/{1}".format(data[0] + 1, total_step))
            epoch_var.set("Epoch: {0}/{1}".format(epoch + 1, num_epochs))
            loss_var.set("Loss: {:.3f}".format(loss.item()))

            counter += 1
            if timer.elapsed_seconds_since_lap() > 0.3:
                sps = float(counter) / timer.elapsed_seconds_since_lap()
                rate_var.set("Rate: {:.2f} steps/s".format(sps))
                timer.lap()
                counter = 0

                if sps == 0:
                    time_left = "NaN"
                else:
                    time_left = int(((total_step * num_epochs)
                                     - ((float(data[0]) + 1.0) + (total_step * epoch))) / sps)
                    time_left = str(datetime.timedelta(seconds=time_left))
                time_var.set("Time left: {}".format(time_left))

                root.update()
                root.update_idletasks()

            loss_file.write("{}\n".format(loss.item()))

    # Now save the loss file and the weights
    loss_file.close()

    # Save the bias and weights
    torch.save(self.network.state_dict(), self.save_dir)
    torch.cuda.empty_cache()

    status.set("Done")
    if not silent:
        PlotIt(plot_loc)
    root.mainloop()
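# The time-left estimate in the loop above is simply (steps remaining) /
# (measured steps per second). The same computation as a standalone helper,
# purely for illustration -- this function is a sketch, not part of the
# project, and assumes the module-level `import datetime` used above:
def estimate_time_left(step, epoch, total_step, num_epochs, steps_per_second):
    """Estimate remaining training time, mirroring the in-loop computation."""
    if steps_per_second == 0:
        return "NaN"
    steps_done = (step + 1) + total_step * epoch
    steps_remaining = total_step * num_epochs - steps_done
    return str(datetime.timedelta(seconds=int(steps_remaining / steps_per_second)))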
class Controller:
    def __init__(self, network):
        """Acts as a controller object that sends and receives data from AirSim.

        This class acts as the interface between the network and the AirSim
        simulation inside of Unreal Engine. It is able to receive camera
        images from AirSim as well as send driving commands to it, and is
        responsible for running the network.

        Control Scheme:
            NUM_4 : Left
            NUM_8 : Forwards
            NUM_6 : Right
            Q     : Quit
            SPACE : Reset

        Args:
            network (runner.Runner): A PyTorch network wrapped by a runner.
        """
        # Initialize AirSim connection
        self.client = airsim.CarClient()
        self.client.confirmConnection()
        self.client.enableApiControl(True)

        # Set up the network
        self.network = network

        # Set up timers for fps counting
        self._timer = Timer()
        self.counter = 0

        # Set up display variables
        self._display = None
        self._main_image = None
        self.fps_text = "FPS: 0"
        self.direction_text = "Direction: Forwards"
        self._text_font = None

        # Directions:
        # -1 : Left
        #  0 : Forwards
        #  1 : Right
        self._direction = 0  # Direction defaults to forwards

        self.out = None           # Network output
        self.throttle = 0         # Throttle output
        self.max_throttle = 0.35  # Throttle limit

        # Quitting
        self._request_quit = False

    def execute(self):
        """Launch PyGame."""
        pygame.init()
        self.__init_game()

        # Initialize fonts
        if not pygame.font.get_init():
            pygame.font.init()
        self._text_font = pygame.font.SysFont("helvetica", 24)

        while not self._request_quit:
            self.__on_loop()
            self.__on_render()

        if self._request_quit:
            pygame.display.quit()
            pygame.quit()
            self.client.enableApiControl(False)  # Give control back to the user
            return

    def __init_game(self):
        """Initializes the PyGame window and creates a new episode.

        This is separate from the main init method because the init is
        intended to create an instance of the class, but not to start the
        game objects yet.
        """
        self.__on_reset()
        self._display = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT),
                                                pygame.HWSURFACE | pygame.DOUBLEBUF)
        self.network.load_state_dict()
        print("PyGame started")

    def __on_reset(self):
        """Resets the state of the client."""
        print("Resetting client")
        self.client.reset()
        self._timer = Timer()

    def __on_loop(self):
        """Commands to execute on every loop."""
        # Make time tick
        self._timer.tick()

        # Get an image from Unreal
        response = self.client.simGetImages([
            airsim.ImageRequest("0", airsim.ImageType.Scene, False, False)])
        rgb = None
        if response:
            rgb = self.__response_to_cv(response[0], 3)
        self._main_image = rgb

        # Get key presses and parse them
        events = pygame.event.get()
        for event in events:
            if event.type == pygame.KEYDOWN:
                self.__parse_event(event)
            if event.type == pygame.QUIT:
                self._request_quit = True

        # Run the network. First convert the image to a tensor.
        rgb = self.__to_tensor(rgb).float().to(self.network.device)
        self.out = self.network.run_model(
            torch.unsqueeze(torch.unsqueeze(rgb, 0), 0),
            [0, [self._direction]], 1)

        # Get its data, then to numpy, then to a tuple
        self.out = tuple(self.out.cpu().detach().numpy())

        # Now send the command to AirSim
        if MAX_THROTTLE_ONLY:
            self.throttle = self.max_throttle
        else:
            self.throttle = self.out[0][1]
        self.__send_command((self.out[0][0], self.throttle))

        # Computation is now complete. Add to the counter.
        self.counter += 1

        # Determine, then update, the direction
        if self._direction == 0:
            direction = "Forward"
        elif self._direction == -1:
            direction = "Left"
        else:
            direction = "Right"
        self.direction_text = "Direction: {0}".format(direction)

        # Update FPS
        if self._timer.elapsed_seconds_since_lap() > 0.3:
            # Determine FPS
            fps = float(self.counter) / self._timer.elapsed_seconds_since_lap()

            # Update the info
            self.fps_text = "FPS: {0}".format(int(fps))

            # Reset counters
            self.counter = 0
            self._timer.lap()

        pygame.display.update()  # Finally, update the display.

    def __on_render(self):
        """Renders the PyGame window itself."""
        if self._main_image is not None:
            # If there is an image in the pipeline, render it.
            surface_main = pygame.surfarray.make_surface(
                self._main_image.swapaxes(0, 1))
            self._display.blit(surface_main, (0, 0))

        # Render white fill to "reset" the text
        self._display.fill((255, 255, 255),
                           rect=pygame.Rect(0, 64, WINDOW_WIDTH, WINDOW_HEIGHT - 64))

        # Create the text in the window
        surface_fps = self._text_font.render(self.fps_text, True, (0, 0, 0))
        surface_direction = self._text_font.render(self.direction_text, True, (0, 0, 0))
        if self.out is None:
            # Default output shaped like the network output: ((steering, throttle),)
            self.out = ((0.0, 0.0),)
        surface_steering = self._text_font.render(
            "Steering: %.2f" % self.out[0][0], True, (0, 0, 0))
        surface_throttle = self._text_font.render(
            "Throttle: %.2f" % self.throttle, True, (0, 0, 0))

        # And now render that text
        self._display.blit(surface_fps, (6, 70))
        self._display.blit(surface_direction, (120, 70))
        self._display.blit(surface_steering, (6, 100))
        self._display.blit(surface_throttle, (6, 130))

    @staticmethod
    def __response_to_cv(r, channels):
        if r.compress:
            image = cv2.imdecode(np.fromstring(r.image_data_uint8, dtype=np.uint8), 1)
            image = image.reshape(r.height, r.width, channels)
            image = cv2.cvtColor(image[:, :, 0:channels], cv2.COLOR_RGB2BGR)
        else:
            image = np.frombuffer(r.image_data_uint8, dtype=np.uint8)
            image = image.reshape(r.height, r.width, channels + 1)
            image = image[:, :, 0:channels]
        return image

    def __parse_event(self, event):
        """Parses PyGame events.

        Args:
            event (pygame.Event): The PyGame event to be parsed.
        """
        if event.key == K_KP8:
            self._direction = 0
        elif event.key == K_KP4:
            self._direction = -1
        elif event.key == K_KP6:
            self._direction = 1
        elif event.key == K_SPACE:
            self.__on_reset()
        elif event.key == K_q:
            self._request_quit = True

    def __send_command(self, command):
        """Sends driving commands over the AirSim API.

        Args:
            command (tuple): A tuple in the form (steering, throttle).
        """
        car_control = airsim.CarControls()
        car_control.steering = float(command[0])
        car_control.throttle = command[1]
        self.client.setCarControls(car_control)

    @staticmethod
    def __to_tensor(image):
        """Turns an image into a tensor.

        Args:
            image: The image to be converted as an array of uncompressed bits.

        Returns:
            (torch.Tensor) The image as a tensor.
        """
        image = image.transpose(2, 0, 1)
        return torch.from_numpy(image)
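# __response_to_cv above drops the alpha channel from AirSim's raw 4-channel
# byte buffer. A self-contained illustration of that reshape/slice path,
# using synthetic bytes instead of a live AirSim response (sketch only):
import numpy as np

height, width, channels = 2, 2, 3
raw = bytes(range(height * width * (channels + 1)))     # fake 2x2 BGRA payload
decoded = np.frombuffer(raw, dtype=np.uint8)
decoded = decoded.reshape(height, width, channels + 1)  # H x W x 4
decoded = decoded[:, :, 0:channels]                     # keep the first 3 channels
assert decoded.shape == (2, 2, 3)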
class CarlaGame(object):
    def __init__(self, carla_client, args):
        self.client = carla_client
        (self._carla_settings, self._intrinsic,
         self._camera_to_car_transform,
         self._lidar_to_car_transform) = make_carla_settings(args)
        self._timer = None
        self._display = None
        self._main_image = None
        self._mini_view_image1 = None
        self._mini_view_image2 = None
        self._enable_autopilot = args.autopilot
        self._lidar_measurement = None
        self._map_view = None
        self._is_on_reverse = False
        self._city_name = args.map_name
        self._map = CarlaMap(self._city_name, 16.43, 50.0) if self._city_name is not None else None
        self._map_shape = self._map.map_image.shape if self._city_name is not None else None
        self._map_view = self._map.get_map(WINDOW_HEIGHT) if self._city_name is not None else None
        self._position = None
        self._agent_positions = None
        self.captured_frame_no = 9
        self._measurements = None
        self._extrinsic = None
        # To keep track of how far the car has driven since the last capture of data
        self._agent_location_on_last_capture = None
        self._frames_since_last_capture = 0
        # How many frames we have captured since reset
        self._captured_frames_since_restart = 0
        self._build_network()

    def _build_network(self):
        cfg_from_file(CONFIG_FILE)
        # Set batch size, GPU count, and worker count to 1
        cfg.TRAIN.CONFIG.BATCH_SIZE = 1  # only bs=1 is supported when testing
        cfg.TRAIN.CONFIG.GPU_NUM = 1
        cfg.DATA_LOADER.NUM_THREADS = 1
        cfg.TEST.WITH_GT = False
        self.evaluator = Evaluator(config=cfg, model_path=MODEL_PATH)

    def execute(self):
        """Launch the PyGame loop."""
        pygame.init()
        self._initialize_game()
        try:
            while True:
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        return
                reset = self._on_loop()
                if not reset:
                    self._on_render()
        finally:
            pygame.quit()

    def _initialize_game(self):
        if self._city_name is not None:
            self._display = pygame.display.set_mode(
                (WINDOW_WIDTH + int((WINDOW_HEIGHT / float(self._map.map_image.shape[0]))
                                    * self._map.map_image.shape[1]), WINDOW_HEIGHT),
                pygame.HWSURFACE | pygame.DOUBLEBUF)
        else:
            self._display = pygame.display.set_mode(
                (WINDOW_WIDTH, WINDOW_HEIGHT),
                pygame.HWSURFACE | pygame.DOUBLEBUF)
        logging.debug('pygame started')
        self._on_new_episode()

    def _on_new_episode(self):
        self._carla_settings.randomize_seeds()
        self._carla_settings.randomize_weather()
        scene = self.client.load_settings(self._carla_settings)
        number_of_player_starts = len(scene.player_start_spots)
        player_start = np.random.randint(number_of_player_starts)
        logging.info('Starting new episode...')
        self.client.start_episode(player_start)
        self._timer = Timer()
        self._is_on_reverse = False
        # Reset all tracking variables
        self._agent_location_on_last_capture = None
        self._frames_since_last_capture = 0
        self._captured_frames_since_restart = 0

    def _on_loop(self):
        self._timer.tick()
        measurements, sensor_data = self.client.read_data()

        is_stuck = self._frames_since_last_capture >= NUM_EMPTY_FRAMES_BEFORE_RESET
        is_stuck = False  # Stuck detection is disabled in this variant
        is_enough_datapoints = (self._captured_frames_since_restart + 1) % NUM_RECORDINGS_BEFORE_RESET == 0

        if (is_stuck or is_enough_datapoints) and GEN_DATA:
            logging.warning("Is stuck: {}, is_enough_datapoints: {}".format(
                is_stuck, is_enough_datapoints))
            self._on_new_episode()
            # If we don't sleep, the client will continue to render
            return True

        # (Extrinsic) Rt matrix: (camera) local 3D to world 3D.
        # Get the transform from the player protobuf transformation.
        world_transform = Transform(measurements.player_measurements.transform)
        # Compute the final transformation matrix.
        self._extrinsic = world_transform * self._camera_to_car_transform
        self._measurements = measurements
        self._last_player_location = measurements.player_measurements.transform.location
        self._main_image = sensor_data.get('CameraRGB', None)
        self._lidar_measurement = sensor_data.get('Lidar32', None)
        self._depth_image = sensor_data.get('DepthCamera', None)

        # Print measurements every second.
        if self._timer.elapsed_seconds_since_lap() > 1.0:
            if self._city_name is not None:
                # Get the car's position on the map.
                map_position = self._map.convert_to_pixel([
                    measurements.player_measurements.transform.location.x,
                    measurements.player_measurements.transform.location.y,
                    measurements.player_measurements.transform.location.z])
                # Get the orientation of the road the car is on.
                lane_orientation = self._map.get_lane_orientation([
                    measurements.player_measurements.transform.location.x,
                    measurements.player_measurements.transform.location.y,
                    measurements.player_measurements.transform.location.z])
                MeasurementsDisplayHelper.print_player_measurements_map(
                    measurements.player_measurements, map_position,
                    lane_orientation, self._timer)
            else:
                MeasurementsDisplayHelper.print_player_measurements(
                    measurements.player_measurements, self._timer)
            # Plot position on the map as well.
            self._timer.lap()

        control = self._get_keyboard_control(pygame.key.get_pressed())

        # Set the player position
        if self._city_name is not None:
            self._position = self._map.convert_to_pixel([
                measurements.player_measurements.transform.location.x,
                measurements.player_measurements.transform.location.y,
                measurements.player_measurements.transform.location.z])
            self._agent_positions = measurements.non_player_agents

        if control is None:
            self._on_new_episode()
        elif self._enable_autopilot:
            self.client.send_control(measurements.player_measurements.autopilot_control)
        else:
            self.client.send_control(control)

    def _get_keyboard_control(self, keys):
        """Return a VehicleControl message based on the pressed keys.

        Returns None if a new episode was requested.
        """
        control = KeyboardHelper.get_keyboard_control(
            keys, self._is_on_reverse, self._enable_autopilot)
        if control is not None:
            control, self._is_on_reverse, self._enable_autopilot = control
        return control

    def _on_render(self):
        datapoints = []

        if self._main_image is not None and self._depth_image is not None:
            # Convert main image
            image = image_converter.to_rgb_array(self._main_image)

            # Retrieve and draw datapoints
            image, datapoints = self._generate_datapoints(image)

            # Draw lidar. The camera coordinate system is left, up, forwards.
            if VISUALIZE_LIDAR:
                # Shift bboxes relative to the pitch and roll of the player
                rotation = self._measurements.player_measurements.transform.rotation
                pitch, roll, yaw = rotation.pitch, rotation.roll, rotation.yaw
                # Measurements are in degrees; convert to radians
                pitch = degrees_to_radians(pitch)
                roll = degrees_to_radians(roll)
                yaw = degrees_to_radians(yaw)
                print('pitch: ', pitch)
                print('roll: ', roll)
                print('yaw: ', yaw)
                # Rotation matrix for pitch
                rotP = np.array([[cos(pitch), 0, sin(pitch)],
                                 [0, 1, 0],
                                 [-sin(pitch), 0, cos(pitch)]])
                # Rotation matrix for roll
                rotR = np.array([[1, 0, 0],
                                 [0, cos(roll), -sin(roll)],
                                 [0, sin(roll), cos(roll)]])
                # Combined rotation matrix; must be applied in order roll, pitch, yaw
                rotRP = np.matmul(rotR, rotP)
                # Take the points from the point cloud and transform to car space
                point_cloud = np.array(
                    self._lidar_to_car_transform.transform_points(
                        self._lidar_measurement.data))
                point_cloud[:, 2] -= LIDAR_HEIGHT_POS
                point_cloud = np.matmul(rotRP, point_cloud.T).T
                # Transform to camera space by the inverse of the camera_to_car transform
                point_cloud_cam = self._camera_to_car_transform.inverse().transform_points(point_cloud)
                point_cloud_cam[:, 1] += LIDAR_HEIGHT_POS
                image = lidar_utils.project_point_cloud(
                    image, point_cloud_cam, self._intrinsic, 1)

            # Display image
            surface = pygame.surfarray.make_surface(image.swapaxes(0, 1))
            self._display.blit(surface, (0, 0))
            pygame.display.flip()

            if (self._timer.step + 1) % STEPS_BETWEEN_RECORDINGS == 0:
                if datapoints:
                    # Avoid doing this twice or unnecessarily often
                    if not VISUALIZE_LIDAR:
                        # Shift bboxes relative to the pitch and roll of the player
                        rotation = self._measurements.player_measurements.transform.rotation
                        pitch, roll, yaw = rotation.pitch, rotation.roll, rotation.yaw
                        # Measurements are in degrees; convert to radians
                        pitch = degrees_to_radians(pitch)
                        roll = degrees_to_radians(roll)
                        yaw = degrees_to_radians(yaw)
                        print('pitch: ', pitch)
                        print('roll: ', roll)
                        print('yaw: ', yaw)
                        # Rotation matrix for pitch
                        rotP = np.array([[cos(pitch), 0, sin(pitch)],
                                         [0, 1, 0],
                                         [-sin(pitch), 0, cos(pitch)]])
                        # Rotation matrix for roll
                        rotR = np.array([[1, 0, 0],
                                         [0, cos(roll), -sin(roll)],
                                         [0, sin(roll), cos(roll)]])
                        # Combined rotation matrix; must be applied in order roll, pitch, yaw
                        rotRP = np.matmul(rotR, rotP)
                        # Take the points from the point cloud and transform to car space
                        point_cloud = np.array(
                            self._lidar_to_car_transform.transform_points(
                                self._lidar_measurement.data))
                        point_cloud[:, 2] -= LIDAR_HEIGHT_POS
                        point_cloud = np.matmul(rotRP, point_cloud.T).T

                    self._update_agent_location()
                    # Save screen, lidar and KITTI training labels together with
                    # calibration and groundplane files (disabled in this variant)
                    # self._save_training_files(datapoints, point_cloud)
                    img = get_image_data(image_converter.to_rgb_array(self._main_image))
                    lid = get_lidar_data(point_cloud)
                    calib = get_calibration_matrices(self._intrinsic, self._extrinsic)
                    mess = {
                        "idx": self.captured_frame_no,
                        "image_2": img,
                        "velodyne": lid,
                        "calib": calib
                    }
                    annos = self.evaluator.predict(mess)
                    # annos = self.evaluator.evaluate(self.captured_frame_no)
                    print(annos)

                    scores = annos["score"]
                    vertices = annos["vertices"]
                    bbox = annos["bbox"]
                    for i, score in enumerate(scores):
                        if score * 100 > 35:
                            # Draw the 3D box
                            image = draw_projected_box3d(
                                image, vertices[i], color=(0, 0, 255), thickness=1)
                            image = cv2.putText(
                                image, str(round(score * 100, 2)),
                                (int(bbox[i][0]), int(bbox[i][1])),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.3,
                                (255, 0, 255), 1, cv2.LINE_AA)
                            # Draw the 2D box
                            # x1, y1, x2, y2 = bbox[i]
                            # cv2.rectangle(image, pt1=(x1, y1), pt2=(x2, y2),
                            #               color=(0, 0, 255), thickness=2)

                    # Display image
                    surface = pygame.surfarray.make_surface(image.swapaxes(0, 1))
                    self._display.blit(surface, (0, 0))
                    pygame.display.flip()

                    self.captured_frame_no += 1
                    self._captured_frames_since_restart += 1
                    self._frames_since_last_capture = 0
                    time.sleep(0.5)
                else:
                    logging.debug("Could save datapoint, but none were generated this frame")
            else:
                self._frames_since_last_capture += 1
                logging.debug(
                    "Could not save training data - no visible agents of selected classes in scene")

    def _update_agent_location(self):
        self._agent_location_on_last_capture = self._measurements.player_measurements.transform.location

    def _generate_datapoints(self, image):
        """Returns a list of datapoints (labels and such) that are generated
        this frame, together with the main image.
        """
        datapoints = []
        image = image.copy()

        # Remove this
        rotRP = np.identity(3)

        # Stores all datapoints for the current frames
        for agent in self._measurements.non_player_agents:
            if should_detect_class(agent) and GEN_DATA:
                image, kitti_datapoint = create_kitti_data_point(
                    agent, self._intrinsic, self._extrinsic.matrix, image,
                    self._depth_image, self._measurements.player_measurements,
                    rotRP)
                if kitti_datapoint:
                    datapoints.append(kitti_datapoint)
        return image, datapoints

    def _save_training_files(self, datapoints, point_cloud):
        logging.info("Attempting to save at timer step {}, frame no: {}".format(
            self._timer.step, self.captured_frame_no))
        groundplane_fname = GROUNDPLANE_PATH.format(self.captured_frame_no)
        lidar_fname = LIDAR_PATH.format(self.captured_frame_no)
        kitti_fname = LABEL_PATH.format(self.captured_frame_no)
        img_fname = IMAGE_PATH.format(self.captured_frame_no)
        calib_filename = CALIBRATION_PATH.format(self.captured_frame_no)

        save_groundplanes(groundplane_fname,
                          self._measurements.player_measurements,
                          LIDAR_HEIGHT_POS)
        save_ref_files(OUTPUT_FOLDER, self.captured_frame_no)
        save_image_data(img_fname,
                        image_converter.to_rgb_array(self._main_image))
        save_kitti_data(kitti_fname, datapoints)
        save_lidar_data(lidar_fname, point_cloud, LIDAR_HEIGHT_POS,
                        LIDAR_DATA_FORMAT)
        save_calibration_matrices(calib_filename, self._intrinsic,
                                  self._extrinsic)
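# The roll/pitch correction in _on_render is built twice, once per branch.
# The same matrix composition as a reusable helper -- an illustrative
# refactoring, not part of the original file:
import numpy as np
from math import cos, sin

def roll_pitch_rotation(pitch, roll):
    """Combined rotation used to level lidar points against the car's
    attitude; angles in radians. Matches rotRP = np.matmul(rotR, rotP) above."""
    rot_p = np.array([[cos(pitch), 0, sin(pitch)],
                      [0, 1, 0],
                      [-sin(pitch), 0, cos(pitch)]])
    rot_r = np.array([[1, 0, 0],
                      [0, cos(roll), -sin(roll)],
                      [0, sin(roll), cos(roll)]])
    return np.matmul(rot_r, rot_p)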
def train_segmentation(self, image_dir, batch_size, num_epochs):
    """Trains the model.

    Args:
        image_dir (string): Root directory of the data for training.
        num_epochs (int): Number of epochs to train for.
        batch_size (int): Number of objects in each batch.
    """
    self.load_network("segmentation")

    # Start by making the Tk parts
    root = tk.Tk()
    root.title("DriveNet Training")
    root.geometry("350x258")

    # Create a timer and counter to calculate the processing rate
    timer = Timer()
    counter = 0

    # Plot save location
    plot_loc = path.dirname(path.dirname(path.abspath(__file__)))
    plot_loc = path.join(plot_loc, "plot_csv")
    plot_loc = path.join(
        plot_loc, strftime("%Y_%m_%d_%H-%M-%S", gmtime()) + '-loss_data.csv')

    # Configure the grid and geometry
    # -----------------------
    # | Target segmentation |
    # |       Output        |
    # | step      | rate    |
    # | epoch     | loss    |
    # |    status message   |
    # -----------------------
    root.columnconfigure(0, minsize=175)
    root.columnconfigure(1, minsize=175)
    root.rowconfigure(0, minsize=64)
    root.rowconfigure(1, minsize=64)

    # Prepare tk variables with default values
    step_var = tk.StringVar(master=root, value="Step: 0/0")
    rate_var = tk.StringVar(master=root, value="Rate: 0 steps/s")
    epoch_var = tk.StringVar(master=root, value="Epoch: 1/{}".format(num_epochs))
    loss_var = tk.StringVar(master=root, value="Loss: 0")
    time_var = tk.StringVar(master=root, value="Time left: 0:00")
    status = tk.StringVar(master=root, value="Preparing dataset")

    # Prepare image canvases
    black_array = io.imread(path.join(image_dir, "seg_00001-cam_0.png"))
    black_array.fill(0)
    black_image = ImageTk.PhotoImage(image=Image.fromarray(black_array))

    target_canvas = tk.Canvas(root, width=320, height=64)
    target_canvas_img = target_canvas.create_image(0, 0, anchor="nw", image=black_image)
    target_canvas.grid(row=0, column=0, columnspan=2)

    seg_canvas = tk.Canvas(root, width=320, height=64)
    seg_canvas_img = seg_canvas.create_image(0, 0, anchor="nw", image=black_image)
    seg_canvas.grid(row=1, column=0, columnspan=2)

    # Prepare tk labels to be put on the grid
    tk.Label(root, textvariable=step_var).grid(row=2, column=0, sticky="W", padx=5, pady=5)
    tk.Label(root, textvariable=rate_var).grid(row=2, column=1, sticky="W", padx=5, pady=5)
    tk.Label(root, textvariable=epoch_var).grid(row=3, column=0, sticky="W", padx=5, pady=5)
    tk.Label(root, textvariable=loss_var).grid(row=3, column=1, sticky="W", padx=5, pady=5)
    tk.Label(root, textvariable=time_var).grid(row=4, column=0, sticky="W", padx=5, pady=5)
    tk.Label(root, textvariable=status).grid(row=5, column=0, columnspan=2, sticky="SW", padx=5, pady=5)

    # Update root so it actually shows something
    root.update_idletasks()
    root.update()

    # Open file for loss data plot
    loss_file = open(plot_loc, 'a')

    # Prepare the dataset and its corresponding dataloader
    data = SegmentedDataset(image_dir)
    train_loader = DataLoader(dataset=data, batch_size=batch_size, shuffle=True)

    status.set("Data sets loaded")
    root.update_idletasks()
    root.update()

    status.set("Training")
    root.update_idletasks()
    root.update()

    if path.isfile(self.weights_save_dir):
        self.network.load_state_dict(torch.load(self.weights_save_dir))

    for epoch in range(num_epochs):
        status.set("Training: all")
        root.update_idletasks()
        root.update()

        # Former different loaders are now one single loader for all data
        total_step = len(train_loader)

        for data in enumerate(train_loader):
            # Run the forward pass.
            # data[0] is the iteration, data[1] is the data.
            image = data[1]['image']
            seg = data[1]["seg"]

            # Prep the target by turning it into a CUDA compatible format
            seg_cuda = data[1]["seg"].to(device=self.device, dtype=torch.long,
                                         non_blocking=True)
            seg = seg.numpy()

            out = self.network(image.to(self.device, non_blocking=True))

            # Calculate the loss
            if out is None:
                raise ValueError("forward() has not been run properly.")
            loss = self.criterion(out, seg_cuda)

            # Zero grad, then backprop and perform Adam optimization
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the displayed data
            step_var.set("Step: {0}/{1}".format(data[0] + 1, total_step))
            epoch_var.set("Epoch: {0}/{1}".format(epoch + 1, num_epochs))
            loss_var.set("Loss: {:.3f}".format(loss.item()))

            counter += 1
            if timer.elapsed_seconds_since_lap() > 0.3:
                sps = float(counter) / timer.elapsed_seconds_since_lap()
                rate_var.set("Rate: {:.2f} steps/s".format(sps))
                timer.lap()
                counter = 0

                if sps == 0:
                    time_left = "NaN"
                else:
                    time_left = int(((total_step * num_epochs)
                                     - ((float(data[0]) + 1.0) + (total_step * epoch))) / sps)
                    time_left = str(datetime.timedelta(seconds=time_left))
                time_var.set("Time left: {}".format(time_left))

            # Update the preview images only every other step
            if data[0] % 2 == 0:
                target_array = self.class_to_image_array(seg)
                target = ImageTk.PhotoImage(image=Image.fromarray(target_array))
                seg_array = self.class_prob_to_image_array(out.cpu().detach()[0])
                seg_img = ImageTk.PhotoImage(image=Image.fromarray(seg_array))
                target_canvas.itemconfig(target_canvas_img, image=target)
                seg_canvas.itemconfig(seg_canvas_img, image=seg_img)

            root.update()
            root.update_idletasks()

            loss_file.write("{}\n".format(loss.item()))

    # Now save the loss file and the weights
    loss_file.close()

    # Save the bias and weights
    torch.save(self.network.state_dict(), self.weights_save_dir)
    torch.cuda.empty_cache()

    status.set("Done")
    PlotIt(plot_loc)
    root.mainloop()
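# class_prob_to_image_array is called above but defined elsewhere; a plausible
# sketch of the argmax-based conversion it performs. The palette and the
# function body are assumptions for illustration only:
import numpy as np

def class_prob_to_image_array_sketch(class_probs):
    """Collapse a (num_classes, H, W) probability map to an RGB uint8 array
    by taking the most probable class per pixel (hypothetical palette)."""
    probs = np.asarray(class_probs)
    labels = probs.argmax(axis=0)  # (H, W) winning class per pixel
    palette = np.array([[0, 0, 0], [0, 0, 255], [255, 0, 0]], dtype=np.uint8)
    return palette[labels % len(palette)]  # wrap so any class id maps to a color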
def train_fc(self, image_dir, batch_size, num_epochs, fc_weights):
    """Trains the fully connected layers using a pretrained segmentation.

    Args:
        image_dir (string): Root directory of the data for training.
        batch_size (int): Number of objects in each batch.
        num_epochs (int): Number of epochs to train for.
        fc_weights (string): Path to the fully connected layers' weights.
    """
    self.load_network("full")

    # Start by making the Tk parts
    root = tk.Tk()
    root.title("DriveNet Training")
    root.geometry("350x258")

    # Create a timer and counter to calculate the processing rate
    timer = Timer()
    counter = 0

    # Plot save location
    plot_loc = path.dirname(path.dirname(path.abspath(__file__)))
    plot_loc = path.join(plot_loc, "plot_csv")
    plot_loc = path.join(
        plot_loc, strftime("%Y_%m_%d_%H-%M-%S", gmtime()) + '-loss_data.csv')

    # Configure the grid and geometry
    # -----------------------
    # | step      | rate    |
    # | epoch     | loss    |
    # |    status message   |
    # -----------------------
    root.columnconfigure(0, minsize=175)
    root.columnconfigure(1, minsize=175)

    # Prepare tk variables with default values
    step_var = tk.StringVar(master=root, value="Step: 0/0")
    rate_var = tk.StringVar(master=root, value="Rate: 0 steps/s")
    epoch_var = tk.StringVar(master=root, value="Epoch: 1/{}".format(num_epochs))
    loss_var = tk.StringVar(master=root, value="Loss: 0")
    time_var = tk.StringVar(master=root, value="Time left: 0:00")
    status = tk.StringVar(master=root, value="Preparing dataset")

    # Prepare tk labels to be put on the grid
    tk.Label(root, textvariable=step_var).grid(row=0, column=0, sticky="W", padx=5, pady=5)
    tk.Label(root, textvariable=rate_var).grid(row=0, column=1, sticky="W", padx=5, pady=5)
    tk.Label(root, textvariable=epoch_var).grid(row=1, column=0, sticky="W", padx=5, pady=5)
    tk.Label(root, textvariable=loss_var).grid(row=1, column=1, sticky="W", padx=5, pady=5)
    tk.Label(root, textvariable=time_var).grid(row=2, column=0, sticky="W", padx=5, pady=5)
    tk.Label(root, textvariable=status).grid(row=3, column=0, columnspan=2, sticky="SW", padx=5, pady=5)

    # Update root so it actually shows something
    root.update_idletasks()
    root.update()

    # Open file for loss data plot
    loss_file = open(plot_loc, 'a')

    # Prepare the dataset and its corresponding dataloader
    data = SegmentedDataset(image_dir)
    train_loader = DataLoader(dataset=data, batch_size=batch_size, shuffle=True)

    status.set("Data sets loaded")
    root.update_idletasks()
    root.update()

    status.set("Training")
    root.update_idletasks()
    root.update()

    if path.isfile(self.weights_save_dir):
        self.network.seg_net.load_state_dict(torch.load(self.weights_save_dir))
    if path.isfile(fc_weights):
        self.network.fcd.load_state_dict(torch.load(fc_weights))

    for epoch in range(num_epochs):
        status.set("Training: all")
        root.update_idletasks()
        root.update()

        # Former different loaders are now one single loader for all data
        total_step = len(train_loader)

        for data in enumerate(train_loader):
            # Run the forward pass.
            # data[0] is the iteration, data[1] is the data.
            image = data[1]['image']
            cmd = data[1]['cmd']

            # Prep the target by turning it into a CUDA compatible format
            steering = data[1]["vehicle_commands"].to(self.device, non_blocking=True)

            out = self.network(image.to(self.device, non_blocking=True), cmd)

            # Calculate the loss
            if out is None:
                raise ValueError("forward() has not been run properly.")
            loss = self.criterion(out, steering)

            # Zero grad, then backprop and perform optimization
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Update the displayed data
            step_var.set("Step: {0}/{1}".format(data[0] + 1, total_step))
            epoch_var.set("Epoch: {0}/{1}".format(epoch + 1, num_epochs))
            loss_var.set("Loss: {:.3f}".format(loss.item()))

            counter += 1
            if timer.elapsed_seconds_since_lap() > 0.3:
                sps = float(counter) / timer.elapsed_seconds_since_lap()
                rate_var.set("Rate: {:.2f} steps/s".format(sps))
                timer.lap()
                counter = 0

                if sps == 0:
                    time_left = "NaN"
                else:
                    time_left = int(((total_step * num_epochs)
                                     - ((float(data[0]) + 1.0) + (total_step * epoch))) / sps)
                    time_left = str(datetime.timedelta(seconds=time_left))
                time_var.set("Time left: {}".format(time_left))

                root.update()
                root.update_idletasks()

            loss_file.write("{}\n".format(loss.item()))

    # Now save the loss file and the weights
    loss_file.close()

    # Save the bias and weights
    torch.save(self.network.state_dict(), self.weights_save_dir)
    torch.cuda.empty_cache()

    status.set("Done")
    PlotIt(plot_loc)
    root.mainloop()
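# Every training method writes one loss value per line to a '-loss_data.csv'
# file, which PlotIt later reads. A minimal reader/plotter in that spirit --
# the matplotlib code below is illustrative, not the project's actual PlotIt:
import matplotlib.pyplot as plt

def plot_loss_file(plot_loc):
    """Plot a loss-per-step file as written by the training loops above."""
    with open(plot_loc) as f:
        losses = [float(line) for line in f if line.strip()]
    plt.plot(range(1, len(losses) + 1), losses)
    plt.xlabel("step")
    plt.ylabel("loss")
    plt.show()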
class Controller:
    def __init__(self, weight1, weight2):
        """Acts as a controller object that sends and receives data from AirSim.

        This class acts as the interface between the network and the AirSim
        simulation inside of Unreal Engine. It is able to receive camera
        images from AirSim as well as send driving commands to it, and is
        responsible for running the network.

        Control Scheme:
            NUM_4 : Left
            NUM_8 : Forwards
            NUM_6 : Right
            Q     : Quit
            SPACE : Reset

        Args:
            weight1 (str): Path to the segmentation network weights.
            weight2 (str): Path to the fc network weights.
        """
        # Initialize AirSim connection
        self.client = airsim.CarClient()
        self.client.confirmConnection()

        # Set up the network
        self.seg_net = SegmentationNetwork("googlenet")
        self.seg_net.eval()
        self.seg_net.cuda()

        if (weight2 is None) or (weight2 == ""):
            # No fc weights given, so run segmentation only
            self.seg_only = True
        else:
            self.fcd = FCD()
            self.fcd.eval()
            self.fcd.cuda()
            self.seg_only = False

        self.client.enableApiControl(True)

        self.seg_out = None
        self.weight1 = weight1
        self.weight2 = weight2
        self.device = torch.device("cuda")

        # Set up timers for fps counting
        self._timer = Timer()
        self.counter = 0

        # Set up display variables
        self._display = None
        self._main_image = None
        self._overlay_image = None
        self.fps_text = "FPS: 0"
        self.direction_text = "Direction: Forwards"
        self._text_font = None

        self.set_segmentation_ids()

        # Directions:
        # -1 : Left
        #  0 : Forwards
        #  1 : Right
        self._direction = 0  # Direction defaults to forwards

        self.out = None           # Network output
        self.throttle = 0         # Throttle output
        self.max_throttle = 0.35  # Throttle limit

        # Quitting
        self._request_quit = False

    def execute(self):
        """Launch PyGame."""
        pygame.init()
        self.__init_game()

        # Initialize fonts
        if not pygame.font.get_init():
            pygame.font.init()
        self._text_font = pygame.font.SysFont("helvetica", 24)

        while not self._request_quit:
            self.__on_loop()
            self.__on_render()

        if self._request_quit:
            pygame.display.quit()
            pygame.quit()
            self.client.enableApiControl(False)  # Give control back to the user
            return

    def set_segmentation_ids(self):
        # Reset every object's id to zero, then assign ids to the classes we
        # care about so the segmentation camera labels them distinctly.
        found = self.client.simSetSegmentationObjectID(r"[\w]*", 0, True)
        print("Reset all segmentations to zero: %r" % found)
        self.client.simSetSegmentationObjectID(r"ParkingAnnotRoad[\w]*", 22, True)
        self.client.simSetSegmentationObjectID(r"CrosswalksRoad[\w]*", 23, True)
        self.client.simSetSegmentationObjectID(r"Car[\w]*", 24, True)
        self.client.simSetSegmentationObjectID(r"GroundRoad[\w]*", 25, True)
        self.client.simSetSegmentationObjectID(r"SolidMarkingRoad[\w]*", 26, True)
        self.client.simSetSegmentationObjectID(r"DashedMarkingRoad[\w]*", 27, True)
        self.client.simSetSegmentationObjectID(r"StopLinesRoad[\w]*", 28, True)
        self.client.simSetSegmentationObjectID(r"ParkingLinesRoad[\w]*", 29, True)
        self.client.simSetSegmentationObjectID(r"JunctionsAnnotRoad[\w]*", 30, True)
        for i in range(max_lanes):
            self.client.simSetSegmentationObjectID(
                r"LaneRoadAnnot" + str(i) + r"[\w]*", i + 31, True)

    def __init_game(self):
        """Initializes the PyGame window and creates a new episode.

        This is separate from the main init method because the init is
        intended to create an instance of the class, but not to start the
        game objects yet.
        """
        self.__on_reset()
        self._display = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT),
                                                pygame.HWSURFACE | pygame.DOUBLEBUF)
        self.seg_net.load_state_dict(torch.load(self.weight1))
        if not self.seg_only:
            self.fcd.load_state_dict(torch.load(self.weight2))
        print("PyGame started")

    def __on_reset(self):
        """Resets the state of the client."""
        print("Resetting client")
        self.client.reset()
        self._timer = Timer()

    def __on_loop(self):
        """Commands to execute on every loop."""
        # Make time tick
        self._timer.tick()

        # Get a segmentation image from Unreal
        response = self.client.simGetImages([
            airsim.ImageRequest("0", airsim.ImageType.Segmentation, False, False)])
        rgb = None
        if response:
            rgb = self.__response_to_cv(response[0], 3)
        self._main_image = rgb

        # Get key presses and parse them
        events = pygame.event.get()
        for event in events:
            if event.type == pygame.KEYDOWN:
                self.__parse_event(event)
            if event.type == pygame.QUIT:
                self._request_quit = True

        # Run the network. First convert the image to a tensor.
        rgb = self.__to_tensor(rgb).float().to(self.device)
        self.seg_out = self.seg_net.forward(torch.unsqueeze(rgb, 0))
        self._overlay_image = self.seg_out.cpu().detach()[0].numpy()
        self._overlay_image = self._overlay_image.transpose(1, 2, 0)

        if not self.seg_only:
            # Flatten
            x = self.seg_out.view(-1, self._num_flat_features(self.seg_out))
            # Analyze for steering
            x = self.fcd(x, [0, [self._direction]])

            # Get its data, then to numpy, then to a tuple
            self.out = tuple(x.cpu().detach().numpy())

            # Now send the command to AirSim
            if MAX_THROTTLE_ONLY:
                self.throttle = self.max_throttle
            else:
                self.throttle = self.out[0][1]
            self.__send_command((self.out[0][0], self.throttle))

        # Computation is now complete. Add to the counter.
        self.counter += 1

        # Determine, then update, the direction
        if self._direction == 0:
            direction = "Forward"
        elif self._direction == -1:
            direction = "Left"
        else:
            direction = "Right"
        self.direction_text = "Direction: {0}".format(direction)

        # Update FPS
        if self._timer.elapsed_seconds_since_lap() > 0.3:
            # Determine FPS
            fps = float(self.counter) / self._timer.elapsed_seconds_since_lap()

            # Update the info
            self.fps_text = "FPS: {0}".format(int(fps))

            # Reset counters
            self.counter = 0
            self._timer.lap()

        pygame.display.update()  # Finally, update the display.

    def __on_render(self):
        """Renders the PyGame window itself."""
        if self._main_image is not None and self._overlay_image is not None:
            # If there is an image in the pipeline, render it.
            img = self.__overlay_images(self._main_image, self._overlay_image)
            surface_main = pygame.surfarray.make_surface(img.swapaxes(0, 1))
            self._display.blit(surface_main, (0, 0))

        # Render white fill to "reset" the text
        self._display.fill((255, 255, 255),
                           rect=pygame.Rect(0, 64, WINDOW_WIDTH, WINDOW_HEIGHT - 64))

        # Create the text in the window
        surface_fps = self._text_font.render(self.fps_text, True, (0, 0, 0))
        surface_direction = self._text_font.render(self.direction_text, True, (0, 0, 0))
        if self.out is None:
            self.out = (np.array([0], dtype="float32"),)
        surface_steering = self._text_font.render(
            "Steering: %.2f" % self.out[0][0], True, (0, 0, 0))
        surface_throttle = self._text_font.render(
            "Throttle: %.2f" % self.throttle, True, (0, 0, 0))

        # And now render that text
        self._display.blit(surface_fps, (6, 70))
        self._display.blit(surface_direction, (120, 70))
        self._display.blit(surface_steering, (6, 100))
        self._display.blit(surface_throttle, (6, 130))

    @staticmethod
    def __response_to_cv(r, channels):
        if r.compress:
            image = cv2.imdecode(
                np.fromstring(r.image_data_uint8, dtype=np.uint8), 1)
            image = image.reshape(r.height, r.width, channels)
            image = cv2.cvtColor(image[:, :, 0:channels], cv2.COLOR_RGB2BGR)
        else:
            image = np.frombuffer(r.image_data_uint8, dtype=np.uint8)
            image = image.reshape(r.height, r.width, channels + 1)
            image = image[:, :, 0:channels]
        return image

    @staticmethod
    def __overlay_images(image1, image2):
        """Overlays using linear dodge.

        Args:
            image1 (np.ndarray): First image.
            image2 (np.ndarray): Second image. The one that will become the
                overlay.

        Returns:
            (np.ndarray) The overlaid image.
        """
        shape = image1.shape
        output = np.ndarray([shape[0], shape[1], shape[2]])
        for i in range(shape[0]):
            for j in range(shape[1]):
                img2_color_array = [0, 0, 0]
                index_max = np.argmax(image2[i][j])
                if index_max == 1:
                    img2_color_array = np.array([0, 0, 255])
                elif index_max == 2:
                    img2_color_array = np.array([255, 0, 0])
                for k in range(shape[2]):
                    output[i][j][k] = max(image1[i][j][k], img2_color_array[k])
        return output

    def __parse_event(self, event):
        """Parses PyGame events.

        Args:
            event (pygame.Event): The PyGame event to be parsed.
        """
        if event.key == K_KP8:
            self._direction = 0
        elif event.key == K_KP4:
            self._direction = -1
        elif event.key == K_KP6:
            self._direction = 1
        elif event.key == K_SPACE:
            self.__on_reset()
        elif event.key == K_q:
            self._request_quit = True

    def __send_command(self, command):
        """Sends driving commands over the AirSim API.

        Args:
            command (tuple): A tuple in the form (steering, throttle).
        """
        car_control = airsim.CarControls()
        car_control.steering = float(command[0])
        car_control.throttle = command[1]
        self.client.setCarControls(car_control)

    @staticmethod
    def __to_tensor(image):
        """Turns an image into a tensor.

        Args:
            image: The image to be converted as an array of uncompressed bits.

        Returns:
            (torch.Tensor) The image as a tensor.
        """
        image = image.transpose(2, 0, 1)
        return torch.from_numpy(image)

    @staticmethod
    def _num_flat_features(x):
        """Multiplies the number of features for flattening a convolution.

        References:
            https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html
        """
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
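# __overlay_images above visits every pixel in a Python loop, which is slow at
# interactive frame rates. An equivalent vectorized version with the same
# class-to-color convention (a sketch that preserves the original semantics):
import numpy as np

def overlay_images_vectorized(image1, image2):
    """Linear-dodge overlay without the per-pixel loop: color the winning
    class per pixel, then take the per-channel max against the base image."""
    index_max = image2.argmax(axis=2)      # (H, W) winning class per pixel
    overlay = np.zeros_like(image1)
    overlay[index_max == 1] = (0, 0, 255)  # class 1 -> blue, as above
    overlay[index_max == 2] = (255, 0, 0)  # class 2 -> red, as above
    return np.maximum(image1, overlay)     # linear dodge = per-channel max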
class CarlaGame(object):
    def __init__(self, carla_client, args):
        self.client = carla_client
        (self._carla_settings, self._intrinsic,
         self._camera_to_car_transform,
         self._lidar_to_car_transform) = make_carla_settings(args)
        self._timer = None
        self._display = None
        self._main_image = None
        self._mini_view_image1 = None
        self._mini_view_image2 = None
        self._enable_autopilot = args.autopilot
        self._lidar_measurement = None
        self._map_view = None
        self._is_on_reverse = False
        self._city_name = args.map_name
        self._map = CarlaMap(self._city_name, 16.43, 50.0) if self._city_name is not None else None
        self._map_shape = self._map.map_image.shape if self._city_name is not None else None
        self._map_view = self._map.get_map(WINDOW_HEIGHT) if self._city_name is not None else None
        self._position = None
        self._agent_positions = None
        self.captured_frame_no = self.current_captured_frame_num()
        self._measurements = None
        self._extrinsic = None
        # To keep track of how far the car has driven since the last capture of data
        self._agent_location_on_last_capture = None
        self._frames_since_last_capture = 0
        # How many frames we have captured since reset
        self._captured_frames_since_restart = 0

    def current_captured_frame_num(self):
        # Figures out which frame number we currently are on.
        # This is run once, when we start the simulator, in case we already
        # have a dataset. The user can then choose to overwrite or append to
        # the dataset.
        label_path = os.path.join(OUTPUT_FOLDER, 'label_2/')
        num_existing_data_files = len(
            [name for name in os.listdir(label_path) if name.endswith('.txt')])
        print(num_existing_data_files)
        if num_existing_data_files == 0:
            return 0
        answer = input(
            "There already exists a dataset in {}. Would you like to (O)verwrite or (A)ppend the dataset? (O/A)"
            .format(OUTPUT_FOLDER))
        if answer.upper() == "O":
            logging.info("Resetting frame number to 0 and overwriting existing")
            # Overwrite the data
            return 0
        logging.info("Continuing recording data on frame number {}".format(
            num_existing_data_files))
        return num_existing_data_files

    def execute(self):
        """Launch the PyGame loop."""
        pygame.init()
        self._initialize_game()
        try:
            while True:
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        return
                reset = self._on_loop()
                if not reset:
                    self._on_render()
        finally:
            pygame.quit()

    def _initialize_game(self):
        if self._city_name is not None:
            self._display = pygame.display.set_mode(
                (WINDOW_WIDTH + int((WINDOW_HEIGHT / float(self._map.map_image.shape[0]))
                                    * self._map.map_image.shape[1]), WINDOW_HEIGHT),
                pygame.HWSURFACE | pygame.DOUBLEBUF)
        else:
            self._display = pygame.display.set_mode(
                (WINDOW_WIDTH, WINDOW_HEIGHT),
                pygame.HWSURFACE | pygame.DOUBLEBUF)
        logging.debug('pygame started')
        self._on_new_episode()

    def _on_new_episode(self):
        self._carla_settings.randomize_seeds()
        self._carla_settings.randomize_weather()
        scene = self.client.load_settings(self._carla_settings)
        number_of_player_starts = len(scene.player_start_spots)
        player_start = np.random.randint(number_of_player_starts)
        logging.info('Starting new episode...')
        self.client.start_episode(player_start)
        self._timer = Timer()
        self._is_on_reverse = False
        # Reset all tracking variables
        self._agent_location_on_last_capture = None
        self._frames_since_last_capture = 0
        self._captured_frames_since_restart = 0

    def _on_loop(self):
        self._timer.tick()
        measurements, sensor_data = self.client.read_data()

        # Reset the environment if the agent is stuck, can't find any agents,
        # or has captured enough frames in this episode
        is_stuck = self._frames_since_last_capture >= NUM_EMPTY_FRAMES_BEFORE_RESET
        is_enough_datapoints = (self._captured_frames_since_restart + 1) % NUM_RECORDINGS_BEFORE_RESET == 0

        if (is_stuck or is_enough_datapoints) and GEN_DATA:
            logging.warning("Is stuck: {}, is_enough_datapoints: {}".format(
                is_stuck, is_enough_datapoints))
            self._on_new_episode()
            # If we don't sleep, the client will continue to render
            return True

        # (Extrinsic) Rt matrix: (camera) local 3D to world 3D.
        # Get the transform from the player protobuf transformation.
        world_transform = Transform(measurements.player_measurements.transform)
        # Compute the final transformation matrix.
        self._extrinsic = world_transform * self._camera_to_car_transform
        self._measurements = measurements
        self._last_player_location = measurements.player_measurements.transform.location
        self._main_image = sensor_data.get('CameraRGB', None)
        self._lidar_measurement = sensor_data.get('Lidar32', None)
        self._depth_image = sensor_data.get('DepthCamera', None)

        # Print measurements every second.
        if self._timer.elapsed_seconds_since_lap() > 1.0:
            if self._city_name is not None:
                # Get the car's position on the map.
                map_position = self._map.convert_to_pixel([
                    measurements.player_measurements.transform.location.x,
                    measurements.player_measurements.transform.location.y,
                    measurements.player_measurements.transform.location.z])
                # Get the orientation of the road the car is on.
                lane_orientation = self._map.get_lane_orientation([
                    measurements.player_measurements.transform.location.x,
                    measurements.player_measurements.transform.location.y,
                    measurements.player_measurements.transform.location.z])
                MeasurementsDisplayHelper.print_player_measurements_map(
                    measurements.player_measurements, map_position,
                    lane_orientation, self._timer)
            else:
                MeasurementsDisplayHelper.print_player_measurements(
                    measurements.player_measurements, self._timer)
            # Plot position on the map as well.
            self._timer.lap()

        control = self._get_keyboard_control(pygame.key.get_pressed())

        # Set the player position
        if self._city_name is not None:
            self._position = self._map.convert_to_pixel([
                measurements.player_measurements.transform.location.x,
                measurements.player_measurements.transform.location.y,
                measurements.player_measurements.transform.location.z])
            self._agent_positions = measurements.non_player_agents

        if control is None:
            self._on_new_episode()
        elif self._enable_autopilot:
            self.client.send_control(measurements.player_measurements.autopilot_control)
        else:
            self.client.send_control(control)

    def _get_keyboard_control(self, keys):
        """Return a VehicleControl message based on the pressed keys.

        Returns None if a new episode was requested.
        """
        control = KeyboardHelper.get_keyboard_control(
            keys, self._is_on_reverse, self._enable_autopilot)
        if control is not None:
            control, self._is_on_reverse, self._enable_autopilot = control
        return control

    def _on_render(self):
        datapoints = []

        if self._main_image is not None and self._depth_image is not None:
            # Convert main image
            image = image_converter.to_rgb_array(self._main_image)

            # Retrieve and draw datapoints
            image, datapoints = self._generate_datapoints(image)

            # Draw lidar. The camera coordinate system is left, up, forwards.
            if VISUALIZE_LIDAR:
                # Shift bboxes relative to the pitch and roll of the player
                rotation = self._measurements.player_measurements.transform.rotation
                pitch, roll, yaw = rotation.pitch, rotation.roll, rotation.yaw
                # Measurements are in degrees; convert to radians
                pitch = degrees_to_radians(pitch)
                roll = degrees_to_radians(roll)
                yaw = degrees_to_radians(yaw)
                print('pitch: ', pitch)
                print('roll: ', roll)
                print('yaw: ', yaw)
                # Rotation matrix for pitch
                rotP = np.array([[cos(pitch), 0, sin(pitch)],
                                 [0, 1, 0],
                                 [-sin(pitch), 0, cos(pitch)]])
                # Rotation matrix for roll
                rotR = np.array([[1, 0, 0],
                                 [0, cos(roll), -sin(roll)],
                                 [0, sin(roll), cos(roll)]])
                # Combined rotation matrix; must be applied in order roll, pitch, yaw
                rotRP = np.matmul(rotR, rotP)
                # Take the points from the point cloud and transform to car space
                point_cloud = np.array(
                    self._lidar_to_car_transform.transform_points(
                        self._lidar_measurement.data))
                point_cloud[:, 2] -= LIDAR_HEIGHT_POS
                point_cloud = np.matmul(rotRP, point_cloud.T).T
                # Transform to camera space by the inverse of the camera_to_car transform
                point_cloud_cam = self._camera_to_car_transform.inverse().transform_points(point_cloud)
                point_cloud_cam[:, 1] += LIDAR_HEIGHT_POS
                image = lidar_utils.project_point_cloud(
                    image, point_cloud_cam, self._intrinsic, 1)

            # Display image
            surface = pygame.surfarray.make_surface(image.swapaxes(0, 1))
            self._display.blit(surface, (0, 0))
            if self._map_view is not None:
                self._display_agents(self._map_view)
            pygame.display.flip()

            # Determine whether to save files
            distance_driven = self._distance_since_last_recording()
            has_driven_long_enough = (distance_driven is None
                                      or distance_driven > DISTANCE_SINCE_LAST_RECORDING)

            if (self._timer.step + 1) % STEPS_BETWEEN_RECORDINGS == 0:
                if has_driven_long_enough and datapoints:
                    # Avoid doing this twice or unnecessarily often
                    if not VISUALIZE_LIDAR:
                        # Shift bboxes relative to the pitch and roll of the player
                        rotation = self._measurements.player_measurements.transform.rotation
                        pitch, roll, yaw = rotation.pitch, rotation.roll, rotation.yaw
                        # Measurements are in degrees; convert to radians
                        pitch = degrees_to_radians(pitch)
                        roll = degrees_to_radians(roll)
                        yaw = degrees_to_radians(yaw)
                        print('pitch: ', pitch)
                        print('roll: ', roll)
                        print('yaw: ', yaw)
                        # Rotation matrix for pitch
                        rotP = np.array([[cos(pitch), 0, sin(pitch)],
                                         [0, 1, 0],
                                         [-sin(pitch), 0, cos(pitch)]])
                        # Rotation matrix for roll
                        rotR = np.array([[1, 0, 0],
                                         [0, cos(roll), -sin(roll)],
                                         [0, sin(roll), cos(roll)]])
                        # Combined rotation matrix; must be applied in order roll, pitch, yaw
                        rotRP = np.matmul(rotR, rotP)
                        # Take the points from the point cloud and transform to car space
                        point_cloud = np.array(
                            self._lidar_to_car_transform.transform_points(
                                self._lidar_measurement.data))
                        point_cloud[:, 2] -= LIDAR_HEIGHT_POS
                        point_cloud = np.matmul(rotRP, point_cloud.T).T

                    self._update_agent_location()
                    # Save screen, lidar, and KITTI training labels together
                    # with calibration and groundplane files
                    self._save_training_files(datapoints, point_cloud)
                    self.captured_frame_no += 1
                    self._captured_frames_since_restart += 1
                    self._frames_since_last_capture = 0
                else:
                    logging.debug(
                        "Could save datapoint, but agent has not driven {} meters since last recording (Currently {} meters)"
                        .format(DISTANCE_SINCE_LAST_RECORDING, distance_driven))
            else:
                self._frames_since_last_capture += 1
                logging.debug(
                    "Could not save training data - no visible agents of selected classes in scene")

    def _distance_since_last_recording(self):
        if self._agent_location_on_last_capture is None:
            return None
        cur_pos = vector3d_to_array(
            self._measurements.player_measurements.transform.location)
        last_pos = vector3d_to_array(self._agent_location_on_last_capture)

        def dist_func(x, y):
            # Note: this is the squared distance (no square root is taken)
            return sum((x - y)**2)

        return dist_func(cur_pos, last_pos)

    def _update_agent_location(self):
        self._agent_location_on_last_capture = self._measurements.player_measurements.transform.location

    def _generate_datapoints(self, image):
        """Returns a list of datapoints (labels and such) that are generated
        this frame, together with the main image.
        """
        datapoints = []
        image = image.copy()

        # Remove this
        rotRP = np.identity(3)

        # Stores all datapoints for the current frames
        for agent in self._measurements.non_player_agents:
            if should_detect_class(agent) and GEN_DATA:
                image, kitti_datapoint = create_kitti_data_point(
                    agent, self._intrinsic, self._extrinsic.matrix, image,
                    self._depth_image, self._measurements.player_measurements,
                    rotRP)
                if kitti_datapoint:
                    datapoints.append(kitti_datapoint)
        return image, datapoints

    def _save_training_files(self, datapoints, point_cloud):
        logging.info("Attempting to save at timer step {}, frame no: {}".format(
            self._timer.step, self.captured_frame_no))
        groundplane_fname = GROUNDPLANE_PATH.format(self.captured_frame_no)
        lidar_fname = LIDAR_PATH.format(self.captured_frame_no)
        kitti_fname = LABEL_PATH.format(self.captured_frame_no)
        img_fname = IMAGE_PATH.format(self.captured_frame_no)
        calib_filename = CALIBRATION_PATH.format(self.captured_frame_no)

        save_groundplanes(groundplane_fname,
                          self._measurements.player_measurements,
                          LIDAR_HEIGHT_POS)
        save_ref_files(OUTPUT_FOLDER, self.captured_frame_no)
        save_image_data(img_fname,
                        image_converter.to_rgb_array(self._main_image))
        save_kitti_data(kitti_fname, datapoints)
        save_lidar_data(lidar_fname, point_cloud, LIDAR_HEIGHT_POS,
                        LIDAR_DATA_FORMAT)
        save_calibration_matrices(calib_filename, self._intrinsic,
                                  self._extrinsic)

    def _display_agents(self, image):
        image = image[:, :, :3]
        new_window_width = (float(WINDOW_HEIGHT) / float(self._map_shape[0])) * \
            float(self._map_shape[1])
        surface = pygame.surfarray.make_surface(image.swapaxes(0, 1))

        w_pos = int(self._position[0] * (float(WINDOW_HEIGHT) / float(self._map_shape[0])))
        h_pos = int(self._position[1] * (new_window_width / float(self._map_shape[1])))
        pygame.draw.circle(surface, [255, 0, 0, 255], (w_pos, h_pos), 6, 0)

        for agent in self._agent_positions:
            if agent.HasField('vehicle'):
                agent_position = self._map.convert_to_pixel([
                    agent.vehicle.transform.location.x,
                    agent.vehicle.transform.location.y,
                    agent.vehicle.transform.location.z])
                w_pos = int(agent_position[0] * (float(WINDOW_HEIGHT) / float(self._map_shape[0])))
                h_pos = int(agent_position[1] * (new_window_width / float(self._map_shape[1])))
                pygame.draw.circle(surface, [255, 0, 255, 255], (w_pos, h_pos), 4, 0)

        self._display.blit(surface, (WINDOW_WIDTH, 0))
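# Note: dist_func in _distance_since_last_recording returns the *squared*
# distance, so it is effectively squared meters that get compared against
# DISTANCE_SINCE_LAST_RECORDING. If the threshold is meant to be in meters,
# a true Euclidean distance would look like this (illustrative sketch):
import numpy as np

def euclidean_distance(cur_pos, last_pos):
    """Straight-line distance between two position arrays, in meters."""
    return float(np.sqrt(np.sum((np.asarray(cur_pos) - np.asarray(last_pos)) ** 2)))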