Example #1
    def train_model(self,
                    csv_file,
                    root_dir,
                    num_epochs,
                    batch_size,
                    silent=False):
        """Trains the model.

        This method also visualizes the training of the network: it shows
        progress in terms of step, epoch, processing rate, and loss, and
        it records the loss over time so that it can be plotted once
        training finishes.

        Args:
            csv_file (string): Path to the csv file used for training.
            root_dir (string): Root directory of the data for training.
            num_epochs (int): Number of epochs to train for.
            batch_size (int): Number of objects in each batch.
            silent (bool): Whether to show the plot. True hides the plot.
        """
        # Start by making the tkinter parts
        root = tk.Tk()
        root.title("DriveNet Training")
        root.geometry("350x130")

        # Create timer and counter to calculate processing rate
        timer = Timer()
        counter = 0

        # Plot save location
        plot_loc = path.join(
            path.split(self.save_dir)[0],
            strftime("%Y_%m_%d_%H-%M-%S", gmtime()) + '-loss_data.csv')

        # Configure the grid and geometry
        # -----------------------
        # | step     | rate     |
        # | epoch    | loss     |
        # | status message      |
        # -----------------------
        root.columnconfigure(0, minsize=175)
        root.columnconfigure(1, minsize=175)

        # Prepare tk variables with default values
        step_var = tk.StringVar(master=root, value="Step: 0/0")
        rate_var = tk.StringVar(master=root, value="Rate: 0 steps/s")
        epoch_var = tk.StringVar(master=root,
                                 value="Epoch: 1/{}".format(num_epochs))
        loss_var = tk.StringVar(master=root, value="Loss: 0")
        time_var = tk.StringVar(master=root, value="Time left: 0:00")
        status = tk.StringVar(master=root, value="Preparing dataset")

        # Prepare tk labels to be put on the grid
        tk.Label(root, textvariable=step_var).grid(row=0,
                                                   column=0,
                                                   sticky="W",
                                                   padx=5,
                                                   pady=5)
        tk.Label(root, textvariable=rate_var).grid(row=0,
                                                   column=1,
                                                   sticky="W",
                                                   padx=5,
                                                   pady=5)
        tk.Label(root, textvariable=epoch_var).grid(row=1,
                                                    column=0,
                                                    sticky="W",
                                                    padx=5,
                                                    pady=5)
        tk.Label(root, textvariable=loss_var).grid(row=1,
                                                   column=1,
                                                   sticky="W",
                                                   padx=5,
                                                   pady=5)
        tk.Label(root, textvariable=time_var).grid(row=2,
                                                   column=0,
                                                   sticky="W",
                                                   padx=5,
                                                   pady=5)
        tk.Label(root, textvariable=status).grid(row=3,
                                                 column=0,
                                                 columnspan=2,
                                                 sticky="SW",
                                                 padx=5,
                                                 pady=5)

        # Update root so it actually shows something
        root.update_idletasks()
        root.update()

        # Open file for loss data plot
        loss_file = open(plot_loc, 'a')

        # Prepare the datasets and their corresponding dataloaders
        complete_data = DrivingSimDataset(csv_file, root_dir)
        train_loader = DataLoader(dataset=complete_data,
                                  batch_size=batch_size,
                                  shuffle=True)
        status.set("Data sets loaded")  # {{{

        root.update_idletasks()
        root.update()

        # total_step = 0
        status.set("Training")
        root.update_idletasks()
        root.update()

        self.load_state_dict()
        if not path.isfile(self.save_dir):
            status.set(
                "Weights do not exist. Running with random weights.")  # }}}

        # print("len(train_loader) = {0}".format(total_step))
        # print("Dataset:")
        # for i in enumerate(train_loader):
        #     print(i[1]['image'])
        #     print(i[1]['vehicle_commands'])

        # acc_list = []
        for epoch in range(num_epochs):
            status.set("Training: all")  # {{{
            root.update_idletasks()
            root.update()

            # The formerly separate loaders are now a single loader for all data
            total_step = len(train_loader)

            for data in enumerate(train_loader):
                # Run the forward pass.
                # data[0] is the step index, data[1] is the batch.
                images = data[1]['image']
                sec = data[1]['sec']

                # Put segmented and normal into one tensor
                images = cat((images, sec), dim=1)

                vehicle_info = (data[1]["vehicle_commands"], data[1]["cmd"])

                # Prep the target by converting it to a CUDA-compatible format
                car_data = vehicle_info[0].to(self.device, non_blocking=True)

                # self.optimizer.zero_grad()
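                # Add an extra leading dimension in front of the batch axis
                # (the input shape run_model appears to expect)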
                images = torch.unsqueeze(images, 0)

                self.run_model(images.to(self.device, non_blocking=True),
                               vehicle_info, batch_size)
                # # Print the out result
                # print("Network output:")
                # print(self.out.cpu().detach().numpy())

                # calculate the loss
                if self.out is None:
                    raise ValueError("forward() has not been run properly.")
                loss = self.criterion(self.out, car_data)

                # Zero grad
                self.optimizer.zero_grad()
                # Backprop and perform Adam optimization
                loss.backward()
                self.optimizer.step()

                # # Track the accuracy
                # total = data.size(0)
                # _, predicted = torch.max(self.output.data, 1)
                # correct = (predicted == data[0]).sum().item()
                # acc_list.append(correct / total)

                # Update data
                step_var.set("Step: {0}/{1}".format(data[0] + 1, total_step))
                epoch_var.set("Epoch: {0}/{1}".format(epoch + 1, num_epochs))
                loss_var.set("Loss: {:.3f}".format(loss.item()))

                counter += 1
                if timer.elapsed_seconds_since_lap() > 0.3:
                    sps = float(counter) / timer.elapsed_seconds_since_lap()
                    rate_var.set("Rate: {:.2f} steps/s".format(sps))
                    timer.lap()
                    counter = 0

                    if sps == 0:
                        time_left = "NaN"
                    else:
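                        # Remaining steps = total steps across all epochs
                        # minus steps completed so far; dividing by the
                        # measured rate (steps/s) gives the ETA in seconds.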
                        time_left = int(((total_step * num_epochs) -
                                         ((float(data[0]) + 1.0) +
                                          (total_step * epoch))) / sps)
                        time_left = datetime.timedelta(seconds=time_left)
                        time_left = str(time_left)
                    time_var.set("Time left: {}".format(time_left))

                root.update()
                root.update_idletasks()

                loss_file.write("{}\n".format(loss.item()))

        # Now save the loss file and the weights
        loss_file.close()

        # Save the bias and weights
        torch.save(self.network.state_dict(), self.save_dir)

        torch.cuda.empty_cache()

        status.set("Done")  # {{{
        if not silent:
            PlotIt(plot_loc)
        root.mainloop()
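
# Usage sketch (hypothetical): `train_model` is assumed to live on a
# runner-style wrapper that already provides `save_dir`, `device`,
# `criterion`, `optimizer`, and `run_model`.
#
# runner = Runner(save_dir="weights/drivenet.weights")
# runner.train_model(csv_file="data/driving_log.csv",
#                    root_dir="data/images",
#                    num_epochs=10,
#                    batch_size=32,
#                    silent=True)  # train without showing the loss plot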
Example #2
class Controller:
    def __init__(self, network):
        """Acts as a controller object that sends and receives data from AirSim.

            This class acts as the interface between the network and the AirSim
            simulation inside of Unreal Engine. It is able to receive camera
            images from AirSim as well as send the driving commands to it and
            is responsible for running the network.

            Control Scheme:
                NUM_4   : Left
                NUM_8   : Forwards
                NUM_6   : Right
                Q       : Quit
                SPACE   : Reset

        Args:
            network (runner.Runner): A PyTorch network wrapped by a runner.
        """
        # Initialize AirSim connection
        self.client = airsim.CarClient()
        self.client.confirmConnection()
        self.client.enableApiControl(True)

        # Set up the network
        self.network = network

        # Set up timers for fps counting
        self._timer = Timer()
        self.counter = 0

        # Set up display variables
        self._display = None
        self._main_image = None
        self.fps_text = "FPS: 0"
        self.direction_text = "Direction: Forwards"
        self._text_font = None

        # Directions:
        # -1 : Left
        # 0 : Forwards
        # 1 : Right
        self._direction = 0  # Direction defaults to forwards

        self.out = None  # Network output
        self.throttle = 0  # Throttle output

        self.max_throttle = 0.35  # Throttle limit

        # Quitting
        self._request_quit = False

    def execute(self):
        """"Launch PyGame."""
        pygame.init()
        self.__init_game()

        # Initialize fonts
        if not pygame.font.get_init():
            pygame.font.init()

        self._text_font = pygame.font.SysFont("helvetica", 24)

        while not self._request_quit:
            self.__on_loop()
            self.__on_render()

        # The loop has exited, so a quit was requested
        pygame.display.quit()
        pygame.quit()
        self.client.enableApiControl(False)  # Give control back to user

    def __init_game(self):
        """Initializes the PyGame window and creates a new episode.

            This is separate from the main init method because the init is
            intended to create an instance of the class, but not to start
            the game objects yet.
        """
        self.__on_reset()

        self._display = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT),
                                                pygame.HWSURFACE
                                                | pygame.DOUBLEBUF)

        self.network.load_state_dict()
        print("PyGame started")

    def __on_reset(self):
        """Resets the state of the client."""
        print("Resetting client")
        self.client.reset()
        self._timer = Timer()

    def __on_loop(self):
        """Commands to execute on every loop."""
        # Make time tick
        self._timer.tick()

        # Get an image from Unreal
        response = self.client.simGetImages([
            airsim.ImageRequest("0", airsim.ImageType.Scene, False, False)])
        rgb = None
        if response:
            rgb = self.__response_to_cv(response[0], 3)
            self._main_image = rgb

        # Get key presses and parse them
        events = pygame.event.get()

        for event in events:
            if event.type == pygame.KEYDOWN:
                self.__parse_event(event)
            if event.type == pygame.QUIT:
                self._request_quit = True

        # run the network
        # First convert the images to tensors
        rgb = self.__to_tensor(rgb).float().to(self.network.device)

        self.out = self.network.run_model(
            torch.unsqueeze(torch.unsqueeze(rgb, 0), 0),
            [0, [self._direction]], 1)
        # get its data, then to numpy, then to a tuple
        self.out = tuple(self.out.cpu().detach().numpy())
        
        # Now send the command to airsim
        if MAX_THROTTLE_ONLY:
            self.throttle = self.max_throttle
        else:
            self.throttle = self.out[0][1]
        self.__send_command((self.out[0][0], self.throttle))

        # Computation is now complete. Add to the counter.
        self.counter += 1

        # Determine then update direction
        if self._direction == 0:
            direction = "Forward"
        elif self._direction == -1:
            direction = "Left"
        else:
            direction = "Right"
        self.direction_text = "Direction: {0}".format(direction)

        # Update FPS
        if self._timer.elapsed_seconds_since_lap() > 0.3:
            # Determine FPS
            fps = float(self.counter) / self._timer.elapsed_seconds_since_lap()

            # Update the info
            self.fps_text = "FPS: {0}".format(int(fps))

            # Reset counters
            self.counter = 0
            self._timer.lap()

        pygame.display.update()  # Finally, update the display.

    def __on_render(self):
        """Renders the pygame window itself."""
        if self._main_image is not None:
            # If there is an image in the pipeline, render it.
            surface_main = pygame.surfarray.make_surface(
                self._main_image.swapaxes(0, 1))
            self._display.blit(surface_main, (0, 0))

        # Render white fill to "reset" the text
        self._display.fill((255, 255, 255),
                           rect=pygame.Rect(0, 64, WINDOW_WIDTH,
                                            WINDOW_HEIGHT - 64))

        # Create the text in the window
        surface_fps = self._text_font.render(self.fps_text, True,
                                             (0, 0, 0))
        surface_direction = self._text_font.render(self.direction_text, True,
                                                   (0, 0, 0))

        if self.out is None:
            # Placeholder shaped like the real output, since out[0][0] is
            # read below
            self.out = [[0, 0]]
        surface_steering = self._text_font.render("Steering: %.2f"
                                                  % self.out[0][0], True,
                                                  (0, 0, 0))
        surface_throttle = self._text_font.render("Throttle: %.2f"
                                                  % self.throttle, True,
                                                  (0, 0, 0))

        # And now render that text
        self._display.blit(surface_fps, (6, 70))
        self._display.blit(surface_direction, (120, 70))
        self._display.blit(surface_steering, (6, 100))
        self._display.blit(surface_throttle, (6, 130))

    @staticmethod
    def __response_to_cv(r, channels):
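        """Converts an AirSim ImageResponse into a numpy image array.

        Compressed responses are decoded with OpenCV; uncompressed ones
        arrive as raw bytes with an extra channel, which is dropped so
        that `channels` channels remain.
        """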
        if r.compress:
            image = cv2.imdecode(np.frombuffer(r.image_data_uint8,
                                               dtype=np.uint8),
                                 1)
            image = image.reshape(r.height, r.width, channels)
            image = cv2.cvtColor(image[:, :, 0:channels], cv2.COLOR_RGB2BGR)

        else:
            image = np.frombuffer(r.image_data_uint8, dtype=np.uint8)
            image = image.reshape(r.height, r.width, channels + 1)
            image = image[:, :, 0:channels]
        return image

    def __parse_event(self, event):
        """Parses PyGame events.

        Args:
            event (pygame.Event): The PyGame event to be parsed.
        """
        if event.key == K_KP8:
            self._direction = 0
        elif event.key == K_KP4:
            self._direction = -1
        elif event.key == K_KP6:
            self._direction = 1
        elif event.key == K_SPACE:
            self.__on_reset()
        elif event.key == K_q:
            self._request_quit = True

    def __send_command(self, command):
        """Sends driving commands over the AirSim API.

        Args:
            command (tuple): A tuple in the form (steering, throttle).
        """
        car_control = airsim.CarControls()
        car_control.steering = float(command[0])
        car_control.throttle = command[1]
        self.client.setCarControls(car_control)

    @staticmethod
    def __to_tensor(image):
        """Turns an image into a tensor

        Args:
            image: The image to be converted as an array of uncompressed bits.

        Returns:
            (torch.Tensor) the image as a tensor.
        """
        image = image.transpose(2, 0, 1)

        return torch.from_numpy(image)
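
# Usage sketch (hypothetical): wrap a trained network in the controller and
# hand execution over to the PyGame loop.
#
# controller = Controller(network=runner)
# controller.execute()
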
class CarlaGame(object):
    def __init__(self, carla_client, args):
        self.client = carla_client
        (self._carla_settings, self._intrinsic,
         self._camera_to_car_transform,
         self._lidar_to_car_transform) = make_carla_settings(args)
        self._timer = None
        self._display = None
        self._main_image = None
        self._mini_view_image1 = None
        self._mini_view_image2 = None
        self._enable_autopilot = args.autopilot
        self._lidar_measurement = None
        self._map_view = None
        self._is_on_reverse = False
        self._city_name = args.map_name
        self._map = CarlaMap(self._city_name, 16.43,
                             50.0) if self._city_name is not None else None
        self._map_shape = self._map.map_image.shape if self._city_name is not None else None
        self._map_view = self._map.get_map(
            WINDOW_HEIGHT) if self._city_name is not None else None
        self._position = None
        self._agent_positions = None
        self.captured_frame_no = 9
        self._measurements = None
        self._extrinsic = None
        # To keep track of how far the car has driven since the last capture of data
        self._agent_location_on_last_capture = None
        self._frames_since_last_capture = 0
        # How many frames we have captured since reset
        self._captured_frames_since_restart = 0
        self._build_network()

    def _build_network(self):
        cfg_from_file(CONFIG_FILE)
        # set bs, gpu_num and workers_num to be 1
        cfg.TRAIN.CONFIG.BATCH_SIZE = 1  # only support bs=1 when testing
        cfg.TRAIN.CONFIG.GPU_NUM = 1
        cfg.DATA_LOADER.NUM_THREADS = 1
        cfg.TEST.WITH_GT = False

        self.evaluator = Evaluator(config=cfg, model_path=MODEL_PATH)

    def execute(self):
        """Launch the PyGame."""
        pygame.init()
        self._initialize_game()
        try:
            while True:
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        return
                reset = self._on_loop()
                if not reset:
                    self._on_render()
        finally:
            pygame.quit()

    def _initialize_game(self):
        if self._city_name is not None:
            self._display = pygame.display.set_mode(
                (WINDOW_WIDTH + int(
                    (WINDOW_HEIGHT / float(self._map.map_image.shape[0])) *
                    self._map.map_image.shape[1]), WINDOW_HEIGHT),
                pygame.HWSURFACE | pygame.DOUBLEBUF)
        else:
            self._display = pygame.display.set_mode(
                (WINDOW_WIDTH, WINDOW_HEIGHT),
                pygame.HWSURFACE | pygame.DOUBLEBUF)

        logging.debug('pygame started')
        self._on_new_episode()

    def _on_new_episode(self):
        self._carla_settings.randomize_seeds()
        self._carla_settings.randomize_weather()
        scene = self.client.load_settings(self._carla_settings)
        number_of_player_starts = len(scene.player_start_spots)
        player_start = np.random.randint(number_of_player_starts)
        logging.info('Starting new episode...')
        self.client.start_episode(player_start)
        self._timer = Timer()
        self._is_on_reverse = False

        # Reset all tracking variables
        self._agent_location_on_last_capture = None
        self._frames_since_last_capture = 0
        self._captured_frames_since_restart = 0

    def _on_loop(self):
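        """Processes one simulation tick.

        Reads measurements and sensor data, updates the cached images and
        transforms, and sends either keyboard or autopilot control back to
        the server. Returns True when the episode was reset because enough
        datapoints were captured, so the caller skips rendering that frame.
        """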
        self._timer.tick()
        measurements, sensor_data = self.client.read_data()
        # Stuck detection is currently disabled: the computed value is
        # overridden below.
        # is_stuck = (self._frames_since_last_capture >=
        #             NUM_EMPTY_FRAMES_BEFORE_RESET)
        is_stuck = False
        is_enough_datapoints = (self._captured_frames_since_restart +
                                1) % NUM_RECORDINGS_BEFORE_RESET == 0

        if (is_stuck or is_enough_datapoints) and GEN_DATA:
            logging.warning("Is stucK: {}, is_enough_datapoints: {}".format(
                is_stuck, is_enough_datapoints))
            self._on_new_episode()
            # If we don't sleep, the client will continue to render
            return True

        # (Extrinsic) Rt Matrix
        # (Camera) local 3d to world 3d.
        # Get the transform from the player protobuf transformation.
        world_transform = Transform(measurements.player_measurements.transform)
        # Compute the final transformation matrix.
        self._extrinsic = world_transform * self._camera_to_car_transform
        self._measurements = measurements
        self._last_player_location = measurements.player_measurements.transform.location
        self._main_image = sensor_data.get('CameraRGB', None)
        self._lidar_measurement = sensor_data.get('Lidar32', None)
        self._depth_image = sensor_data.get('DepthCamera', None)
        # Print measurements every second.
        if self._timer.elapsed_seconds_since_lap() > 1.0:
            if self._city_name is not None:
                # Function to get car position on map.
                map_position = self._map.convert_to_pixel([
                    measurements.player_measurements.transform.location.x,
                    measurements.player_measurements.transform.location.y,
                    measurements.player_measurements.transform.location.z
                ])
                # Function to get orientation of the road car is in.
                lane_orientation = self._map.get_lane_orientation([
                    measurements.player_measurements.transform.location.x,
                    measurements.player_measurements.transform.location.y,
                    measurements.player_measurements.transform.location.z
                ])

                MeasurementsDisplayHelper.print_player_measurements_map(
                    measurements.player_measurements, map_position,
                    lane_orientation, self._timer)
            else:
                MeasurementsDisplayHelper.print_player_measurements(
                    measurements.player_measurements, self._timer)
            # Plot position on the map as well.
            self._timer.lap()

        control = self._get_keyboard_control(pygame.key.get_pressed())
        # Set the player position
        if self._city_name is not None:
            self._position = self._map.convert_to_pixel([
                measurements.player_measurements.transform.location.x,
                measurements.player_measurements.transform.location.y,
                measurements.player_measurements.transform.location.z
            ])
            self._agent_positions = measurements.non_player_agents

        if control is None:
            self._on_new_episode()
        elif self._enable_autopilot:
            self.client.send_control(
                measurements.player_measurements.autopilot_control)
        else:
            self.client.send_control(control)

    def _get_keyboard_control(self, keys):
        """
        Return a VehicleControl message based on the pressed keys. Return None
        if a new episode was requested.
        """
        control = KeyboardHelper.get_keyboard_control(keys,
                                                      self._is_on_reverse,
                                                      self._enable_autopilot)
        if control is not None:
            control, self._is_on_reverse, self._enable_autopilot = control
        return control

    def _on_render(self):
        datapoints = []

        if self._main_image is not None and self._depth_image is not None:
            # Convert main image
            image = image_converter.to_rgb_array(self._main_image)

            # Retrieve and draw datapoints
            image, datapoints = self._generate_datapoints(image)

            # Draw lidar
            # Camera coordinate system is left, up, forwards
            if VISUALIZE_LIDAR:
                # Calculation to shift bboxes relative to pitch and roll of player
                rotation = self._measurements.player_measurements.transform.rotation
                pitch, roll, yaw = rotation.pitch, rotation.roll, rotation.yaw
                # Since measurements are in degrees, convert to radians

                pitch = degrees_to_radians(pitch)
                roll = degrees_to_radians(roll)
                yaw = degrees_to_radians(yaw)
                print('pitch: ', pitch)
                print('roll: ', roll)
                print('yaw: ', yaw)

                # Rotation matrix for pitch
                rotP = np.array([[cos(pitch), 0, sin(pitch)], [0, 1, 0],
                                 [-sin(pitch), 0, cos(pitch)]])
                # Rotation matrix for roll
                rotR = np.array([[1, 0, 0], [0, cos(roll), -sin(roll)],
                                 [0, sin(roll), cos(roll)]])

                # combined rotation matrix, must be in order roll, pitch, yaw
                rotRP = np.matmul(rotR, rotP)
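                # Yaw is computed above but not applied here, so the points
                # remain in the vehicle's heading frame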
                # Take the points from the point cloud and transform to car space
                point_cloud = np.array(
                    self._lidar_to_car_transform.transform_points(
                        self._lidar_measurement.data))
                point_cloud[:, 2] -= LIDAR_HEIGHT_POS
                point_cloud = np.matmul(rotRP, point_cloud.T).T
                # Transform to camera space by the inverse of camera_to_car transform
                point_cloud_cam = self._camera_to_car_transform.inverse(
                ).transform_points(point_cloud)
                point_cloud_cam[:, 1] += LIDAR_HEIGHT_POS
                image = lidar_utils.project_point_cloud(
                    image, point_cloud_cam, self._intrinsic, 1)

            # Display image
            surface = pygame.surfarray.make_surface(image.swapaxes(0, 1))
            self._display.blit(surface, (0, 0))
            pygame.display.flip()

            if (self._timer.step + 1) % STEPS_BETWEEN_RECORDINGS == 0:
                if datapoints:
                    # Avoid doing this twice or unnecessarily often
                    if not VISUALIZE_LIDAR:
                        # Calculation to shift bboxes relative to pitch and roll of player
                        rotation = self._measurements.player_measurements.transform.rotation
                        pitch, roll, yaw = rotation.pitch, rotation.roll, rotation.yaw
                        # Since measurements are in degrees, convert to radians

                        pitch = degrees_to_radians(pitch)
                        roll = degrees_to_radians(roll)
                        yaw = degrees_to_radians(yaw)
                        print('pitch: ', pitch)
                        print('roll: ', roll)
                        print('yaw: ', yaw)

                        # Rotation matrix for pitch
                        rotP = np.array([[cos(pitch), 0,
                                          sin(pitch)], [0, 1, 0],
                                         [-sin(pitch), 0,
                                          cos(pitch)]])
                        # Rotation matrix for roll
                        rotR = np.array([[1, 0, 0], [0,
                                                     cos(roll), -sin(roll)],
                                         [0, sin(roll),
                                          cos(roll)]])

                        # combined rotation matrix, must be in order roll, pitch, yaw
                        rotRP = np.matmul(rotR, rotP)
                        # Take the points from the point cloud and transform to car space
                        point_cloud = np.array(
                            self._lidar_to_car_transform.transform_points(
                                self._lidar_measurement.data))
                        point_cloud[:, 2] -= LIDAR_HEIGHT_POS
                        point_cloud = np.matmul(rotRP, point_cloud.T).T
                    self._update_agent_location()
                    # Save screen, lidar and kitti training labels together with calibration and groundplane files
                    # self._save_training_files(datapoints, point_cloud)
                    img = get_image_data(
                        image_converter.to_rgb_array(self._main_image))
                    lid = get_lidar_data(point_cloud)
                    calib = get_calibration_matrices(self._intrinsic,
                                                     self._extrinsic)
                    mess = {
                        "idx": self.captured_frame_no,
                        "image_2": img,
                        "velodyne": lid,
                        "calib": calib
                    }
                    annos = self.evaluator.predict(mess)

                    # annos = self.evaluator.evaluate(self.captured_frame_no)
                    print(annos)
                    scores = annos["score"]
                    vertices = annos["vertices"]
                    bbox = annos["bbox"]

                    for i, score in enumerate(scores):
                        if score * 100 > 35:
                            # draw 3D
                            image = draw_projected_box3d(image,
                                                         vertices[i],
                                                         color=(0, 0, 255),
                                                         thickness=1)
                            image = cv2.putText(
                                image, str(round(score * 100, 2)),
                                (int(bbox[i][0]), int(bbox[i][1])),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 0, 255),
                                1, cv2.LINE_AA)
                            # draw 2D
                            # bbox = annos["bbox"]
                            # x1, y1, x2, y2 = bbox[i]
                            # cv2.rectangle(image, pt1=(x1, y1), pt2=(x2, y2), color=(0, 0, 255), thickness=2)

                    # Display image
                    surface = pygame.surfarray.make_surface(
                        image.swapaxes(0, 1))
                    self._display.blit(surface, (0, 0))
                    pygame.display.flip()
                    self.captured_frame_no += 1
                    self._captured_frames_since_restart += 1
                    self._frames_since_last_capture = 0
                    time.sleep(0.5)
                else:
                    logging.debug("Could save datapoint".format(
                        DISTANCE_SINCE_LAST_RECORDING))
            else:
                self._frames_since_last_capture += 1
                logging.debug(
                    "Not at a recording step - no training data saved this "
                    "frame")

    def _update_agent_location(self):
        self._agent_location_on_last_capture = self._measurements.player_measurements.transform.location

    def _generate_datapoints(self, image):
        """ Returns a list of datapoints (labels and such) that are generated this frame together with the main image image """
        datapoints = []
        image = image.copy()

        # TODO: remove this - identity placeholder for the rotation matrix
        rotRP = np.identity(3)
        # Stores all datapoints for the current frames
        for agent in self._measurements.non_player_agents:
            if should_detect_class(agent) and GEN_DATA:
                image, kitti_datapoint = create_kitti_data_point(
                    agent, self._intrinsic, self._extrinsic.matrix, image,
                    self._depth_image, self._measurements.player_measurements,
                    rotRP)
                if kitti_datapoint:
                    datapoints.append(kitti_datapoint)

        return image, datapoints

    def _save_training_files(self, datapoints, point_cloud):
        logging.info(
            "Attempting to save at timer step {}, frame no: {}".format(
                self._timer.step, self.captured_frame_no))
        groundplane_fname = GROUNDPLANE_PATH.format(self.captured_frame_no)

        lidar_fname = LIDAR_PATH.format(self.captured_frame_no)
        kitti_fname = LABEL_PATH.format(self.captured_frame_no)
        img_fname = IMAGE_PATH.format(self.captured_frame_no)
        calib_filename = CALIBRATION_PATH.format(self.captured_frame_no)

        save_groundplanes(groundplane_fname,
                          self._measurements.player_measurements,
                          LIDAR_HEIGHT_POS)
        save_ref_files(OUTPUT_FOLDER, self.captured_frame_no)
        save_image_data(img_fname,
                        image_converter.to_rgb_array(self._main_image))
        save_kitti_data(kitti_fname, datapoints)
        save_lidar_data(lidar_fname, point_cloud, LIDAR_HEIGHT_POS,
                        LIDAR_DATA_FORMAT)
        save_calibration_matrices(calib_filename, self._intrinsic,
                                  self._extrinsic)

    def train_segmentation(self, image_dir, batch_size, num_epochs):
        """Trains the segmentation model.

        Args:
            image_dir (string): Root directory of the data for training.
            batch_size (int): Number of objects in each batch.
            num_epochs (int): Number of epochs to train for.
        """
        self.load_network("segmentation")
        # Start by making Tk parts
        root = tk.Tk()
        root.title("DriveNet Training")
        root.geometry("350x258")

        # Timers
        # Create timer and counter to calculate processing rate
        timer = Timer()
        counter = 0

        # Plot save location
        plot_loc = path.dirname(path.dirname(path.abspath(__file__)))
        plot_loc = path.join(plot_loc, "plot_csv")
        plot_loc = path.join(
            plot_loc,
            strftime("%Y_%m_%d_%H-%M-%S", gmtime()) + '-loss_data.csv')

        # Configure the grid and geometry
        # -----------------------
        # | Target segmentation |
        # | Output              |
        # | step     | rate     |
        # | epoch    | loss     |
        # | status message      |
        # -----------------------
        root.columnconfigure(0, minsize=175)
        root.columnconfigure(1, minsize=175)
        root.rowconfigure(0, minsize=64)
        root.rowconfigure(1, minsize=64)

        # Prepare tk variables with default values
        step_var = tk.StringVar(master=root, value="Step: 0/0")
        rate_var = tk.StringVar(master=root, value="Rate: 0 steps/s")
        epoch_var = tk.StringVar(master=root,
                                 value="Epoch: 1/{}".format(num_epochs))
        loss_var = tk.StringVar(master=root, value="Loss: 0")
        time_var = tk.StringVar(master=root, value="Time left: 0:00")
        status = tk.StringVar(master=root, value="Preparing dataset")

        # Prepare image canvases
        black_array = io.imread(path.join(image_dir, "seg_00001-cam_0.png"))
        black_array.fill(0)
        black_image = ImageTk.PhotoImage(image=Image.fromarray(black_array))

        target_canvas = tk.Canvas(root, width=320, height=64)
        target_canvas_img = target_canvas.create_image(0,
                                                       0,
                                                       anchor="nw",
                                                       image=black_image)
        target_canvas.grid(row=0, column=0, columnspan=2)
        seg_canvas = tk.Canvas(root, width=320, height=64)
        seg_canvas_img = seg_canvas.create_image(0,
                                                 0,
                                                 anchor="nw",
                                                 image=black_image)
        seg_canvas.grid(row=1, column=0, columnspan=2)

        # Prepare tk labels to be put on the grid
        tk.Label(root, textvariable=step_var).grid(row=2,
                                                   column=0,
                                                   sticky="W",
                                                   padx=5,
                                                   pady=5)
        tk.Label(root, textvariable=rate_var).grid(row=2,
                                                   column=1,
                                                   sticky="W",
                                                   padx=5,
                                                   pady=5)
        tk.Label(root, textvariable=epoch_var).grid(row=3,
                                                    column=0,
                                                    sticky="W",
                                                    padx=5,
                                                    pady=5)
        tk.Label(root, textvariable=loss_var).grid(row=3,
                                                   column=1,
                                                   sticky="W",
                                                   padx=5,
                                                   pady=5)
        tk.Label(root, textvariable=time_var).grid(row=4,
                                                   column=0,
                                                   sticky="W",
                                                   padx=5,
                                                   pady=5)
        tk.Label(root, textvariable=status).grid(row=5,
                                                 column=0,
                                                 columnspan=2,
                                                 sticky="SW",
                                                 padx=5,
                                                 pady=5)

        # Update root so it actually shows something
        root.update_idletasks()
        root.update()

        # Open file for loss data plot
        loss_file = open(plot_loc, 'a')

        # Prepare the datasets and their corresponding dataloaders
        data = SegmentedDataset(image_dir)
        train_loader = DataLoader(dataset=data,
                                  batch_size=batch_size,
                                  shuffle=True)
        status.set("Data sets loaded")

        root.update_idletasks()
        root.update()

        # total_step = 0
        status.set("Training")
        root.update_idletasks()
        root.update()

        if path.isfile(self.weights_save_dir):
            self.network.load_state_dict(torch.load(self.weights_save_dir))

        for epoch in range(num_epochs):
            status.set("Training: all")
            root.update_idletasks()
            root.update()

            # The formerly separate loaders are now a single loader for all data
            total_step = len(train_loader)

            for data in enumerate(train_loader):
                # Run the forward pass.
                # data[0] is the step index, data[1] is the batch.
                image = data[1]['image']
                seg = data[1]["seg"]

                # Prep the target by converting it to a CUDA-compatible format
                seg_cuda = data[1]["seg"].to(device=self.device,
                                             dtype=torch.long,
                                             non_blocking=True)
                seg = seg.numpy()

                out = self.network(image.to(self.device, non_blocking=True))

                # calculate the loss
                if out is None:
                    raise ValueError("forward() has not been run properly.")
                loss = self.criterion(out, seg_cuda)

                # Zero grad
                self.optimizer.zero_grad()
                # Backprop and perform Adam optimization
                loss.backward()
                self.optimizer.step()

                # # Track the accuracy
                # total = data.size(0)
                # _, predicted = torch.max(self.output.data, 1)
                # correct = (predicted == data[0]).sum().item()
                # acc_list.append(correct / total)

                # Update data
                step_var.set("Step: {0}/{1}".format(data[0] + 1, total_step))
                epoch_var.set("Epoch: {0}/{1}".format(epoch + 1, num_epochs))
                loss_var.set("Loss: {:.3f}".format(loss.item()))

                counter += 1
                if timer.elapsed_seconds_since_lap() > 0.3:
                    sps = float(counter) / timer.elapsed_seconds_since_lap()
                    rate_var.set("Rate: {:.2f} steps/s".format(sps))
                    timer.lap()
                    counter = 0

                    if sps == 0:
                        time_left = "NaN"
                    else:
                        time_left = int(((total_step * num_epochs) -
                                         ((float(data[0]) + 1.0) +
                                          (total_step * epoch))) / sps)
                        time_left = datetime.timedelta(seconds=time_left)
                        time_left = str(time_left)
                    time_var.set("Time left: {}".format(time_left))

                # Update the preview images only every other step
                if data[0] % 2 == 0:
                    target_array = self.class_to_image_array(seg)
                    target = ImageTk.PhotoImage(
                        image=Image.fromarray(target_array))
                    seg_array = self.class_prob_to_image_array(
                        out.cpu().detach()[0])
                    seg_img = ImageTk.PhotoImage(
                        image=Image.fromarray(seg_array))
                    target_canvas.itemconfig(target_canvas_img, image=target)
                    seg_canvas.itemconfig(seg_canvas_img, image=seg_img)

                root.update()
                root.update_idletasks()

                loss_file.write("{}\n".format(loss.item()))

        # Now save the loss file and the weights
        loss_file.close()

        # Save the bias and weights
        torch.save(self.network.state_dict(), self.weights_save_dir)

        torch.cuda.empty_cache()

        status.set("Done")
        PlotIt(plot_loc)
        root.mainloop()

    def train_fc(self, image_dir, batch_size, num_epochs, fc_weights):
        """Trains the fully connected layers using a pretrained segmentation.

        Args:
            image_dir (string): Root directory of the data for training.
            batch_size (int): Number of objects in each batch.
            num_epochs (int): Number of epochs to train for.
            fc_weights (string): Path to the fully connected layer weights.
        """
        self.load_network("full")
        # Start by making Tk parts
        root = tk.Tk()
        root.title("DriveNet Training")
        root.geometry("350x258")

        # Timers
        # Create timer and counter to calculate processing rate
        timer = Timer()
        counter = 0

        # Plot save location
        plot_loc = path.dirname(path.dirname(path.abspath(__file__)))
        plot_loc = path.join(plot_loc, "plot_csv")
        plot_loc = path.join(
            plot_loc,
            strftime("%Y_%m_%d_%H-%M-%S", gmtime()) + '-loss_data.csv')

        # Configure the grid and geometry
        # -----------------------
        # | Target segmentation |
        # | Output              |
        # | step     | rate     |
        # | epoch    | loss     |
        # | status message      |
        # -----------------------
        root.columnconfigure(0, minsize=175)
        root.columnconfigure(1, minsize=175)

        # Prepare tk variables with default values
        step_var = tk.StringVar(master=root, value="Step: 0/0")
        rate_var = tk.StringVar(master=root, value="Rate: 0 steps/s")
        epoch_var = tk.StringVar(master=root,
                                 value="Epoch: 1/{}".format(num_epochs))
        loss_var = tk.StringVar(master=root, value="Loss: 0")
        time_var = tk.StringVar(master=root, value="Time left: 0:00")
        status = tk.StringVar(master=root, value="Preparing dataset")

        # Prepare tk labels to be put on the grid
        tk.Label(root, textvariable=step_var).grid(row=0,
                                                   column=0,
                                                   sticky="W",
                                                   padx=5,
                                                   pady=5)
        tk.Label(root, textvariable=rate_var).grid(row=0,
                                                   column=1,
                                                   sticky="W",
                                                   padx=5,
                                                   pady=5)
        tk.Label(root, textvariable=epoch_var).grid(row=1,
                                                    column=0,
                                                    sticky="W",
                                                    padx=5,
                                                    pady=5)
        tk.Label(root, textvariable=loss_var).grid(row=1,
                                                   column=1,
                                                   sticky="W",
                                                   padx=5,
                                                   pady=5)
        tk.Label(root, textvariable=time_var).grid(row=2,
                                                   column=0,
                                                   sticky="W",
                                                   padx=5,
                                                   pady=5)
        tk.Label(root, textvariable=status).grid(row=3,
                                                 column=0,
                                                 columnspan=2,
                                                 sticky="SW",
                                                 padx=5,
                                                 pady=5)

        # Update root so it actually shows something
        root.update_idletasks()
        root.update()

        # Open file for loss data plot
        loss_file = open(plot_loc, 'a')

        # Prepare the datasets and their corresponding dataloaders
        data = SegmentedDataset(image_dir)
        train_loader = DataLoader(dataset=data,
                                  batch_size=batch_size,
                                  shuffle=True)
        status.set("Data sets loaded")

        root.update_idletasks()
        root.update()

        # total_step = 0
        status.set("Training")
        root.update_idletasks()
        root.update()

        if path.isfile(self.weights_save_dir):
            self.network.seg_net.load_state_dict(
                torch.load(self.weights_save_dir))

        if path.isfile(fc_weights):
            self.network.fcd.load_state_dict(torch.load(fc_weights))

        for epoch in range(num_epochs):
            status.set("Training: all")
            root.update_idletasks()
            root.update()

            # The formerly separate loaders are now a single loader for all data
            total_step = len(train_loader)

            for data in enumerate(train_loader):
                # Run the forward pass.
                # data[0] is the step index, data[1] is the batch.
                image = data[1]['image']
                cmd = data[1]['cmd']

                # Prep the target by converting it to a CUDA-compatible format
                steering = data[1]["vehicle_commands"].to(self.device,
                                                          non_blocking=True)

                out = self.network(image.to(self.device, non_blocking=True),
                                   cmd)

                # calculate the loss
                if out is None:
                    raise ValueError("forward() has not been run properly.")
                loss = self.criterion(out, steering)

                # Zero grad
                self.optimizer.zero_grad()
                # Backprop and perform optimization
                loss.backward()
                self.optimizer.step()

                # # Track the accuracy
                # total = data.size(0)
                # _, predicted = torch.max(self.output.data, 1)
                # correct = (predicted == data[0]).sum().item()
                # acc_list.append(correct / total)

                # Update data
                step_var.set("Step: {0}/{1}".format(data[0] + 1, total_step))
                epoch_var.set("Epoch: {0}/{1}".format(epoch + 1, num_epochs))
                loss_var.set("Loss: {:.3f}".format(loss.item()))

                counter += 1
                if timer.elapsed_seconds_since_lap() > 0.3:
                    sps = float(counter) / timer.elapsed_seconds_since_lap()
                    rate_var.set("Rate: {:.2f} steps/s".format(sps))
                    timer.lap()
                    counter = 0

                    if sps == 0:
                        time_left = "NaN"
                    else:
                        time_left = int(((total_step * num_epochs) -
                                         ((float(data[0]) + 1.0) +
                                          (total_step * epoch))) / sps)
                        time_left = datetime.timedelta(seconds=time_left)
                        time_left = str(time_left)
                    time_var.set("Time left: {}".format(time_left))
                root.update()
                root.update_idletasks()

                loss_file.write("{}\n".format(loss.item()))

        # Now save the loss file and the weights
        loss_file.close()

        # Save the bias and weights
        torch.save(self.network.state_dict(), self.weights_save_dir)

        torch.cuda.empty_cache()

        status.set("Done")
        PlotIt(plot_loc)
        root.mainloop()
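
# Usage sketch (hypothetical): train the segmentation backbone first, then
# the fully connected head on top of the pretrained segmentation weights.
#
# trainer.train_segmentation("data/images", batch_size=8, num_epochs=5)
# trainer.train_fc("data/images", batch_size=8, num_epochs=5,
#                  fc_weights="weights/fcd.weights")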
Example #6
class Controller:
    def __init__(self, weight1, weight2):
        """Acts as a controller object that sends and receives data from AirSim.

            This class acts as the interface between the network and the AirSim
            simulation inside of Unreal Engine. It is able to receive camera
            images from AirSim as well as send the driving commands to it and
            is responsible for running the network.

            Control Scheme:
                NUM_4   : Left
                NUM_8   : Forwards
                NUM_6   : Right
                Q       : Quit
                SPACE   : Reset

        Args:
            weight1 (str): Path to the segmentation network weights
            weight2 (str): Path to the fc network weights
        """
        # Initialize AirSim connection
        self.client = airsim.CarClient()
        self.client.confirmConnection()

        # Set up the network
        self.seg_net = SegmentationNetwork("googlenet")
        self.seg_net.eval()
        self.seg_net.cuda()
        if (weight2 is None) or (weight2 == ""):
            # No fc weights supplied, so run the segmentation network only
            self.seg_only = True
        else:
            self.fcd = FCD()
            self.fcd.eval()
            self.fcd.cuda()
            self.seg_only = False
            self.client.enableApiControl(True)

        self.seg_out = None

        self.weight1 = weight1
        self.weight2 = weight2

        self.device = torch.device("cuda")

        # Set up timers for fps counting
        self._timer = Timer()
        self.counter = 0

        # Set up display variables
        self._display = None
        self._main_image = None
        self._overlay_image = None
        self.fps_text = "FPS: 0"
        self.direction_text = "Direction: Forwards"
        self._text_font = None

        self.set_segmentation_ids()

        # Directions:
        # -1 : Left
        # 0 : Forwards
        # 1 : Right
        self._direction = 0  # Direction defaults to forwards

        self.out = None  # Network output
        self.throttle = 0  # Throttle output

        self.max_throttle = 0.35  # Throttle limit

        # Quitting
        self._request_quit = False

    def execute(self):
        """"Launch PyGame."""
        pygame.init()
        self.__init_game()

        # Initialize fonts
        if not pygame.font.get_init():
            pygame.font.init()

        self._text_font = pygame.font.SysFont("helvetica", 24)

        while not self._request_quit:
            self.__on_loop()
            self.__on_render()

        # The loop has exited, so a quit was requested
        pygame.display.quit()
        pygame.quit()
        self.client.enableApiControl(False)  # Give control back to user

    def set_segmentation_ids(self):
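        """Assigns AirSim segmentation object IDs by mesh-name regex.

        All meshes are first reset to ID 0; road-related meshes are then
        mapped to IDs 22-30, and each annotated lane mesh receives its own
        ID starting at 31.
        """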

        # rgb_file = open("seg_rgbs.txt", "r")
        # lines = rgb_file.readlines()
        # for l in lines:
        #     s = l.split('[')
        #     self.color_map[int(s[0].rstrip())] = eval('[' + s[1].rstrip())
        #     self.val_map[tuple(eval('[' + s[1].rstrip()))] = int(s[0].
        #                                                      rstrip())

        found = self.client.simSetSegmentationObjectID(r"[\w]*", 0, True)
        print("Reset all segmentations to zero: %r" % found)

        self.client.simSetSegmentationObjectID(r"ParkingAnnotRoad[\w]*", 22,
                                               True)
        self.client.simSetSegmentationObjectID(r"CrosswalksRoad[\w]*", 23,
                                               True)
        self.client.simSetSegmentationObjectID(r"Car[\w]*", 24, True)

        self.client.simSetSegmentationObjectID(r"GroundRoad[\w]*", 25, True)
        self.client.simSetSegmentationObjectID(r"SolidMarkingRoad[\w]*", 26,
                                               True)
        self.client.simSetSegmentationObjectID(r"DashedMarkingRoad[\w]*", 27,
                                               True)
        self.client.simSetSegmentationObjectID(r"StopLinesRoad[\w]*", 28,
                                               True)
        self.client.simSetSegmentationObjectID(r"ParkingLinesRoad[\w]*", 29,
                                               True)
        self.client.simSetSegmentationObjectID(r"JunctionsAnnotRoad[\w]*", 30,
                                               True)

        for i in range(max_lanes):
            self.client.simSetSegmentationObjectID(
                "LaneRoadAnnot" + str(i) + r"[\w]*", i + 31, True)

    def __init_game(self):
        """Initializes the PyGame window and creates a new episode.

            This is separate from the main init method because the init is
            intended to create an instance of the class, but not to start
            the game objects yet.
        """
        self.__on_reset()

        self._display = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT),
                                                pygame.HWSURFACE
                                                | pygame.DOUBLEBUF)

        self.seg_net.load_state_dict(torch.load(self.weight1))
        if not self.seg_only:
            self.fcd.load_state_dict(torch.load(self.weight2))

        print("PyGame started")

    def __on_reset(self):
        """Resets the state of the client."""
        print("Resetting client")
        self.client.reset()
        self._timer = Timer()

    def __on_loop(self):
        """Commands to execute on every loop."""
        # Make time tick
        self._timer.tick()

        # Get an image from Unreal
        response = self.client.simGetImages([
            airsim.ImageRequest("0", airsim.ImageType.Segmentation, False,
                                False)
        ])
        rgb = None
        if response:
            rgb = self.__response_to_cv(response[0], 3)
            self._main_image = rgb

        # Get key presses and parse them
        events = pygame.event.get()

        for event in events:
            if event.type == pygame.KEYDOWN:
                self.__parse_event(event)
            if event.type == pygame.QUIT:
                self._request_quit = True

        # Run the network
        # First convert the images to tensors
        rgb = self.__to_tensor(rgb).float().to(self.device)

        self.seg_out = self.seg_net.forward(torch.unsqueeze(rgb, 0))
        self._overlay_image = self.seg_out.cpu().detach()[0].numpy()
        self._overlay_image = self._overlay_image.transpose(1, 2, 0)

        if not self.seg_only:
            # Flatten
            x = self.seg_out.view(-1, self._num_flat_features(self.seg_out))

            # Analyze for steering
            x = self.fcd(x, [0, [self._direction]])
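            # (The second argument packs the high-level driving command; its
            # exact structure is whatever the fcd network defines elsewhere.)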

            # Get its data, convert to numpy, then wrap in a tuple
            self.out = tuple(x.cpu().detach().numpy())

            # Now send the command to airsim
            if MAX_THROTTLE_ONLY:
                self.throttle = self.max_throttle
            else:
                self.throttle = self.out[0][1]
            self.__send_command((self.out[0][0], self.throttle))

        # Computation is now complete. Add to the counter.
        self.counter += 1

        # Determine then update direction
        if self._direction == 0:
            direction = "Forward"
        elif self._direction == -1:
            direction = "Left"
        else:
            direction = "Right"
        self.direction_text = "Direction: {0}".format(direction)

        # Update FPS
        if self._timer.elapsed_seconds_since_lap() > 0.3:
            # Determine FPS
            fps = float(self.counter) / self._timer.elapsed_seconds_since_lap()

            # Update the info
            self.fps_text = "FPS: {0}".format(int(fps))

            # Reset counters
            self.counter = 0
            self._timer.lap()

        pygame.display.update()  # Finally, update the display.

    def __on_render(self):
        """Renders the pygame window itself."""
        if self._main_image is not None and self._overlay_image is not None:
            # If there is an image in the pipeline, render it.
            img = self.__overlay_images(self._main_image, self._overlay_image)
            surface_main = pygame.surfarray.make_surface(img.swapaxes(0, 1))
            self._display.blit(surface_main, (0, 0))

        # Render white fill to "reset" the text
        self._display.fill((255, 255, 255),
                           rect=pygame.Rect(0, 64, WINDOW_WIDTH,
                                            WINDOW_HEIGHT - 64))

        # Create the text in the window
        surface_fps = self._text_font.render(self.fps_text, True, (0, 0, 0))
        surface_direction = self._text_font.render(self.direction_text, True,
                                                   (0, 0, 0))

        if self.out is None:
            self.out = (np.array([0], dtype="float32"), )
        surface_steering = self._text_font.render(
            "Steering: %.2f" % self.out[0][0], True, (0, 0, 0))
        surface_throttle = self._text_font.render(
            "Throttle: %.2f" % self.throttle, True, (0, 0, 0))

        # And now render that text
        self._display.blit(surface_fps, (6, 70))
        self._display.blit(surface_direction, (120, 70))
        self._display.blit(surface_steering, (6, 100))
        self._display.blit(surface_throttle, (6, 130))

    @staticmethod
    def __response_to_cv(r, channels):
        if r.compress:
            # Compressed responses carry encoded bytes; decode them first.
            image = cv2.imdecode(
                np.frombuffer(r.image_data_uint8, dtype=np.uint8), 1)
            image = image.reshape(r.height, r.width, channels)
            image = cv2.cvtColor(image[:, :, 0:channels], cv2.COLOR_RGB2BGR)
        else:
            # Uncompressed responses are raw bytes with one extra channel;
            # keep only the first `channels` planes.
            image = np.frombuffer(r.image_data_uint8, dtype=np.uint8)
            image = image.reshape(r.height, r.width, channels + 1)
            image = image[:, :, 0:channels]
        return image

    @staticmethod
    def __overlay_images(image1, image2):
        """Overlays using linear dodge.

        Args:
            image1 (np.ndarray): First image.
            image2 (np.ndarray): Second image. The one that will become the
            overlay.

        Returns:
            (np.ndarray) The overlayed image
        """
        shape = image1.shape
        output = np.empty(shape)
        for i in range(shape[0]):
            for j in range(shape[1]):
                img2_color_array = [0, 0, 0]
                index_max = np.argmax(image2[i][j])
                if index_max == 1:
                    img2_color_array = np.array([0, 0, 255])
                elif index_max == 2:
                    img2_color_array = np.array([255, 0, 0])
                for k in range(shape[2]):
                    output[i][j][k] = max(image1[i][j][k], img2_color_array[k])

        return output
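
    # A vectorized sketch of the same linear-dodge overlay (a hypothetical
    # alternative, not part of the original class): classes 1 and 2 get the
    # same fixed colors as above, everything else stays black.
    @staticmethod
    def __overlay_images_fast(image1, image2):
        idx = np.argmax(image2, axis=2)
        overlay = np.zeros_like(image1)
        overlay[idx == 1] = (0, 0, 255)
        overlay[idx == 2] = (255, 0, 0)
        # Linear dodge: per-channel maximum of base image and overlay.
        return np.maximum(image1, overlay)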

    def __parse_event(self, event):
        """Parses PyGame events.

        Args:
            event (pygame.Event): The PyGame event to be parsed.
        """
        if event.key == K_KP8:
            self._direction = 0
        elif event.key == K_KP4:
            self._direction = -1
        elif event.key == K_KP6:
            self._direction = 1
        elif event.key == K_SPACE:
            self.__on_reset()
        elif event.key == K_q:
            self._request_quit = True

    def __send_command(self, command):
        """Sends driving commands over the AirSim API.

        Args:
            command (tuple): A tuple in the form (steering, throttle).
        """
        car_control = airsim.CarControls()
        car_control.steering = float(command[0])
        car_control.throttle = command[1]
        self.client.setCarControls(car_control)

    @staticmethod
    def __to_tensor(image):
        """Turns an image into a tensor

        Args:
            image: The image to be converted as an array of uncompressed bits.

        Returns:
            (torch.Tensor) the image as a tensor.
        """
        image = image.transpose(2, 0, 1)

        return torch.from_numpy(image)

    @staticmethod
    def _num_flat_features(x):
        """Multiplies the number of features for flattening a convolution.

        References:
            https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html
        """
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
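
# Equivalence sketch (illustrative): for a conv activation x of shape
# (N, C, H, W), x.view(-1, self._num_flat_features(x)) matches
# torch.flatten(x, start_dim=1), e.g.:
#
#     x = torch.randn(4, 8, 5, 5)   # hypothetical batch
#     assert torch.flatten(x, 1).shape[1] == 8 * 5 * 5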
Example #7
class CarlaGame(object):
    def __init__(self, carla_client, args):
        self.client = carla_client
        (self._carla_settings, self._intrinsic, self._camera_to_car_transform,
         self._lidar_to_car_transform) = make_carla_settings(args)
        self._timer = None
        self._display = None
        self._main_image = None
        self._mini_view_image1 = None
        self._mini_view_image2 = None
        self._enable_autopilot = args.autopilot
        self._lidar_measurement = None
        self._map_view = None
        self._is_on_reverse = False
        self._city_name = args.map_name
        self._map = CarlaMap(self._city_name, 16.43,
                             50.0) if self._city_name is not None else None
        self._map_shape = self._map.map_image.shape if self._city_name is not None else None
        self._map_view = self._map.get_map(
            WINDOW_HEIGHT) if self._city_name is not None else None
        self._position = None
        self._agent_positions = None
        self.captured_frame_no = self.current_captured_frame_num()
        self._measurements = None
        self._extrinsic = None
        # To keep track of how far the car has driven since the last capture of data
        self._agent_location_on_last_capture = None
        self._frames_since_last_capture = 0
        # How many frames we have captured since reset
        self._captured_frames_since_restart = 0

    def current_captured_frame_num(self):
        # Figures out which frame number we are currently on.
        # This runs once, at simulator startup, in case a dataset already
        # exists; the user can then choose to overwrite or append to it.
        label_path = os.path.join(OUTPUT_FOLDER, 'label_2/')
        num_existing_data_files = len(
            [name for name in os.listdir(label_path) if name.endswith('.txt')])
        logging.info("Found {} existing label files".format(
            num_existing_data_files))
        if num_existing_data_files == 0:
            return 0
        answer = input(
            "There already exists a dataset in {}. Would you like to (O)verwrite or (A)ppend the dataset? (O/A)"
            .format(OUTPUT_FOLDER))
        if answer.upper() == "O":
            logging.info(
                "Resetting frame number to 0 and overwriting existing")
            # Overwrite the data
            return 0
        logging.info("Continuing recording data on frame number {}".format(
            num_existing_data_files))
        return num_existing_data_files

    def execute(self):
        """Launch the PyGame."""
        pygame.init()
        self._initialize_game()
        try:
            while True:
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        return
                reset = self._on_loop()
                if not reset:
                    self._on_render()
        finally:
            pygame.quit()

    def _initialize_game(self):
        if self._city_name is not None:
            self._display = pygame.display.set_mode(
                (WINDOW_WIDTH + int(
                    (WINDOW_HEIGHT / float(self._map.map_image.shape[0])) *
                    self._map.map_image.shape[1]), WINDOW_HEIGHT),
                pygame.HWSURFACE | pygame.DOUBLEBUF)
        else:
            self._display = pygame.display.set_mode(
                (WINDOW_WIDTH, WINDOW_HEIGHT),
                pygame.HWSURFACE | pygame.DOUBLEBUF)

        logging.debug('pygame started')
        self._on_new_episode()

    def _on_new_episode(self):
        self._carla_settings.randomize_seeds()
        self._carla_settings.randomize_weather()
        scene = self.client.load_settings(self._carla_settings)
        number_of_player_starts = len(scene.player_start_spots)
        player_start = np.random.randint(number_of_player_starts)
        logging.info('Starting new episode...')
        self.client.start_episode(player_start)
        self._timer = Timer()
        self._is_on_reverse = False

        # Reset all tracking variables
        self._agent_location_on_last_capture = None
        self._frames_since_last_capture = 0
        self._captured_frames_since_restart = 0

    def _on_loop(self):
        self._timer.tick()
        measurements, sensor_data = self.client.read_data()
        # logging.info("Frame no: {}, = {}".format(self.captured_frame_no,
        #                                         (self.captured_frame_no + 1) % NUM_RECORDINGS_BEFORE_RESET))
        # Reset the environment if the agent is stuck or can't find any
        # agents, or if we have captured enough frames in this episode
        is_stuck = self._frames_since_last_capture >= NUM_EMPTY_FRAMES_BEFORE_RESET
        is_enough_datapoints = (self._captured_frames_since_restart +
                                1) % NUM_RECORDINGS_BEFORE_RESET == 0

        if (is_stuck or is_enough_datapoints) and GEN_DATA:
            logging.warning("is_stuck: {}, is_enough_datapoints: {}".format(
                is_stuck, is_enough_datapoints))
            self._on_new_episode()
            # If we don't return here, the client will continue to render
            return True

        # (Extrinsic) Rt Matrix
        # (Camera) local 3d to world 3d.
        # Get the transform from the player protobuf transformation.
        world_transform = Transform(measurements.player_measurements.transform)
        # Compute the final transformation matrix.
        self._extrinsic = world_transform * self._camera_to_car_transform
        self._measurements = measurements
        self._last_player_location = measurements.player_measurements.transform.location
        self._main_image = sensor_data.get('CameraRGB', None)
        self._lidar_measurement = sensor_data.get('Lidar32', None)
        self._depth_image = sensor_data.get('DepthCamera', None)
        # Print measurements every second.
        if self._timer.elapsed_seconds_since_lap() > 1.0:
            if self._city_name is not None:
                # Function to get car position on map.
                map_position = self._map.convert_to_pixel([
                    measurements.player_measurements.transform.location.x,
                    measurements.player_measurements.transform.location.y,
                    measurements.player_measurements.transform.location.z
                ])
                # Function to get orientation of the road car is in.
                lane_orientation = self._map.get_lane_orientation([
                    measurements.player_measurements.transform.location.x,
                    measurements.player_measurements.transform.location.y,
                    measurements.player_measurements.transform.location.z
                ])

                MeasurementsDisplayHelper.print_player_measurements_map(
                    measurements.player_measurements, map_position,
                    lane_orientation, self._timer)
            else:
                MeasurementsDisplayHelper.print_player_measurements(
                    measurements.player_measurements, self._timer)
            # Plot position on the map as well.
            self._timer.lap()

        control = self._get_keyboard_control(pygame.key.get_pressed())
        # Set the player position
        if self._city_name is not None:
            self._position = self._map.convert_to_pixel([
                measurements.player_measurements.transform.location.x,
                measurements.player_measurements.transform.location.y,
                measurements.player_measurements.transform.location.z
            ])
            self._agent_positions = measurements.non_player_agents

        if control is None:
            self._on_new_episode()
        elif self._enable_autopilot:
            self.client.send_control(
                measurements.player_measurements.autopilot_control)
        else:
            self.client.send_control(control)

    def _get_keyboard_control(self, keys):
        """
        Return a VehicleControl message based on the pressed keys. Return None
        if a new episode was requested.
        """
        control = KeyboardHelper.get_keyboard_control(keys,
                                                      self._is_on_reverse,
                                                      self._enable_autopilot)
        if control is not None:
            control, self._is_on_reverse, self._enable_autopilot = control
        return control

    def _on_render(self):
        datapoints = []

        if self._main_image is not None and self._depth_image is not None:
            # Convert main image
            image = image_converter.to_rgb_array(self._main_image)

            # Retrieve and draw datapoints
            image, datapoints = self._generate_datapoints(image)

            # Draw lidar
            # Camera coordinate system is left, up, forwards
            if VISUALIZE_LIDAR:
                # Calculation to shift bboxes relative to pitch and roll of player
                rotation = self._measurements.player_measurements.transform.rotation
                pitch, roll, yaw = rotation.pitch, rotation.roll, rotation.yaw
                # Since measurements are in degrees, convert to radians

                pitch = degrees_to_radians(pitch)
                roll = degrees_to_radians(roll)
                yaw = degrees_to_radians(yaw)
                print('pitch: ', pitch)
                print('roll: ', roll)
                print('yaw: ', yaw)

                # Rotation matrix for pitch
                rotP = np.array([[cos(pitch), 0, sin(pitch)], [0, 1, 0],
                                 [-sin(pitch), 0, cos(pitch)]])
                # Rotation matrix for roll
                rotR = np.array([[1, 0, 0], [0, cos(roll), -sin(roll)],
                                 [0, sin(roll), cos(roll)]])

                # combined rotation matrix, must be in order roll, pitch, yaw
                rotRP = np.matmul(rotR, rotP)
                # Take the points from the point cloud and transform to car space
                point_cloud = np.array(
                    self._lidar_to_car_transform.transform_points(
                        self._lidar_measurement.data))
                point_cloud[:, 2] -= LIDAR_HEIGHT_POS
                point_cloud = np.matmul(rotRP, point_cloud.T).T
                # print(self._lidar_to_car_transform.matrix)
                # print(self._camera_to_car_transform.matrix)
                # Transform to camera space by the inverse of camera_to_car transform
                point_cloud_cam = self._camera_to_car_transform.inverse(
                ).transform_points(point_cloud)
                point_cloud_cam[:, 1] += LIDAR_HEIGHT_POS
                image = lidar_utils.project_point_cloud(
                    image, point_cloud_cam, self._intrinsic, 1)

            # Display image
            surface = pygame.surfarray.make_surface(image.swapaxes(0, 1))
            self._display.blit(surface, (0, 0))
            if self._map_view is not None:
                self._display_agents(self._map_view)
            pygame.display.flip()

            # Determine whether to save files
            distance_driven = self._distance_since_last_recording()
            #print("Distance driven since last recording: {}".format(distance_driven))
            has_driven_long_enough = distance_driven is None or distance_driven > DISTANCE_SINCE_LAST_RECORDING
            if (self._timer.step + 1) % STEPS_BETWEEN_RECORDINGS == 0:
                if has_driven_long_enough and datapoints:
                    # Avoid doing this twice or unnecessarily often
                    if not VISUALIZE_LIDAR:
                        # Calculation to shift bboxes relative to pitch and roll of player
                        rotation = self._measurements.player_measurements.transform.rotation
                        pitch, roll, yaw = rotation.pitch, rotation.roll, rotation.yaw
                        # Since measurements are in degrees, convert to radians

                        pitch = degrees_to_radians(pitch)
                        roll = degrees_to_radians(roll)
                        yaw = degrees_to_radians(yaw)
                        print('pitch: ', pitch)
                        print('roll: ', roll)
                        print('yaw: ', yaw)

                        # Rotation matrix for pitch
                        rotP = np.array([[cos(pitch), 0,
                                          sin(pitch)], [0, 1, 0],
                                         [-sin(pitch), 0,
                                          cos(pitch)]])
                        # Rotation matrix for roll
                        rotR = np.array([[1, 0, 0], [0,
                                                     cos(roll), -sin(roll)],
                                         [0, sin(roll),
                                          cos(roll)]])

                        # combined rotation matrix, must be in order roll, pitch, yaw
                        rotRP = np.matmul(rotR, rotP)
                        # Take the points from the point cloud and transform to car space
                        point_cloud = np.array(
                            self._lidar_to_car_transform.transform_points(
                                self._lidar_measurement.data))
                        point_cloud[:, 2] -= LIDAR_HEIGHT_POS
                        point_cloud = np.matmul(rotRP, point_cloud.T).T
                    self._update_agent_location()
                    # Save screen, lidar and kitti training labels together with calibration and groundplane files
                    self._save_training_files(datapoints, point_cloud)
                    self.captured_frame_no += 1
                    self._captured_frames_since_restart += 1
                    self._frames_since_last_capture = 0
                else:
                    logging.debug(
                        "Skipping save: no datapoints were generated, or the "
                        "agent has not driven {} meters since the last "
                        "recording (currently {})".format(
                            DISTANCE_SINCE_LAST_RECORDING, distance_driven))
            else:
                self._frames_since_last_capture += 1
                logging.debug(
                    "Not a recording step; no training data saved this frame")
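
    # Hypothetical refactoring sketch (not in the original): both branches of
    # _on_render above build the same roll/pitch rotation; a helper like this
    # would remove the duplication.
    def _player_roll_pitch_rotation(self):
        rotation = self._measurements.player_measurements.transform.rotation
        pitch = degrees_to_radians(rotation.pitch)
        roll = degrees_to_radians(rotation.roll)
        # Rotation matrix for pitch
        rotP = np.array([[cos(pitch), 0, sin(pitch)],
                         [0, 1, 0],
                         [-sin(pitch), 0, cos(pitch)]])
        # Rotation matrix for roll
        rotR = np.array([[1, 0, 0],
                         [0, cos(roll), -sin(roll)],
                         [0, sin(roll), cos(roll)]])
        # The combined rotation must apply roll then pitch, as above.
        return np.matmul(rotR, rotP)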

    def _distance_since_last_recording(self):
        if self._agent_location_on_last_capture is None:
            return None
        cur_pos = vector3d_to_array(
            self._measurements.player_measurements.transform.location)
        last_pos = vector3d_to_array(self._agent_location_on_last_capture)

        def dist_func(x, y):
            # Note: this is the *squared* Euclidean distance, so the
            # DISTANCE_SINCE_LAST_RECORDING threshold is effectively in
            # squared meters.
            return sum((x - y)**2)

        return dist_func(cur_pos, last_pos)

    def _update_agent_location(self):
        self._agent_location_on_last_capture = self._measurements.player_measurements.transform.location

    def _generate_datapoints(self, image):
        """Returns the datapoints (labels etc.) generated this frame,
        together with the main image."""
        datapoints = []
        image = image.copy()

        # TODO: remove this; identity placeholder for the roll/pitch rotation
        rotRP = np.identity(3)
        # Stores all datapoints for the current frames
        for agent in self._measurements.non_player_agents:
            if should_detect_class(agent) and GEN_DATA:
                image, kitti_datapoint = create_kitti_data_point(
                    agent, self._intrinsic, self._extrinsic.matrix, image,
                    self._depth_image, self._measurements.player_measurements,
                    rotRP)
                if kitti_datapoint:
                    datapoints.append(kitti_datapoint)

        return image, datapoints

    def _save_training_files(self, datapoints, point_cloud):
        logging.info(
            "Attempting to save at timer step {}, frame no: {}".format(
                self._timer.step, self.captured_frame_no))
        groundplane_fname = GROUNDPLANE_PATH.format(self.captured_frame_no)
        lidar_fname = LIDAR_PATH.format(self.captured_frame_no)
        kitti_fname = LABEL_PATH.format(self.captured_frame_no)
        img_fname = IMAGE_PATH.format(self.captured_frame_no)
        calib_filename = CALIBRATION_PATH.format(self.captured_frame_no)
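        # (The *_PATH constants are format-string templates defined elsewhere
        # in this script, presumably following the usual KITTI folder layout.)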

        save_groundplanes(groundplane_fname,
                          self._measurements.player_measurements,
                          LIDAR_HEIGHT_POS)
        save_ref_files(OUTPUT_FOLDER, self.captured_frame_no)
        save_image_data(img_fname,
                        image_converter.to_rgb_array(self._main_image))
        save_kitti_data(kitti_fname, datapoints)
        save_lidar_data(lidar_fname, point_cloud, LIDAR_HEIGHT_POS,
                        LIDAR_DATA_FORMAT)
        save_calibration_matrices(calib_filename, self._intrinsic,
                                  self._extrinsic)

    def _display_agents(self, image):
        image = image[:, :, :3]
        new_window_width = (float(WINDOW_HEIGHT) / float(self._map_shape[0])) * \
            float(self._map_shape[1])
        surface = pygame.surfarray.make_surface(image.swapaxes(0, 1))
        w_pos = int(self._position[0] *
                    (float(WINDOW_HEIGHT) / float(self._map_shape[0])))
        h_pos = int(self._position[1] *
                    (new_window_width / float(self._map_shape[1])))
        pygame.draw.circle(surface, [255, 0, 0, 255], (w_pos, h_pos), 6, 0)
        for agent in self._agent_positions:
            if agent.HasField('vehicle'):
                agent_position = self._map.convert_to_pixel([
                    agent.vehicle.transform.location.x,
                    agent.vehicle.transform.location.y,
                    agent.vehicle.transform.location.z
                ])
                w_pos = int(agent_position[0] *
                            (float(WINDOW_HEIGHT) / float(self._map_shape[0])))
                h_pos = int(agent_position[1] *
                            (new_window_width / float(self._map_shape[1])))
                pygame.draw.circle(surface, [255, 0, 255, 255], (w_pos, h_pos),
                                   4, 0)
        self._display.blit(surface, (WINDOW_WIDTH, 0))
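
# A minimal usage sketch (assumptions: a connected CARLA client and an
# argparse namespace with the fields read in CarlaGame.__init__):
#
#     with make_carla_client(args.host, args.port) as client:
#         game = CarlaGame(client, args)
#         game.execute()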