class LineFollower: """ Represents a line follower robot. """ def __init__(self, pose, controller_params, robot_params, sensor_params): """ Creates a line follower robot. Controller parameters: max_linear_speed_command: the linear speed commanded to the robot. kp: proportional gain of the angle controller. ki: integrative gain of the angle controller. kd: derivative gain of the angle controller. Robot parameters: sensor_offset: offset in x coordinate between the wheels' axle center and the sensor array center max_wheel_speed: maximum wheel speed wheel_radius: radius of the wheel wheels_distance: distance between wheels Sensor parameters: sensor_range: the measurement range in meters of each individual line sensor. num_sensors: the number of line sensors in the array. array_width: the width in meters of the array (distance from the first to the last sensor). :param pose: the initial pose of the robot. :type pose: Pose. :param controller_params: parameters used for the angle controller. :type controller_params: Params. :param robot_params: parameters used for the robot body. :type robot_params: Params. """ self.pose = pose # These variables are used to define reference speeds which will be fed to the wheels' dynamics self.reference_linear_speed = 0.0 self.reference_angular_speed = 0.0 # Since the robot control may have delays, we have to add issued commands to a buffer self.linear_speed_commands = [] self.angular_speed_commands = [] # Robot control delay self.delay = 2 self.max_linear_speed_command = controller_params.max_linear_speed_command # Collecting robot parameters self.sensor_offset = robot_params.sensor_offset self.max_wheel_speed = robot_params.max_wheel_speed self.wheel_radius = robot_params.wheel_radius self.wheels_distance = robot_params.wheels_distance # Since the maximum speed is actually saturated at the wheels, we compute the maximum # allowed angular speed for the desired linear speed max_wheel_linear = self.max_linear_speed_command / self.wheel_radius max_wheel_angular = clamp(self.max_wheel_speed - max_wheel_linear, 0.0, self.max_wheel_speed) # Creating the angle controller self.controller = DiscretePIDController(controller_params.kp, controller_params.ki, controller_params.kd, 2.0 * max_wheel_angular * self.wheel_radius / self.wheels_distance, ROBOT_SAMPLE_TIME) # Creating the line sensor self.line_sensor = LineSensorArray(sensor_params) # Low pass filters are used to simulate the wheels' dynamics self.left_wheel_dynamics = LowPassFilter(robot_params.wheel_bandwidth, SIMULATION_SAMPLE_TIME) self.right_wheel_dynamics = LowPassFilter(robot_params.wheel_bandwidth, SIMULATION_SAMPLE_TIME) # In order to simulate the fact that the robot control frequency may be slower than the simulation frequency, # we define a control frequency divider. self.control_frequency_divider = round(ROBOT_SAMPLE_TIME / SIMULATION_SAMPLE_TIME) self.iteration = 0 def reset(self, pose, controller_params=None): """ Resets the line follower robot. Changing controller parameters is optional. If no controller parameters is passed when calling this method, the previous controller parameters will be maintained. :param pose: the pose of the robot after the reset. :type pose: Pose. :param controller_params: new controller parameters. :type controller_params: Params. """ self.pose = pose # If new controller parameters where passed, we also update the controller parameters. if controller_params is not None: self.max_linear_speed_command = controller_params.max_linear_speed_command self.controller.set_gains(controller_params.kp, controller_params.ki, controller_params.kd) # To guarantee that the robot behavior will be reproducible, we have to reset all variables which # influence its dynamics self.left_wheel_dynamics.reset() self.right_wheel_dynamics.reset() self.controller.reset() self.linear_speed_commands = [] self.angular_speed_commands = [] self.reference_linear_speed = 0.0 self.reference_angular_speed = 0.0 self.iteration = 0 def unicycle_to_wheels(self, linear_speed, angular_speed): """ Converts from speeds of the unicycle model to wheels' speeds :param linear_speed: linear speed. :type linear_speed: float. :param angular_speed: angular speed. :type angular_speed: float. :return right_speed: speed of the right wheel. :rtype right_speed: float. :return left_speed: speed of the left wheel. :rtype left_speed: float. """ right_speed = (1.0 / self.wheel_radius) * (linear_speed + angular_speed * self.wheels_distance / 2.0) left_speed = (1.0 / self.wheel_radius) * (linear_speed - angular_speed * self.wheels_distance / 2.0) return right_speed, left_speed def wheels_to_unicycle(self, right_speed, left_speed): """ Converts from wheels' speeds of the unicycle model. :param right_speed: speed of the right wheel. :type right_speed: float. :param left_speed: speed of the left wheel. :type left_speed: float. :return linear_speed: linear speed. :rtype linear_speed: float. :return angular_speed: angular speed. :rtype angular_speed: float. """ linear_speed = (right_speed + left_speed) * self.wheel_radius / 2.0 angular_speed = (right_speed - left_speed) * self.wheel_radius / self.wheels_distance return linear_speed, angular_speed def get_sensors_global_positions(self): """ Obtains the positions of the sensors in the global coordinate system. :return: global positions of the sensors. :rtype: list of Vector2. """ sensor_center = Vector2(self.pose.position.x, self.pose.position.y) sensor_center.x += self.sensor_offset * cos(self.pose.rotation) sensor_center.y += self.sensor_offset * sin(self.pose.rotation) global_positions = [] for i in range(self.line_sensor.num_sensors): position = Vector2(sensor_center.x, sensor_center.y) position.x += -self.line_sensor.sensors_positions[i] * sin(self.pose.rotation) position.y += self.line_sensor.sensors_positions[i] * cos(self.pose.rotation) global_positions.append(position) return global_positions def set_line_sensor_intensity(self, intensity): """ Sets the intensity of the line sensor array. :param intensity: intensities measured by each line sensor. :type intensity: list of floats. """ self.line_sensor.set_intensity(intensity) def get_velocity(self): """ Obtains the unicycle velocity of the robot. :return: tuple containing the linear and angular speeds of the robot. :rtype: two-dimensional tuple of floats. """ right_speed = self.right_wheel_dynamics.yp left_speed = self.left_wheel_dynamics.yp return self.wheels_to_unicycle(right_speed, left_speed) def set_velocity(self, linear_speed, angular_speed): """ Registers a robot velocity command. Since the actuation system is delayed, the command may not be immediately executed. :param linear_speed: the robot's linear speed. :type linear_speed: float :param angular_speed: the robot's angular speed. :type angular_speed: float """ right_speed, left_speed = self.unicycle_to_wheels(linear_speed, angular_speed) right_speed = clamp(right_speed, -self.max_wheel_speed, self.max_wheel_speed) left_speed = clamp(left_speed, -self.max_wheel_speed, self.max_wheel_speed) linear, angular = self.wheels_to_unicycle(right_speed, left_speed) if len(self.linear_speed_commands) >= self.delay: self.reference_linear_speed = self.linear_speed_commands[-self.delay] if len(self.angular_speed_commands) >= self.delay: self.reference_angular_speed = self.angular_speed_commands[-self.delay] self.linear_speed_commands.append(linear) self.angular_speed_commands.append(angular) if len(self.linear_speed_commands) > self.delay: self.linear_speed_commands.pop(0) if len(self.angular_speed_commands) > self.delay: self.angular_speed_commands.pop(0) def control(self): """ Updates the robot controller. """ error, detected = self.line_sensor.get_error() angular_speed = self.controller.control(error) self.set_velocity(self.max_linear_speed_command, angular_speed) def move(self): """ Moves the robot during one time step. """ dt = SIMULATION_SAMPLE_TIME right_command, left_command = self.unicycle_to_wheels(self.reference_linear_speed, self.reference_angular_speed) right_speed = self.right_wheel_dynamics.filter(right_command) left_speed = self.left_wheel_dynamics.filter(left_command) v, w = self.wheels_to_unicycle(right_speed, left_speed) # If the angular speed is too low, the complete movement equation fails due to a division by zero. # Therefore, in this case, we use the equation we arrive if we take the limit when the angular speed # is close to zero. if fabs(w) < 1.0e-3: self.pose.position.x += v * dt * cos(self.pose.rotation + w * dt / 2.0) self.pose.position.y += v * dt * sin(self.pose.rotation + w * dt / 2.0) else: self.pose.position.x += (2.0 * v / w) * cos(self.pose.rotation + w * dt / 2.0) * sin(w * dt / 2.0) self.pose.position.y += (2.0 * v / w) * sin(self.pose.rotation + w * dt / 2.0) * sin(w * dt / 2.0) self.pose.rotation += w * dt def update(self): """ Updates the robot, including its controller. """ # Since the controller update frequency time is slower than the simulation frequency, # we update the controller only every control_frequency_divider cycles. if self.iteration % self.control_frequency_divider == 0: self.control() # To avoid overflow, we reset the iteration counter self.iteration = self.iteration % self.control_frequency_divider self.move() self.iteration += 1
class LineFollower: """ Represents a line follower robot. """ def __init__(self, pose, rl_algorithm, linear_speed, robot_params, sensor_params): """ Creates a line follower robot. Robot parameters: sensor_offset: offset in x coordinate between the wheels' axle center and the sensor array center max_wheel_speed: maximum wheel speed wheel_radius: radius of the wheel wheels_distance: distance between wheels Sensor parameters: sensor_range: the measurement range in meters of each individual line sensor. num_sensors: the number of line sensors in the array. array_width: the width in meters of the array (distance from the first to the last sensor). :param pose: the initial pose of the robot. :type pose: Pose. :param rl_algorithm: model-free reinforcement learning algorithm used for learning the line follower policy. :type rl_algorithm: RLAlgorithm. :param robot_params: parameters used for the robot body. :type robot_params: Params. :param sensor_params: parameters used for the line sensor. :type sensor_params: Params. """ self.pose = pose # These variables are used to define reference speeds which will be fed to the wheels' dynamics self.reference_linear_speed = 0.0 self.reference_angular_speed = 0.0 # Since the robot control may have delays, we have to add issued commands to a buffer self.linear_speed_commands = [] self.angular_speed_commands = [] # Robot control delay self.max_linear_speed_command = linear_speed # Collecting robot parameters self.sensor_offset = robot_params.sensor_offset self.max_wheel_speed = robot_params.max_wheel_speed self.wheel_radius = robot_params.wheel_radius self.wheels_distance = robot_params.wheels_distance # Since the maximum speed is actually saturated at the wheels, we compute the maximum # allowed angular speed for the desired linear speed max_wheel_linear = self.max_linear_speed_command / self.wheel_radius max_wheel_angular = clamp(self.max_wheel_speed - max_wheel_linear, 0.0, self.max_wheel_speed) self.max_angular_speed = 2.0 * max_wheel_angular * self.wheel_radius / self.wheels_distance self.rl_algorithm = rl_algorithm # Creating the line sensor self.line_sensor = LineSensorArray(sensor_params) # Low pass filters are used to simulate the wheels' dynamics self.left_wheel_dynamics = LowPassFilter(robot_params.wheel_bandwidth, SIMULATION_SAMPLE_TIME) self.right_wheel_dynamics = LowPassFilter(robot_params.wheel_bandwidth, SIMULATION_SAMPLE_TIME) # In order to simulate the fact that the robot control frequency may be slower than the simulation frequency, # we define a control frequency divider. self.control_frequency_divider = round(ROBOT_SAMPLE_TIME / SIMULATION_SAMPLE_TIME) self.iteration = 0 self.is_learning = True self.previous_sar = None self.discounted_cumulative_reward = 0.0 def reset(self, pose, is_learning=True): """ Resets the line follower robot. Changing controller parameters is optional. If no controller parameters is passed when calling this method, the previous controller parameters will be maintained. :param pose: the pose of the robot after the reset. :type pose: Pose. :param controller_params: new controller parameters. :type controller_params: Params. """ self.pose = pose # To guarantee that the robot behavior will be reproducible, we have to reset all variables which # influence its dynamics self.left_wheel_dynamics.reset() self.right_wheel_dynamics.reset() self.linear_speed_commands = [] self.angular_speed_commands = [] self.reference_linear_speed = 0.0 self.reference_angular_speed = 0.0 self.iteration = 0 self.is_learning = is_learning self.previous_sar = None self.discounted_cumulative_reward = 0.0 def unicycle_to_wheels(self, linear_speed, angular_speed): """ Converts from speeds of the unicycle model to wheels' speeds :param linear_speed: linear speed. :type linear_speed: float. :param angular_speed: angular speed. :type angular_speed: float. :return right_speed: speed of the right wheel. :rtype right_speed: float. :return left_speed: speed of the left wheel. :rtype left_speed: float. """ right_speed = (1.0 / self.wheel_radius) * ( linear_speed + angular_speed * self.wheels_distance / 2.0) left_speed = (1.0 / self.wheel_radius) * ( linear_speed - angular_speed * self.wheels_distance / 2.0) return right_speed, left_speed def wheels_to_unicycle(self, right_speed, left_speed): """ Converts from wheels' speeds of the unicycle model. :param right_speed: speed of the right wheel. :type right_speed: float. :param left_speed: speed of the left wheel. :type left_speed: float. :return linear_speed: linear speed. :rtype linear_speed: float. :return angular_speed: angular speed. :rtype angular_speed: float. """ linear_speed = (right_speed + left_speed) * self.wheel_radius / 2.0 angular_speed = (right_speed - left_speed) * self.wheel_radius / self.wheels_distance return linear_speed, angular_speed def get_sensors_global_positions(self): """ Obtains the positions of the sensors in the global coordinate system. :return: global positions of the sensors. :rtype: list of Vector2. """ sensor_center = Vector2(self.pose.position.x, self.pose.position.y) sensor_center.x += self.sensor_offset * cos(self.pose.rotation) sensor_center.y += self.sensor_offset * sin(self.pose.rotation) global_positions = [] for i in range(self.line_sensor.num_sensors): position = Vector2(sensor_center.x, sensor_center.y) position.x += -self.line_sensor.sensors_positions[i] * sin( self.pose.rotation) position.y += self.line_sensor.sensors_positions[i] * cos( self.pose.rotation) global_positions.append(position) return global_positions def set_line_sensor_intensity(self, intensity): """ Sets the intensity of the line sensor array. :param intensity: intensities measured by each line sensor. :type intensity: list of floats. """ self.line_sensor.set_intensity(intensity) def get_velocity(self): """ Obtains the unicycle velocity of the robot. :return: tuple containing the linear and angular speeds of the robot. :rtype: two-dimensional tuple of floats. """ right_speed = self.right_wheel_dynamics.yp left_speed = self.left_wheel_dynamics.yp return self.wheels_to_unicycle(right_speed, left_speed) def set_velocity(self, linear_speed, angular_speed): """ Registers a robot velocity command. Since the actuation system is delayed, the command may not be immediately executed. :param linear_speed: the robot's linear speed. :type linear_speed: float :param angular_speed: the robot's angular speed. :type angular_speed: float """ right_speed, left_speed = self.unicycle_to_wheels( linear_speed, angular_speed) right_speed = clamp(right_speed, -self.max_wheel_speed, self.max_wheel_speed) left_speed = clamp(left_speed, -self.max_wheel_speed, self.max_wheel_speed) linear, angular = self.wheels_to_unicycle(right_speed, left_speed) self.reference_linear_speed = linear self.reference_angular_speed = angular_speed def control(self): """ Updates the reinforcement learning algorithm. """ num_states = self.rl_algorithm.get_num_states() num_actions = self.rl_algorithm.get_num_actions() num_states_detected = num_states - 1 error, detected = self.line_sensor.get_error() if detected: # Discretizing the error to find the state error_normalized = (error + self.line_sensor.array_width / 2.0) / self.line_sensor.array_width state = round(error_normalized * (num_states_detected - 1)) reward = -(error / (self.line_sensor.array_width / 2.0))**2 else: state = num_states - 1 # consider the last state as the one when the robot does not detect the line reward = -5.0 # hardly penalizes the robot when it does not detect the line if self.is_learning: action = self.rl_algorithm.get_exploratory_action(state) else: action = self.rl_algorithm.get_greedy_action(state) if self.previous_sar is not None: # if (S, A, R) is None, then we are at the first iteration previous_state, previous_action, previous_reward = self.previous_sar self.rl_algorithm.learn(previous_state, previous_action, previous_reward, state, action) # Making the action continuous angular_speed = ( action / num_actions ) * 2.0 * self.max_angular_speed - self.max_angular_speed # Saving (S, A, R) for the next iteration self.previous_sar = (state, action, reward) # Computing the return of this episode self.discounted_cumulative_reward = self.rl_algorithm.gamma * self.discounted_cumulative_reward + reward self.set_velocity(self.max_linear_speed_command, angular_speed) def move(self): """ Moves the robot during one time step. """ dt = SIMULATION_SAMPLE_TIME right_command, left_command = self.unicycle_to_wheels( self.reference_linear_speed, self.reference_angular_speed) right_speed = self.right_wheel_dynamics.filter(right_command) left_speed = self.left_wheel_dynamics.filter(left_command) v, w = self.wheels_to_unicycle(right_speed, left_speed) # If the angular speed is too low, the complete movement equation fails due to a division by zero. # Therefore, in this case, we use the equation we arrive if we take the limit when the angular speed # is close to zero. if fabs(w) < 1.0e-3: self.pose.position.x += v * dt * cos(self.pose.rotation + w * dt / 2.0) self.pose.position.y += v * dt * sin(self.pose.rotation + w * dt / 2.0) else: self.pose.position.x += (2.0 * v / w) * cos(self.pose.rotation + w * dt / 2.0) * sin( w * dt / 2.0) self.pose.position.y += (2.0 * v / w) * sin(self.pose.rotation + w * dt / 2.0) * sin( w * dt / 2.0) self.pose.rotation += w * dt def update(self): """ Updates the robot, including its controller. """ # Since the controller update frequency time is slower than the simulation frequency, # we update the controller only every control_frequency_divider cycles. if self.iteration % self.control_frequency_divider == 0: self.control() # To avoid overflow, we reset the iteration counter self.iteration = self.iteration % self.control_frequency_divider self.move() self.iteration += 1