Example #1
    def __init__(self, donkey_name, mqtt_broker, sensor_size=(120, 160, 3)):
        self.camera_sub = MQTTValueSub("donkey/%s/camera" % donkey_name,
                                       broker=mqtt_broker)
        self.controller_pub = MQTTValuePub("donkey/%s/controls" % donkey_name,
                                           broker=mqtt_broker)
        self.jpgToImg = JpgToImgArr()
        self.sensor_size = sensor_size
Example #2
class DonkeyRemoteContoller:
    def __init__(self, donkey_name, mqtt_broker, sensor_size=(120, 160, 3)):
        self.camera_sub = MQTTValueSub("donkey/%s/camera" % donkey_name, broker=mqtt_broker)#, def_value=(np.zeros((40, 40)), {"cte": 0}))
        self.controller_pub = MQTTValuePub("donkey/%s/controls" % donkey_name, broker=mqtt_broker)
        self.jpgToImg = JpgToImgArr()
        self.sensor_size = sensor_size

    def get_sensor_size(self):
        return self.sensor_size

    def wait_until_connected(self):
        pass

    def take_action(self, action):
        self.controller_pub.run(action)

    def quit(self):
        self.camera_sub.shutdown()
        self.controller_pub.shutdown()

    def get_original_image(self):
        return self.img

    def observe(self):
        jpg = self.camera_sub.run()
        self.img = self.jpgToImg.run(jpg)
        return self.img
Example #3
class DonkeyRemoteContoller:
    def __init__(self, donkey_name, mqtt_broker, sensor_size=(120, 160, 3)):
        self.camera_sub = MQTTValueSub("donkey/%s/camera" % donkey_name,
                                       broker=mqtt_broker)
        self.controller_pub = MQTTValuePub("donkey/%s/controls" % donkey_name,
                                           broker=mqtt_broker)
        self.jpgToImg = JpgToImgArr()
        self.sensor_size = sensor_size

    def get_sensor_size(self):
        return self.sensor_size

    def wait_until_connected(self):
        pass

    def take_action(self, action):
        self.controller_pub.run(action)

    def quit(self):
        self.camera_sub.shutdown()
        self.controller_pub.shutdown()

    def get_original_image(self):
        return self.img

    def observe(self):
        jpg = self.camera_sub.run()
        self.img = self.jpgToImg.run(jpg)
        return self.img
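
A minimal client-side loop over DonkeyRemoteContoller might look like the sketch below; the donkey name, broker address, and the (steering, throttle) action format are assumptions inferred from the car-side examples further down, not part of this example.

# Hedged usage sketch: "my_donkey" and the broker address are placeholders.
ctrl = DonkeyRemoteContoller("my_donkey", "mqtt.eclipse.org")
ctrl.wait_until_connected()
try:
    for _ in range(100):
        img = ctrl.observe()              # latest camera frame as a numpy array
        ctrl.take_action((0.0, 0.2))      # (steering, throttle), matching the car-side subscriber
finally:
    ctrl.quit()                           # shut down both MQTT connections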
    def __init__(self, car="kari", mqtt_broker="mqtt.eclipse.org"):

        self.camera_sub = MQTTValueSub("donkey/%s/camera" % car, broker=mqtt_broker)
        self.state_sub = MQTTValueSub("donkey/%s/state" % car, broker=mqtt_broker)

        self.controller_pub = MQTTValuePub("donkey/%s/controls" % car, broker=mqtt_broker)
        self.jpgToImg = JpgToImgArr()

        self.img = np.zeros((40, 40))
        self.state = None
        self.speed = 0
Example #5
    def __init__(self, alg_type, sim, car_name=args.car_name):
        self.args = define_config()
        self.agent = Dreamer(self.args)
        self.sim = sim

        self.image = np.zeros((120, 160, 3))
        self.observation = torch.zeros(
            (1, 3, 64, 64))  # init observation, with batch dim
        self.belief = torch.zeros(1,
                                  self.args.belief_size,
                                  device=self.args.device)
        self.posterior_state = torch.zeros(1,
                                           self.args.state_size,
                                           device=self.args.device)
        self.action = torch.zeros(1,
                                  self.args.action_size,
                                  device=self.args.device)
        # self.act_history = torch.zeros(1, self.args.action_size*3, device=self.args.device)

        self.speed = 0

        self.step = 0
        self.episode = 0
        self.episode_reward = 0
        self.replay_buffer = []

        self.target_speed = 0
        self.steering = 0

        self.training = False
        self.step_start = 0

        self.buffers_sent = False

        self.replay_buffer_pub = MQTTValuePub(car_name + "buffer",
                                              broker="mqtt.eclipse.org")
        self.replay_buffer_sub = MQTTValueSub(car_name + "buffer",
                                              broker="mqtt.eclipse.org",
                                              def_value=(0, True))

        self.replay_buffer_received_pub = MQTTValuePub(
            car_name + "buffer_received", broker="mqtt.eclipse.org")
        self.replay_buffer_received_sub = MQTTValueSub(
            car_name + "buffer_received",
            broker="mqtt.eclipse.org",
            def_value=0)

        self.param_pub = MQTTValuePub(car_name + "param",
                                      broker="mqtt.eclipse.org")
        self.param_sub = MQTTValueSub(car_name + "param",
                                      broker="mqtt.eclipse.org")
Example #6
    def __init__(self,
                 car_name="Kari",
                 mqtt_broker="mqtt.eclipse.org",
                 realsense=False,
                 env_type="DonkeyCar"):

        self.camera_sub = MQTTValueSub("donkey/%s/camera" % car_name,
                                       broker=mqtt_broker)

        if realsense:
            self.state_sub = MQTTValueSub("donkey/%s/state" % car_name,
                                          broker=mqtt_broker)

        self.controller_pub = MQTTValuePub("donkey/%s/controls" % car_name,
                                           broker=mqtt_broker)

        self.jpgToImg = JpgToImgArr()

        self.img = np.zeros((40, 40))
        self.state = None
        self.speed = 0
        self.realsense = realsense
        self.sim = env_type != "DonkeyCar"
Example #7
import math
from docopt import docopt
import donkeycar as dk
import cv2

from donkeycar.parts.cv import CvImageView, ImgBGR2RGB, ImgRGB2BGR, ImageScale, ImgWriter, ArrowKeyboardControls
from donkeycar.parts.salient import SalientVis
from donkeycar.parts.network import MQTTValuePub, MQTTValueSub
from donkeycar.parts.transform import Lambda
from donkeycar.parts.image import JpgToImgArr

V = dk.vehicle.Vehicle()
args = docopt(__doc__)
print(args)

V.add(MQTTValueSub(name="donkey/%s/camera" % args["--name"], broker=args["--broker"]), outputs=["jpg"])
V.add(JpgToImgArr(), inputs=["jpg"], outputs=["img_arr"]) 
V.add(ImgBGR2RGB(), inputs=["img_arr"], outputs=["rgb"])
V.add(ImageScale(4.0), inputs=["rgb"], outputs=["lg_img"])
V.add(CvImageView(), inputs=["lg_img"])

V.add(ArrowKeyboardControls(), outputs=["control"])
V.add(MQTTValuePub(name="donkey/%s/controls" % args["--name"], broker=args["--broker"]), inputs=["control"])

record_path = args["--record"]
if record_path is not None:
    class ImageSaver:
        def __init__(self, path):
            self.index = 0
            self.path = path
        
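        def run(self, img_arr):
            # Hypothetical completion: the original example is truncated at this point,
            # so this body is an assumption, not the source code. A part in this role
            # would typically persist each frame with a running index.
            if img_arr is not None:
                cv2.imwrite("%s/img_%05d.jpg" % (self.path, self.index), img_arr)
                self.index += 1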
Example #8
    threaded = True
    inputs = ['angle', 'throttle']

V.add(cam, inputs=inputs, outputs=['camera/arr'], threaded=threaded)

#V.add(cam, outputs=["camera/arr"], threaded=True)

img_to_jpg = ImgArrToJpg()
V.add(img_to_jpg, inputs=["camera/arr"], outputs=["camera/jpg"])

pub_cam = MQTTValuePub("donkey/%s/camera" % cfg.DONKEY_UNIQUE_NAME,
                       broker=cfg.MQTT_BROKER)
V.add(pub_cam, inputs=["camera/jpg"])

sub_controls = MQTTValueSub("donkey/%s/controls" % cfg.DONKEY_UNIQUE_NAME,
                            def_value=(0., 0.),
                            broker=cfg.MQTT_BROKER)
V.add(sub_controls, outputs=["angle", "throttle"])

# steering_controller = PCA9685(cfg.STEERING_CHANNEL, cfg.PCA9685_I2C_ADDR, busnum=cfg.PCA9685_I2C_BUSNUM)
# steering = PWMSteering(controller=steering_controller,
#                                 left_pulse=cfg.STEERING_LEFT_PWM,
#                                 right_pulse=cfg.STEERING_RIGHT_PWM)

# throttle_controller = PCA9685(cfg.THROTTLE_CHANNEL, cfg.PCA9685_I2C_ADDR, busnum=cfg.PCA9685_I2C_BUSNUM)
# throttle = PWMThrottle(controller=throttle_controller,
#                                 max_pulse=cfg.THROTTLE_FORWARD_PWM,
#                                 zero_pulse=cfg.THROTTLE_STOPPED_PWM,
#                                 min_pulse=cfg.THROTTLE_REVERSE_PWM)

# V.add(steering, inputs=['angle'])
Example #9
class DonkeyCarSpeed:
    
    def __init__(self, car="kari", mqtt_broker="mqtt.eclipse.org"):

        self.camera_sub = MQTTValueSub("donkey/%s/camera" % car, broker=mqtt_broker)
        self.state_sub = MQTTValueSub("donkey/%s/state" % car, broker=mqtt_broker)

        self.controller_pub = MQTTValuePub("donkey/%s/controls" % car, broker=mqtt_broker)
        self.jpgToImg = JpgToImgArr()

        self.img = np.zeros((40, 40))
        self.state = None
        self.speed = 0
        

    def observe(self):

        self.img = self.jpgToImg.run(self.camera_sub.run())
        self.state = self.state_sub.run()
  
    def reset(self):
        self.controller_pub.run([0, 0])

        self.observe()
        time.sleep(1)

        return self.img
    
    def step(self, control, step_length):
        
        self.controller_pub.run(control)
        time.sleep(step_length)

        self.observe()

        v = self.state["v"]
        self.speed = (v[0]**2 + v[1]**2 + v[2]**2)**0.5

        done = self.is_dead()

        return self.img, done

    def is_dead(self):
        """
        Counts the black pixels from the ground and compares the amount to a threshold value.
        If there are not enough black pixels the car is assumed to be off the track.
        """

        crop_height = 20
        crop_width = 20
        threshold = 70
        pixels_percentage = 0.10

        pixels_required = (self.img.shape[1] - 2 * crop_width) * crop_height * pixels_percentage

        crop = self.img[-crop_height:, crop_width:-crop_width]

        r = crop[:, :, 0] < threshold
        g = crop[:, :, 1] < threshold
        b = crop[:, :, 2] < threshold

        pixels = (r & g & b).sum()

        print("Pixels: {}, Required: {}".format(pixels, pixels_required))

        return pixels < pixels_required
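
As a sanity check on the black-pixel heuristic: for a 160-pixel-wide frame the crop is (160 - 2 * 20) = 120 columns by 20 rows, so the 10% threshold works out to 240 dark pixels. A minimal sketch on a synthetic frame, assuming a 120x160 RGB image:

import numpy as np

img = np.zeros((120, 160, 3), dtype=np.uint8)   # all-black frame, i.e. still on the track
crop = img[-20:, 20:-20]
dark = ((crop[:, :, 0] < 70) & (crop[:, :, 1] < 70) & (crop[:, :, 2] < 70)).sum()
required = (160 - 2 * 20) * 20 * 0.10           # 240.0
print(dark, required, dark < required)          # 2400 240.0 False -> is_dead() would return False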
Example #10
    def __init__(self, donkey_name, mqtt_broker):
        self.camera_sub = MQTTValueSub("donkey/%s/camera" % donkey_name,
                                       broker=mqtt_broker)
        self.controller_pub = MQTTValuePub("donkey/%s/controls" % donkey_name,
                                           broker=mqtt_broker)
        self.jpgToImg = JpgToImgArr()
Example #11
class RL_Agent():
    def __init__(self, alg_type, sim, car_name=args.car_name):
        self.args = define_config()
        self.agent = Dreamer(self.args)
        self.sim = sim

        self.image = np.zeros((120, 160, 3))
        self.observation = torch.zeros(
            (1, 3, 64, 64))  # init observation, with batch dim
        self.belief = torch.zeros(1,
                                  self.args.belief_size,
                                  device=self.args.device)
        self.posterior_state = torch.zeros(1,
                                           self.args.state_size,
                                           device=self.args.device)
        self.action = torch.zeros(1,
                                  self.args.action_size,
                                  device=self.args.device)
        # self.act_history = torch.zeros(1, self.args.action_size*3, device=self.args.device)

        self.speed = 0

        self.step = 0
        self.episode = 0
        self.episode_reward = 0
        self.replay_buffer = []

        self.target_speed = 0
        self.steering = 0

        self.training = False
        self.step_start = 0

        self.buffers_sent = False

        self.replay_buffer_pub = MQTTValuePub(car_name + "buffer",
                                              broker="mqtt.eclipse.org")
        self.replay_buffer_sub = MQTTValueSub(car_name + "buffer",
                                              broker="mqtt.eclipse.org",
                                              def_value=(0, True))

        self.replay_buffer_received_pub = MQTTValuePub(
            car_name + "buffer_received", broker="mqtt.eclipse.org")
        self.replay_buffer_received_sub = MQTTValueSub(
            car_name + "buffer_received",
            broker="mqtt.eclipse.org",
            def_value=0)

        self.param_pub = MQTTValuePub(car_name + "param",
                                      broker="mqtt.eclipse.org")
        self.param_sub = MQTTValueSub(car_name + "param",
                                      broker="mqtt.eclipse.org")

    def reset(self, image):
        self.episode += 1

        self.episode_reward = 0
        self.replay_buffer = []

        self.target_speed = 0
        self.steering = 0

        # self.command_history = np.zeros(3*COMMAND_HISTORY_LENGTH)
        # self.state = np.vstack([image for x in range(FRAME_STACK)])
        self.belief = torch.zeros(1,
                                  self.args.belief_size,
                                  device=self.args.device)
        self.posterior_state = torch.zeros(1,
                                           self.args.state_size,
                                           device=self.args.device)
        self.action = torch.zeros(1,
                                  self.args.action_size,
                                  device=self.args.device)
        # self.act_history = torch.zeros(1, self.args.action_size*3, device=self.args.device)

        self.buffer_sent = False
        self.buffer_received = False
        self.params_sent = False
        self.params_received = False

    def train(self):
        #print(f"Training for {int(time.time() - self.training_start)} seconds")

        if (time.time() - self.training_start) > TRAINING_TIMEOUT:
            """Temporary fix for when sometimes the replay buffer fails to send"""
            self.training_start = time.time()
            self.buffers_sent = 0
            self.replay_buffer_pub.run((0, False))
            return False

        if len(self.replay_buffer) > 0:

            buffers_received = self.replay_buffer_received_sub.run()

            if self.buffers_sent == buffers_received:
                self.buffers_sent += 1
                self.replay_buffer_pub.run(
                    (self.buffers_sent, self.replay_buffer[:BLOCK_SIZE]))
                print(
                    f"Sent {len(self.replay_buffer[:BLOCK_SIZE])} observations"
                )
                self.replay_buffer = self.replay_buffer[BLOCK_SIZE:]

            return True

        if self.replay_buffer_received_sub.run() == self.buffers_sent:
            self.buffers_sent = 0
            self.replay_buffer_received_pub.run(0)
            self.replay_buffer_pub.run((0, False))

        new_params = self.param_sub.run()

        if not new_params:
            return True

        print("Received new params.")
        self.agent.import_parameters(new_params)
        self.param_pub.run(False)

        return False

    def run(self, image, speed=None):

        if speed is None:  # 0 is a valid speed reading, so compare against None explicitly
            self.speed = self.target_speed
        else:
            self.speed = speed

        if image is not None:
            self.image = image

        self.dead = self.is_dead(
            self.image) if not self.sim else self.is_dead_sim(self.image)

        if self.step > 0 and not self.training:
            """Save observation to replay buffer"""
            reward = 1 + (self.speed - self.args.throttle_min) / (
                self.args.throttle_max - self.args.throttle_min)
            done = self.dead
            reward = reward * -10 if self.dead else reward

            next_observation = self.agent.process_im(self.image)

            self.replay_buffer.append(
                (next_observation, self.action.cpu(), reward, done))

            self.episode_reward += reward
            step_end = time.time()

            self.observation = next_observation  # obs is a tensor (3, 64, 64); the image is a numpy array (120, 160, 3)

            print(
                f"Episode: {self.episode}, Step: {self.step}, Reward: {reward:.2f}, Episode reward: {self.episode_reward:.2f}, Step time: {(step_end - self.step_start):.2f}, Speed: {self.speed:.2f}, Steering: {self.steering:.2f}"
            )

        if self.step > self.args.max_episodes_steps or (self.dead
                                                        and not self.training):
            self.training_start = time.time()

            self.step = 0
            self.steering = 0
            self.target_speed = 0

            self.training = True
            self.replay_buffer = self.replay_buffer[self.args.skip_initial_steps:]
            return self.steering, self.target_speed, self.training

        if self.training:
            self.training = self.train()
            self.dead = False

            return self.steering, self.target_speed, self.training

        if self.step == 0:
            if not self.sim:
                input("Press Enter to start a new episode.")

            self.reset(self.agent.process_im(self.image))

        self.step += 1

        if self.step < self.args.skip_initial_steps:
            return 0.001, 0, False

        self.step_start = time.time()

        if self.episode <= self.args.random_episodes:
            self.steering = np.random.normal(0, 1)
            self.target_speed = 0.5
            self.action = torch.tensor([[self.steering, self.target_speed]],
                                       device=self.args.device)
        else:

            with torch.no_grad():
                self.belief, self.posterior_state = self.agent.infer_state(
                    self.observation.to(self.args.device),
                    action=self.action,
                    belief=self.belief,
                    state=self.posterior_state)
                self.action = self.agent.select_action(
                    (self.belief, self.posterior_state))
                # print("before limit", self.action)
                # maintain act_history
                # self.act_history = torch.roll(act_history, -args.action_size, dims=-1)
                # self.act_history[:, -args.action_size:] = action

                # to get steering and target_speed as numpy
                action = self.action.cpu().numpy(
                )  # act dim : [batch_size, act_size]
                # action = self.enforce_limits(action[0], self.steering)  # size [act_size]
                self.steering, self.target_speed = action[0][0], action[0][1]
                # self.action[0] = torch.tensor(action).to(self.action)
                # print("after limit ", self.action)
                ## didn't use enforce_limit yet
                # self.steering, self.target_speed = self.enforce_limits(action, self.command_history[0]) # TODO: change this

        return self.steering, self.target_speed, self.training

    def is_dead(self, img):
        """
        Counts the black pixels from the ground and compares the amount to a threshold value.
        If there are not enough black pixels the car is assumed to be off the track.
        """

        crop_height = 20
        crop_width = 20
        threshold = 70
        pixels_percentage = 0.10

        pixels_required = (img.shape[1] -
                           2 * crop_width) * crop_height * pixels_percentage

        crop = img[-crop_height:, crop_width:-crop_width]

        r = crop[:, :, 0] < threshold
        g = crop[:, :, 1] < threshold
        b = crop[:, :, 2] < threshold

        pixels = (r & g & b).sum()

        # print("Pixels: {}, Required: {}".format(pixels, pixels_required))

        return pixels < pixels_required

    def is_dead_sim(self, img):

        crop_height = 40
        required = 0.8

        cropped = img[-crop_height:]

        rgb = cropped[:, :, 0] > cropped[:, :, 2]

        return rgb.sum() / (crop_height * 160) > required

    def enforce_limits(self, action, prev_steering):
        """
        Scale the agent actions to environment limits
        """

        var = (self.args.throttle_max - self.args.throttle_min) / 2
        mu = (self.args.throttle_max + self.args.throttle_min) / 2

        steering_min = max(self.args.steer_limit_left,
                           prev_steering - self.args.max_steering_diff)
        steering_max = min(self.args.steer_limit_right,
                           prev_steering + self.args.max_steering_diff)

        steering = max(steering_min, min(steering_max, action[0]))

        return np.array([steering, action[1] * var + mu], dtype=np.float32)
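
The train() method above streams the replay buffer to an off-car trainer in BLOCK_SIZE chunks over MQTT, using the car_name + "buffer_received" counter as an acknowledgement. The trainer side is not shown in these examples; a minimal sketch of the receiving loop, assuming the same donkeycar MQTT parts and topic names, might be:

import time
from donkeycar.parts.network import MQTTValuePub, MQTTValueSub

car_name = "kari"                 # must match the car-side name (assumed)
replay_memory = []                # the trainer's own store (assumed)

buffer_sub = MQTTValueSub(car_name + "buffer", broker="mqtt.eclipse.org", def_value=(0, False))
received_pub = MQTTValuePub(car_name + "buffer_received", broker="mqtt.eclipse.org")

acked = 0
while True:
    index, block = buffer_sub.run()
    if index == 0:                # the car publishes (0, False) to reset the exchange
        acked = 0
    elif index == acked + 1:      # a new chunk arrived
        replay_memory.extend(block)
        acked = index
        received_pub.run(acked)   # acknowledge so the car sends the next chunk
    time.sleep(0.1)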
Example #12
class DonkeyRemoteEnv:
    def __init__(self,
                 car_name="Kari",
                 mqtt_broker="mqtt.eclipse.org",
                 realsense=False,
                 env_type="DonkeyCar"):

        self.camera_sub = MQTTValueSub("donkey/%s/camera" % car_name,
                                       broker=mqtt_broker)

        if realsense:
            self.state_sub = MQTTValueSub("donkey/%s/state" % car_name,
                                          broker=mqtt_broker)

        self.controller_pub = MQTTValuePub("donkey/%s/controls" % car_name,
                                           broker=mqtt_broker)

        self.jpgToImg = JpgToImgArr()

        self.img = np.zeros((40, 40))
        self.state = None
        self.speed = 0
        self.realsense = realsense
        self.sim = env_type != "DonkeyCar"

    def observe(self):

        self.img = self.jpgToImg.run(self.camera_sub.run())

        if self.realsense:
            self.state = self.state_sub.run()
            v = self.state["v"]
            self.speed = (v[0]**2 + v[1]**2 + v[2]**2)**0.5

    def reset(self):
        self.controller_pub.run([0, 0])

        self.observe()
        time.sleep(2)

        return self.img

    def step(self, control, step_length):
        print(control)
        self.controller_pub.run(control)
        time.sleep(step_length)

        self.observe()

        done = self.is_dead_sim() if self.sim else self.is_dead_real()

        return self.img, done

    def is_dead_real(self):

        crop_height = 20
        crop_width = 20
        threshold = 70
        pixels_percentage = 0.10

        pixels_required = (self.img.shape[1] -
                           2 * crop_width) * crop_height * pixels_percentage

        crop = self.img[-crop_height:, crop_width:-crop_width]

        r = crop[:, :, 0] < threshold
        g = crop[:, :, 1] < threshold
        b = crop[:, :, 2] < threshold

        pixels = (r & g & b).sum()

        print("Pixels: {}, Required: {}".format(pixels, pixels_required))

        return pixels < pixels_required

    def is_dead_sim(self):

        crop_height = 40
        required = 0.8

        cropped = self.img[-crop_height:]

        rgb = cropped[:, :, 0] > cropped[:, :, 2]

        return rgb.sum() / (crop_height * 160) > required
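
A hedged episode loop over DonkeyRemoteEnv; the car name, control values, and step length are placeholders, and env_type="DonkeySim" merely selects the simulator variant of the off-track check:

env = DonkeyRemoteEnv(car_name="Kari", realsense=False, env_type="DonkeySim")
obs = env.reset()
done = False
while not done:
    obs, done = env.step([0.0, 0.2], step_length=0.1)   # [steering, throttle]
env.controller_pub.run([0, 0])    # stop the car once the episode ends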
Example #13
V = dk.Vehicle()

print("starting up", cfg.DONKEY_UNIQUE_NAME, "for remote management.")

cam = PiCamera(image_w=cfg.IMAGE_W,
               image_h=cfg.IMAGE_H,
               image_d=cfg.IMAGE_DEPTH)
V.add(cam, outputs=["camera/arr"], threaded=True)

img_to_jpg = ImgArrToJpg()
V.add(img_to_jpg, inputs=["camera/arr"], outputs=["camera/jpg"])

pub_cam = MQTTValuePub("donkey/%s/camera" % cfg.DONKEY_UNIQUE_NAME)
V.add(pub_cam, inputs=["camera/jpg"])

sub_controls = MQTTValueSub("donkey/%s/controls" % cfg.DONKEY_UNIQUE_NAME,
                            def_value=(0., 0.))
V.add(sub_controls, outputs=["angle", "throttle"])

steering_controller = ServoBlaster(cfg.STEERING_CHANNEL)

#PWM pulse values should be in the range of 100 to 200
assert (cfg.STEERING_LEFT_PWM <= 200)
assert (cfg.STEERING_RIGHT_PWM <= 200)
steering = PWMSteering(controller=steering_controller,
                       left_pulse=cfg.STEERING_LEFT_PWM,
                       right_pulse=cfg.STEERING_RIGHT_PWM)

from donkeycar.parts.actuator import Mini_HBridge_DC_Motor_PWM
motor = Mini_HBridge_DC_Motor_PWM(cfg.HBRIDGE_PIN_FWD, cfg.HBRIDGE_PIN_BWD)

V.add(steering, inputs=['angle'])
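
The snippet ends before the drive loop starts: the motor part is constructed but never added, and the vehicle is never started. A hedged continuation (the motor wiring is assumed intent, and cfg.DRIVE_LOOP_HZ is the conventional donkeycar config name, assumed here):

V.add(motor, inputs=['throttle'])    # hypothetical: wire the motor to the throttle channel
V.start(rate_hz=cfg.DRIVE_LOOP_HZ)   # run the part loop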
Example #14
    def __init__(self,
                 alg_type,
                 sim,
                 car_name=args.car_name,
                 encoder_update=args.encoder_update,
                 max_episode_steps=MAX_EPISODE_STEPS):

        if encoder_update == "pixel":
            print("Using Pixel encoder")
            PARAMS["sac"]["encoder_critic_loss"] = True
            PARAMS["sac"]["encoder_ae_loss"] = False

        if encoder_update == "ae":
            print("Using Autoencoder loss")
            PARAMS["sac"]["encoder_critic_loss"] = False
            PARAMS["sac"]["encoder_ae_loss"] = True

        if encoder_update == "aesac":
            print("Using autoencoder and critic loss")
            PARAMS["sac"]["encoder_critic_loss"] = True
            PARAMS["sac"]["encoder_ae_loss"] = True

        self.agent = AE_SAC(PARAMS)
        self.sim = sim

        self.image = np.zeros((120, 160, 3))
        self.command_history = np.zeros(3 * COMMAND_HISTORY_LENGTH)

        self.state = np.vstack([
            self.agent.process_im(self.image, IMAGE_SIZE, RGB)
            for x in range(FRAME_STACK)
        ])
        self.speed = 0

        self.step = 0
        self.episode = 0
        self.episode_reward = 0
        self.replay_buffer = []
        self.max_episode_steps = max_episode_steps

        self.target_speed = 0
        self.steering = 0

        self.training = False
        self.step_start = 0

        self.buffers_sent = False

        self.replay_buffer_pub = MQTTValuePub(car_name + "buffer",
                                              broker="mqtt.eclipse.org")
        self.replay_buffer_sub = MQTTValueSub(car_name + "buffer",
                                              broker="mqtt.eclipse.org",
                                              def_value=(0, True))

        self.replay_buffer_received_pub = MQTTValuePub(
            car_name + "buffer_received", broker="mqtt.eclipse.org")
        self.replay_buffer_received_sub = MQTTValueSub(
            car_name + "buffer_received",
            broker="mqtt.eclipse.org",
            def_value=0)

        self.param_pub = MQTTValuePub(car_name + "param",
                                      broker="mqtt.eclipse.org")
        self.param_sub = MQTTValueSub(car_name + "param",
                                      broker="mqtt.eclipse.org")
Example #15
class RL_Agent():
    def __init__(self,
                 alg_type,
                 sim,
                 car_name=args.car_name,
                 encoder_update=args.encoder_update,
                 max_episode_steps=MAX_EPISODE_STEPS):

        if encoder_update == "pixel":
            print("Using Pixel encoder")
            PARAMS["sac"]["encoder_critic_loss"] = True
            PARAMS["sac"]["encoder_ae_loss"] = False

        if encoder_update == "ae":
            print("Using Autoencoder loss")
            PARAMS["sac"]["encoder_critic_loss"] = False
            PARAMS["sac"]["encoder_ae_loss"] = True

        if encoder_update == "aesac":
            print("Using autoencoder and critic loss")
            PARAMS["sac"]["encoder_critic_loss"] = True
            PARAMS["sac"]["encoder_ae_loss"] = True

        self.agent = AE_SAC(PARAMS)
        self.sim = sim

        self.image = np.zeros((120, 160, 3))
        self.command_history = np.zeros(3 * COMMAND_HISTORY_LENGTH)

        self.state = np.vstack([
            self.agent.process_im(self.image, IMAGE_SIZE, RGB)
            for x in range(FRAME_STACK)
        ])
        self.speed = 0

        self.step = 0
        self.episode = 0
        self.episode_reward = 0
        self.replay_buffer = []
        self.max_episode_steps = max_episode_steps

        self.target_speed = 0
        self.steering = 0

        self.training = False
        self.step_start = 0

        self.buffers_sent = False

        self.replay_buffer_pub = MQTTValuePub(car_name + "buffer",
                                              broker="mqtt.eclipse.org")
        self.replay_buffer_sub = MQTTValueSub(car_name + "buffer",
                                              broker="mqtt.eclipse.org",
                                              def_value=(0, True))

        self.replay_buffer_received_pub = MQTTValuePub(
            car_name + "buffer_received", broker="mqtt.eclipse.org")
        self.replay_buffer_received_sub = MQTTValueSub(
            car_name + "buffer_received",
            broker="mqtt.eclipse.org",
            def_value=0)

        self.param_pub = MQTTValuePub(car_name + "param",
                                      broker="mqtt.eclipse.org")
        self.param_sub = MQTTValueSub(car_name + "param",
                                      broker="mqtt.eclipse.org")

    def reset(self, image):
        self.episode += 1

        self.episode_reward = 0
        self.replay_buffer = []

        self.target_speed = 0
        self.steering = 0

        self.command_history = np.zeros(3 * COMMAND_HISTORY_LENGTH)
        self.state = np.vstack([image for x in range(FRAME_STACK)])
        self.buffer_sent = False
        self.buffer_received = False
        self.params_sent = False
        self.params_received = False

    def train(self):
        #print(f"Training for {int(time.time() - self.training_start)} seconds")

        if (time.time() - self.training_start) > TRAINING_TIMEOUT:
            """Temporary fix for when sometimes the replay buffer fails to send"""
            self.training_start = time.time()
            self.buffers_sent = 0
            self.replay_buffer_pub.run((0, False))
            return False

        if len(self.replay_buffer) > 0:

            buffers_received = self.replay_buffer_received_sub.run()

            if self.buffers_sent == buffers_received:
                self.buffers_sent += 1
                self.replay_buffer_pub.run(
                    (self.buffers_sent, self.replay_buffer[:BLOCK_SIZE]))
                print(
                    f"Sent {len(self.replay_buffer[:BLOCK_SIZE])} observations"
                )
                self.replay_buffer = self.replay_buffer[BLOCK_SIZE:]

            return True

        if self.replay_buffer_received_sub.run() == self.buffers_sent:
            self.buffers_sent = 0
            self.replay_buffer_received_pub.run(0)
            self.replay_buffer_pub.run((0, False))

        new_params = self.param_sub.run()

        if not new_params:
            return True

        print("Received new params.")
        self.agent.import_parameters(new_params)
        self.param_pub.run(False)

        return False

    def run(self, image, speed=None):

        if speed is None:  # 0 is a valid speed reading, so compare against None explicitly
            self.speed = self.target_speed
        else:
            self.speed = speed

        if image is not None:
            self.image = image

        self.dead = self.is_dead(
            self.image) if not self.sim else self.is_dead_sim(self.image)

        if self.step > 0 and not self.training:
            """Save observation to replay buffer"""
            if THROTTLE_MAX == THROTTLE_MIN:
                reward = 1
            else:
                reward = 1 + (self.speed - THROTTLE_MIN) / (THROTTLE_MAX -
                                                            THROTTLE_MIN)
                reward = min(reward, 2) / 2

            done = self.dead
            reward = reward * -10 if self.dead else reward

            next_command_history = np.roll(self.command_history, 3)
            next_command_history[:3] = [
                self.steering, self.target_speed, self.speed
            ]

            next_state = np.roll(self.state, IMAGE_CHANNELS)
            next_state[:IMAGE_CHANNELS, :, :] = self.agent.process_im(
                self.image, IMAGE_SIZE, RGB)

            self.replay_buffer.append([[self.state, self.command_history],
                                       [self.steering, self.target_speed],
                                       [reward],
                                       [next_state, next_command_history],
                                       [float(not done)]])

            self.episode_reward += reward
            step_end = time.time()

            self.state = next_state
            self.command_history = next_command_history

            print(
                f"Episode: {self.episode}, Step: {self.step}, Reward: {reward:.2f}, Episode reward: {self.episode_reward:.2f}, Step time: {(step_end - self.step_start):.2f}, Speed: {self.speed:.2f}, Throttle: {self.target_speed:.2f}"
            )

        if self.step > self.max_episode_steps or (self.dead
                                                  and not self.training):
            self.training_start = time.time()

            self.step = 0
            self.steering = 0
            self.target_speed = 0
            self.training = True
            self.replay_buffer = self.replay_buffer[SKIP_INITIAL_STEPS:]
            return self.steering, self.target_speed, self.training

        if self.training:

            self.training = self.train()
            self.dead = False

            return self.steering, self.target_speed, self.training

        if self.step == 0:
            if not self.sim:
                input("Press Enter to start a new episode.")

            self.reset(self.agent.process_im(self.image, IMAGE_SIZE, RGB))

        self.step += 1

        if self.step < SKIP_INITIAL_STEPS:
            return 0.001, 0, False

        self.step_start = time.time()

        #if self.episode < RANDOM_EPISODES:
        #    action = action_space.sample()
        #else:

        action = self.agent.select_action((self.state, self.command_history))

        self.steering, self.target_speed = self.enforce_limits(
            action, self.command_history[:2])

        return self.steering, self.target_speed, self.training

    def is_dead(self, img):
        """
        Counts the black pixels from the ground and compares the amount to a threshold value.
        If there are not enough black pixels the car is assumed to be off the track.
        """

        crop_height = 20
        crop_width = 20
        threshold = 70
        pixels_percentage = 0.10

        pixels_required = (img.shape[1] -
                           2 * crop_width) * crop_height * pixels_percentage

        crop = img[-crop_height:, crop_width:-crop_width]

        r = crop[:, :, 0] < threshold
        g = crop[:, :, 1] < threshold
        b = crop[:, :, 2] < threshold

        pixels = (r & g & b).sum()

        #print("Pixels: {}, Required: {}".format(pixels, pixels_required))

        return pixels < pixels_required

    def is_dead_sim(self, img):

        crop_height = 40
        required = 0.8

        cropped = img[-crop_height:]

        rgb = cropped[:, :, 0] > cropped[:, :, 2]

        return rgb.sum() / (crop_height * 160) > required

    def enforce_limits(self, action, prev_action):
        """
        Scale the agent actions to environment limits
        """

        prev_steering = prev_action[0]
        prev_throttle = prev_action[1]

        var = (THROTTLE_MAX - THROTTLE_MIN) / 2
        mu = (THROTTLE_MAX + THROTTLE_MIN) / 2

        raw_throttle = action[1] * var + mu

        steering = np.clip(action[0], prev_steering - MAX_STEERING_DIFF,
                           prev_steering + MAX_STEERING_DIFF)
        steering = np.clip(steering, STEER_LIMIT_LEFT, STEER_LIMIT_RIGHT)

        throttle = np.clip(raw_throttle, prev_throttle - MAX_THROTTLE_DIFF,
                           prev_throttle + MAX_THROTTLE_DIFF)

        return [steering, throttle]
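
For intuition about enforce_limits(): with hypothetical limit values (the constants below are placeholders, not values from the original config), a raw action of [0.9, 1.0] is mapped onto the throttle range and rate-limited against the previous command:

import numpy as np

THROTTLE_MIN, THROTTLE_MAX = 0.0, 0.4            # hypothetical limits
MAX_STEERING_DIFF, MAX_THROTTLE_DIFF = 0.2, 0.1
STEER_LIMIT_LEFT, STEER_LIMIT_RIGHT = -1.0, 1.0

prev_steering, prev_throttle = 0.0, 0.3
action = np.array([0.9, 1.0])                    # raw agent output, roughly in [-1, 1]

var = (THROTTLE_MAX - THROTTLE_MIN) / 2
mu = (THROTTLE_MAX + THROTTLE_MIN) / 2
raw_throttle = action[1] * var + mu              # maps [-1, 1] onto [THROTTLE_MIN, THROTTLE_MAX]

steering = np.clip(action[0], prev_steering - MAX_STEERING_DIFF, prev_steering + MAX_STEERING_DIFF)
steering = np.clip(steering, STEER_LIMIT_LEFT, STEER_LIMIT_RIGHT)
throttle = np.clip(raw_throttle, prev_throttle - MAX_THROTTLE_DIFF, prev_throttle + MAX_THROTTLE_DIFF)

print(steering, throttle)                        # 0.2 0.4: both deltas are rate-limited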