Пример #1
0
 def step(self, action):
     tic = time.time()
     # 1. Take action in the simulator based on the agents action choice
     self.do_action(action)
     
     # 2: Update the environment state variables after the action takes place
     self.pset_simulator_state_info() 
     
     # 3: Calculate reward
     reward, done = self.calc_reward()
     
     self.time_to_step = time.time() - tic
     print('Time to step: ', self.time_to_step)
     
     # Send the data collected off to gui
     self.send_to_gui(action, reward, done)
     
     if self.mode == "inertial":
         return (self.current_inertial_state, reward, done, self.extra_metadata)
     elif self.mode == "rgb":
         # Reduce dimentionality of obs
         self.obs4 = trim_append_state_vector(self.obs4, self.images_rgb[:,:,self.image_mask_rgb], pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv
         return (self.obs4, reward, done, self.extra_metadata)
     elif self.mode == "rgb_normal":
         self.obs4 = trim_append_state_vector(self.obs4, self.rgbs2rgbs_normal(), pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv
         return (self.obs4, reward, done, self.extra_metadata)
     elif self.mode == "rgba":
         self.obs4 = trim_append_state_vector(self.obs4, self.rgbs2rgbs_normal(), pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv
         return (self.obs4, reward, done, self.extra_metadata)
     elif self.mode == "gray":
         self.obs4 = trim_append_state_vector(self.obs4, self.rgbs2grays(), pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv
         return (self.obs4, reward, done, self.extra_metadata)
     elif self.mode == "gray_normal":
         self.obs4 = trim_append_state_vector(self.obs4, self.rgbs2grays() / 255, pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv
         return (self.obs4, reward, done, self.extra_metadata)
     elif self.mode == "both_rgb":
         self.obs4 = trim_append_state_vector(self.obs4, self.images_rgb[:,:, self.image_mask_rgb], pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv
         return (self.current_inertial_state, self.obs4, reward, done, self.extra_metadata)
     elif self.mode == "both_rgb_normal":
         self.obs4 = trim_append_state_vector(self.obs4, self.rgbs2rgbs_normal(), pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv
         return (self.current_inertial_state, self.obs4, reward, done, self.extra_metadata)
     elif self.mode == "both_rgba": 
         self.obs4 = trim_append_state_vector(self.obs4, self.images_rgba[:,:, self.image_mask_rgba], pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv
         return (self.current_inertial_state, self.obs4, reward, done, self.extra_metadata)
     elif self.mode == "both_gray":
         self.obs4 = trim_append_state_vector(self.obs4, self.rgbs2grays(), pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv
         return (self.current_inertial_state, self.obs4, reward, done, self.extra_metadata)
     elif self.mode == "both_gray_normal":
         self.obs4 = trim_append_state_vector(self.obs4, self.rgbs2grays()/ 255, pop_index = self.IMG_VIEWS * self.IMG_CHANNELS) # pop old and append new state obsv
         return (self.current_inertial_state, self.obs4, reward, done, self.extra_metadata)
     else:
         print("invalid Mode!")
Пример #2
0
def train_racing_car(primaryNetwork,
                     targetNetwork,
                     env,
                     directory,
                     NUM_EPISODES=1000,
                     EPISODE_ITERATIONS_MAX=100,
                     COPY_PERIOD=50,
                     EPSILON=1):

    # Set up GUI Video Feeder
    vehicle_names = ["Car1"]
    parentImgConn, childImgConn = mp.Pipe()
    parentStateConn, childStateConn = mp.Pipe()
    app = AirSimGUI.CarGUI(parentStateConn,
                           parentImgConn,
                           vehicle_names=vehicle_names,
                           num_video_feeds=IMG_VIEWS * IMG_STEP,
                           isRGB=False)

    COPY_MODEL_FLAG = False
    copy_model_iterator = 0
    for episode in range(NUM_EPISODES):
        print('Reset racing!', "Episode: ", episode)
        env.client.simPause(True)

        # Decay Epsilon to Greedy-Like
        ep = EPSILON / np.sqrt(episode + 1)
        if ep < .02:
            ep = .02

        # Episode iterator, copy model iterator and flag signaling to copy parameters or restart episode
        episode_iterator = 0
        DONE_FLAG = False

        # Train the model more now between rounds so delay is less
        #primaryNetwork.train(targetNetwork, 20) # num times

        # Save Model Weights
        if (episode + 1) % 10 == 0:
            primaryNetwork.save_session(directory)

        # Reset s*tates for the next training round
        obs4 = np.zeros(
            (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS * IMG_STEP * IMG_VIEWS))

        env.client.simPause(False)
        # Returns inertial state vector, the image observation, and meta data (Time ellapsed)
        car_state, obs, meta = env.reset()

        env.client.simPause(True)
        # Copy model if we have reached copy_period rounds.
        if COPY_MODEL_FLAG:
            print('Copying model')
            targetNetwork.copy_from(primaryNetwork)
            copy_model_iterator = 0
            COPY_MODEL_FLAG = False
            print('Copying model complete!')
        env.client.simPause(False)

        #print(obs.shape, obs4.shape)
        obs4 = trim_append_state_vector(obs4,
                                        obs,
                                        pop_index=IMG_VIEWS * IMG_CHANNELS)

        # Return and Loss
        episode_return = 0
        episode_loss = 0

        while not DONE_FLAG and episode_iterator < EPISODE_ITERATIONS_MAX:
            episode_iterator += 1
            tic = time.time()

            # Sample action
            action = primaryNetwork.sample_action(obs4, ep)

            # Update Stacked State
            prev_obs4 = obs4
            if episode_iterator < 10:
                state, obs, reward, DONE_FLAG, meta = env.step(
                    env.action_num("Throttle Up"))  # new single observation
            else:
                state, obs, reward, DONE_FLAG, meta = env.step(
                    action)  # new single observation

            # Pause Sim
            env.client.simPause(True)

            # Reduce dimentionality of obs
            obs4 = trim_append_state_vector(
                obs4, obs, pop_index=IMG_VIEWS *
                IMG_CHANNELS)  # pop old and append new state obsv
            # Add new reward to running episode return
            episode_return += reward

            # Save new experience and train
            primaryNetwork.add_experience(prev_obs4, action, reward, obs4,
                                          DONE_FLAG)
            loss = primaryNetwork.train(
                targetNetwork)  # train by using the training model

            # Append Loss
            if loss is not None:  # Meaning, we have enough training exmaples to actually start training..
                episode_loss += loss

            # Increment Copy Model Iterator
            copy_model_iterator += 1

            # Signal a copy to the training model if we reach copy_period timesteps
            if copy_model_iterator % COPY_PERIOD == 0:
                COPY_MODEL_FLAG = True

            # Send off to the GUI Process!
            tic2 = time.time()
            d1 = dict.fromkeys(vehicle_names,
                               np.array(obs4 * 255, dtype=np.uint8))
            d2 = dict.fromkeys(vehicle_names, state)
            childImgConn.send(d1)  # Make it a image format (unsigned integers)
            childStateConn.send(d2)
            print("GUI Process Update Time: ", time.time() - tic2)

            # Print Output
            #print(state)
            print("Episode: ", episode, ", Iteration: ", episode_iterator,
                  ", Reward: ", reward, ", Loss: ", loss, ", Action: ",
                  env.action_name(action), ", Ellasped Time: ",
                  time.time() - tic)
            env.client.simPause(False)

            # Add Excel and GUI Listeners to Loop
        print("Total Episode Return: ", episode_return,
              ", Total Episode Loss: ", episode_loss)
Пример #3
0
def train_racing_drones(_vehicle_names,
                        primaryNetwork,
                        targetNetwork,
                        env,
                        directory,
                        NUM_EPISODES=1000,
                        EPISODE_ITERATIONS_MAX=100,
                        COPY_PERIOD=200,
                        EPSILON=1):

    # Set up GUI Video Feeder
    vehicle_names = _vehicle_names
    parentImgConn, childImgConn = mp.Pipe()
    parentStateConn, childStateConn = mp.Pipe()
    app = AirSimGUI.QuadcopterGUI(parentStateConn,
                                  parentImgConn,
                                  vehicle_names=vehicle_names,
                                  num_video_feeds=IMG_VIEWS * IMG_STEP,
                                  isRGB=False)

    COPY_MODEL_FLAG = False
    copy_model_iterator = 0
    for episode in range(NUM_EPISODES):
        print('Reset racing!', "Episode: ", episode)

        # Decay Epsilon to Greedy-Like
        ep = EPSILON / np.sqrt(episode + 1)
        if ep < .02:
            ep = .02

        # Episode iterator, copy model iterator and flag signaling to copy parameters or restart episode
        episode_iterator = 0
        DONE_FLAGs = dict.fromkeys(vehicle_names, False)

        env.client.simPause(True)
        # Copy model if we have reached copy_period rounds.
        if COPY_MODEL_FLAG:
            print('Copying model')
            targetNetwork.copy_from(primaryNetwork.CDQN)
            copy_model_iterator = 0
            COPY_MODEL_FLAG = False
            print('Copying model complete!')
            time.sleep(1)
        env.client.simPause(False)

        # Train the model more now between rounds so delay is less
        #primaryNetwork.train(targetNetwork, 20) # num times

        # Save Model Weights
        if (episode + 1) % 10 == 0:
            primaryNetwork.save_session(directory)

        # Reset s*tates for the next training round
        obs4s = dict.fromkeys(
            vehicle_names,
            np.zeros(
                (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS * IMG_STEP * IMG_VIEWS)))

        # Returns inertial state vector, the image observation, and meta data (Time ellapsed)
        states, obss, metas = env.reset()
        #print(obs.shape, obs4.shape)
        for vn in vehicle_names:
            obs4s[vn] = trim_append_state_vector(obs4s[vn],
                                                 obss[vn],
                                                 pop_index=IMG_VIEWS *
                                                 IMG_CHANNELS)

        # Return and Loss
        episode_return = 0
        episode_loss = 0

        while not np.sum(np.array(
                list(DONE_FLAGs.values()),
                dtype=np.int)) and episode_iterator < EPISODE_ITERATIONS_MAX:
            episode_iterator += 1
            tic = time.time()

            # Sample quad actions
            actions = primaryNetwork.sample_actions(obs4s, ep)

            # Update Stacked States
            prev_obs4s = obs4s
            states, obss, rewards, DONE_FLAGs, metas = env.steps(
                actions)  # new single observation

            # Pause Sim
            env.client.simPause(True)

            # Stack New Obsvs
            for vn in vehicle_names:
                obs4s[vn] = trim_append_state_vector(
                    obs4s[vn], obss[vn], pop_index=IMG_VIEWS *
                    IMG_CHANNELS)  # pop old and append new state obsv

            # Add new rewards to running episode return
            episode_return += np.sum(
                np.array([rewards[vn] for vn in vehicle_names]))

            # Save new experience and train
            primaryNetwork.add_experiences(prev_obs4s, actions, rewards, obs4s,
                                           DONE_FLAGs)
            loss = primaryNetwork.train(
                targetNetwork)  # train by using the training model

            # Append Loss
            if loss is not None:  # Meaning, we have enough training exmaples to actually start training..
                episode_loss += loss

            # Increment Copy Model Iterator
            copy_model_iterator += 1

            # Signal a copy to the training model if we reach copy_period timesteps
            if copy_model_iterator % COPY_PERIOD == 0:
                COPY_MODEL_FLAG = True

            # Send off to the GUI Process!
            tic2 = time.time()
            gui_obs = dict.fromkeys(vehicle_names)
            for vn in vehicle_names:
                gui_obs[vn] = np.array(obs4s[vn] * 255, dtype=np.uint8)
            childImgConn.send(
                gui_obs)  # Make it a image format (unsigned integers)
            childStateConn.send(states)
            print("GUI Process Update Time: ", time.time() - tic2)

            # Print Output
            #print(rewards,actions,DONE_FLAGs)
            print("Episode: ", episode, ", Iteration: ", episode_iterator,
                  ", Rewards: ", rewards[vehicle_names[0]], ", Loss: ", loss,
                  ", Action Dron 0: ", actions[vehicle_names[0]],
                  ", Ellasped Time: ",
                  time.time() - tic)
            env.client.simPause(False)  # Play

            # Add Excel and GUI Listeners to Loop
        print("Total Episode Return: ", episode_return,
              ", Total Episode Loss: ", episode_loss)
Пример #4
0
    def reset(self):
        # Reset the Car
        print('Reseting Car')
        self.client.reset()
        self.client.confirmConnection()
        self.client.enableApiControl(True)
        print('Reset Complete')
        # Start Timer for episode's first step:
        #state = self.client.getCarState()
        state = self.client.simGetGroundTruthKinematics()
        self.initial_position = (state['position']['x_val'],
                                 state['position']['y_val'],
                                 state['position']['z_val'])

        self.initial_velocity = (state['linear_velocity']['x_val'],
                                 state['linear_velocity']['y_val'],
                                 state['linear_velocity']['z_val'])

        # Set the environment state and image properties from the simulator
        self.pset_simulator_state_info()

        # Reset throttle break and steer
        self.throttle = 0
        self.steering = 0
        self.brake = 0

        self.extra_metadata = {
            'action': 0,
            'action_name': 0,
            'env_state': {
                'resetting': True,
                'running': False
            },
            'mode': 0,
            'reward': 0,
            'done': 0,
            'times': {
                'act_time': 0,
                'sim_img_time': 0,
                'sim_state_time': 0,
                'reward_time': 0,
                'step_time': 0
            }
        }

        if self.mode == "inertial":
            return (self.current_inertial_state, self.extra_metadata)
        elif self.mode == "rgb":
            # Reduce dimentionality of obs
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.images_rgb[:, :, self.image_mask_rgb],
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.obs4, self.extra_metadata)
        elif self.mode == "rgb_normal":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.rgbs2rgbs_normal(),
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.obs4, self.extra_metadata)
        elif self.mode == "rgba":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.rgbs2rgbs_normal(),
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.obs4, self.extra_metadata)
        elif self.mode == "gray":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.rgbs2grays(),
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.obs4, self.extra_metadata)
        elif self.mode == "gray_normal":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.rgbs2grays() / 255,
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.obs4, self.extra_metadata)
        elif self.mode == "both_rgb":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.images_rgb[:, :, self.image_mask_rgb],
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.current_inertial_state, self.obs4,
                    self.extra_metadata)
        elif self.mode == "both_rgb_normal":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.rgbs2rgbs_normal(),
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.current_inertial_state, self.obs4,
                    self.extra_metadata)
        elif self.mode == "both_rgba":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.images_rgba[:, :, self.image_mask_rgba],
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.current_inertial_state, self.obs4,
                    self.extra_metadata)
        elif self.mode == "both_gray":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.rgbs2grays(),
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.current_inertial_state, self.obs4,
                    self.extra_metadata)
        elif self.mode == "both_gray_normal":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.rgbs2grays() / 255,
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.current_inertial_state, self.obs4,
                    self.extra_metadata)
        else:
            print("invalid Mode!")
Пример #5
0
    def reset(self):
        # Reset the Copter
        print('Reseting Quad')
        self.client.reset()
        self.client.confirmConnection()
        self.client.enableApiControl(True)
        #self.client.simSetVehiclePose(Vector3r(0,0,-2), ignore_collison = True)

        # Quickly raise the quadcopter to a few meters -- Speed up
        for i in range(8):
            state = self.client.simGetGroundTruthKinematics()
            quad_vel = state['linear_velocity']
            quad_offset = (0, 0, -self.scaling_factor)
            self.client.moveByVelocityAsync(quad_vel['x_val'] + quad_offset[0],
                                            quad_vel['y_val'] + quad_offset[1],
                                            quad_vel['z_val'] + quad_offset[2],
                                            self.action_duration)
            time.sleep(self.action_duration)

        state = self.client.simGetGroundTruthKinematics()
        self.initial_position = (state['position']['x_val'],
                                 state['position']['y_val'],
                                 state['position']['z_val'] * -1)

        self.initial_velocity = (state['linear_velocity']['x_val'],
                                 state['linear_velocity']['y_val'],
                                 state['linear_velocity']['z_val'])

        print("Initial Quad Position: ", self.initial_position)
        print('Reset Complete')

        # Set the environment state and image properties from the simulator
        self.pset_simulator_state_info()

        self.extra_metadata = {
            'action': 0,
            'action_name': 0,
            'env_state': {
                'resetting': True,
                'running': False
            },
            'mode': 0,
            'reward': 0,
            'done': 0,
            'times': {
                'act_time': 0,
                'sim_img_time': 0,
                'sim_state_time': 0,
                'reward_time': 0,
                'step_time': 0
            }
        }

        if self.mode == "inertial":
            return (self.current_inertial_state, self.extra_metadata)
        elif self.mode == "rgb":
            # Reduce dimentionality of obs
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.images_rgb[:, :, self.image_mask_rgb],
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.obs4, self.extra_metadata)
        elif self.mode == "rgb_normal":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.rgbs2rgbs_normal(),
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.obs4, self.extra_metadata)
        elif self.mode == "rgba":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.rgbs2rgbs_normal(),
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.obs4, self.extra_metadata)
        elif self.mode == "gray":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.rgbs2grays(),
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.obs4, self.extra_metadata)
        elif self.mode == "gray_normal":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.rgbs2grays() / 255,
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.obs4, self.extra_metadata)
        elif self.mode == "both_rgb":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.images_rgb[:, :, self.image_mask_rgb],
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.current_inertial_state, self.obs4,
                    self.extra_metadata)
        elif self.mode == "both_rgb_normal":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.rgbs2rgbs_normal(),
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.current_inertial_state, self.obs4,
                    self.extra_metadata)
        elif self.mode == "both_rgba":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.images_rgba[:, :, self.image_mask_rgba],
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.current_inertial_state, self.obs4,
                    self.extra_metadata)
        elif self.mode == "both_gray":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.rgbs2grays(),
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.current_inertial_state, self.obs4,
                    self.extra_metadata)
        elif self.mode == "both_gray_normal":
            self.obs4 = trim_append_state_vector(
                self.obs4,
                self.rgbs2grays() / 255,
                pop_index=self.IMG_VIEWS *
                self.IMG_CHANNELS)  # pop old and append new state obsv
            return (self.current_inertial_state, self.obs4,
                    self.extra_metadata)
        else:
            print("invalid Mode!")