Example #1
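Plots a trained KerasLinear model's angle and throttle predictions against the user-recorded values from one or more tubs, one subplot per channel.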
    def plot_predictions(self, cfg, tub_paths, model_path):
        """
        Plot model predictions for angle and throttle against data from tubs.

        """
        import os

        from donkeycar.parts.datastore import TubGroup
        from donkeycar.parts.keras import KerasLinear  # or KerasCategorical for categorical models
        import pandas as pd
        import matplotlib.pyplot as plt

        tg = TubGroup(tub_paths)

        model_path = os.path.expanduser(model_path)
        model = KerasLinear()
        model.load(model_path)

        user_angles = []
        user_throttles = []
        pilot_angles = []
        pilot_throttles = []

        # Run the model on every record, collecting the human (user) and
        # model (pilot) outputs side by side.
        for tub in tg.tubs:
            for iRec in tub.get_index(shuffled=False):
                record = tub.get_record(iRec)

                img = record["cam/image_array"]
                user_angle = float(record["user/angle"])
                user_throttle = float(record["user/throttle"])
                pilot_angle, pilot_throttle = model.run(img)

                user_angles.append(user_angle)
                user_throttles.append(user_throttle)
                pilot_angles.append(pilot_angle)
                pilot_throttles.append(pilot_throttle)

        angles_df = pd.DataFrame({'user_angle': user_angles, 'pilot_angle': pilot_angles})
        throttles_df = pd.DataFrame({'user_throttle': user_throttles, 'pilot_throttle': pilot_throttles})

        fig = plt.figure()

        title = "Model Predictions\nTubs: {}\nModel: {}".format(tub_paths, model_path)
        fig.suptitle(title)

        ax1 = fig.add_subplot(211)
        ax2 = fig.add_subplot(212)

        angles_df.plot(ax=ax1)
        throttles_df.plot(ax=ax2)

        ax1.legend(loc='lower right')
        ax2.legend(loc='lower right')

        plt.show()
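
A minimal sketch of a call site, assuming the method lives on some class instance (the hypothetical plotter below); the cfg argument is accepted but unused, and the paths are placeholders:

    # Hypothetical instance and placeholder paths, for illustration only.
    plotter.plot_predictions(cfg, '/home/pi/mycar/data/tub_1', '~/mycar/models/mypilot.h5')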
Example #2
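Runs a single JPEG through a KerasLinear model and prints the resulting (angle, throttle) tuple; uncomment the load() call to use trained weights instead of a randomly initialized network.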
from donkeycar.parts.keras import KerasLinear
from PIL import Image
import numpy as np

input_shape = (120, 160, 3)  # height, width, channels
roi_crop = (0, 0)            # (top, bottom) rows to crop; (0, 0) keeps the full frame

kl = KerasLinear(input_shape=input_shape, roi_crop=roi_crop)
# Uncomment to load trained weights; otherwise the model is untrained.
# kl.load('/home/pi/mycar/models/mypilot20.h5')

img = np.array(Image.open('./528.jpg'))  # should match input_shape
out = kl.run(img)  # (angle, throttle)
print(out)
Example #3
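A SAC learn() loop (in the style of stable-baselines) that warm-starts exploration: until learning_starts steps have elapsed, actions come from a pre-trained KerasLinear pilot (or random sampling when no model path is given) rather than from the untrained policy.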
    def learn(self,
              total_timesteps,
              callback=None,
              log_interval=1,
              tb_log_name="SAC",
              print_freq=100,
              save_path=None):

        with TensorboardWriter(self.graph, self.tensorboard_log,
                               tb_log_name) as writer:

            self._setup_learn()

            # Transform to callable if needed
            self.learning_rate = get_schedule_fn(self.learning_rate)

            start_time = time.time()
            episode_rewards = [0.0]
            is_teleop_env = hasattr(self.env, "wait_for_teleop_reset")
            # TeleopEnv
            if is_teleop_env:
                print("Waiting for teleop")
                obs = self.env.wait_for_teleop_reset()
            else:
                obs = self.env.reset()

            self.episode_reward = np.zeros((1, ))
            ep_info_buf = deque(maxlen=100)
            ep_len = 0
            self.n_updates = 0
            infos_values = []
            mb_infos_vals = []
            model_path = "--Path to model--/myNewModdel.h5"
            # model_path=  None
            if model_path is not None:
                cfg = dk.load_config(
                    config_path='--Path to config file inside mycar/config.py')
                kl = KerasLinear()
                kl.load(model_path)
                # vae = self.env.get_vae()
            self.training_started = False
            self.start_training = False
            for step in range(total_timesteps):
                # Compute current learning_rate
                frac = 1.0 - step / total_timesteps
                current_lr = self.learning_rate(frac)

                if callback is not None:
                    # Only stop training if return value is False, not when it is None. This is for backwards
                    # compatibility with callbacks that have no return statement.
                    if callback(locals(), globals()) is False:
                        break

                # Before training starts, randomly sample actions
                # from a uniform distribution for better exploration.
                # Afterwards, use the learned policy.
                if step < self.learning_starts and not self.training_started:
                    if model_path is not None:
                        try:
                            # Grab the latest camera frame and preprocess it
                            # exactly as the pilot was trained.
                            img_arr = np.asarray(self.env.get_images()[0])
                            img_arr = normalize_and_crop(img_arr, cfg)
                            croppedImgH = img_arr.shape[0]
                            croppedImgW = img_arr.shape[1]
                            if img_arr.shape[2] == 3 and cfg.IMAGE_DEPTH == 1:
                                img_arr = dk.utils.rgb2gray(img_arr).reshape(
                                    croppedImgH, croppedImgW, 1)
                            steering, throttle = kl.run(img_arr)
                            # Keep the pilot's steering; damp its throttle.
                            action = np.asarray([steering, throttle / 6.0])
                            rescaled_action = action
                            print('Predicted action :', action)
                        except Exception as e:
                            print(e)
                            action = self.env.action_space.sample()
                            rescaled_action = action
                    else:
                        action = self.env.action_space.sample()
                        rescaled_action = action
                        print(action)
                    # No need to rescale when sampling random action
                elif not self.training_started:
                    # First step past learning_starts: switch to the learned
                    # policy and finish the current episode from a fresh reset.
                    self.training_started = True
                    self.start_training = True
                    obs = self.env.reset()
                else:

                    action = self.policy_tf.step(
                        obs[None], deterministic=False).flatten()
                    # Rescale from [-1, 1] to the correct bounds
                    rescaled_action = action * np.abs(self.action_space.low)

                assert action.shape == self.env.action_space.shape

                new_obs, reward, done, info = self.env.step(rescaled_action)
                ep_len += 1

                if print_freq > 0 and ep_len % print_freq == 0 and ep_len > 0:
                    print("{} steps".format(ep_len))

                # Store transition in the replay buffer.
                self.replay_buffer.add(obs, action, reward, new_obs,
                                       float(done))
                obs = new_obs

                # Retrieve reward and episode length if using Monitor wrapper
                maybe_ep_info = info.get('episode')
                if maybe_ep_info is not None:
                    ep_info_buf.extend([maybe_ep_info])

                if writer is not None:
                    # Write reward per episode to tensorboard
                    ep_reward = np.array([reward]).reshape((1, -1))
                    ep_done = np.array([done]).reshape((1, -1))
                    self.episode_reward = total_episode_reward_logger(
                        self.episode_reward, ep_reward, ep_done, writer, step)

                if ep_len > self.train_freq:
                    print("Additional training")
                    self.env.reset()
                    mb_infos_vals = self.optimize(step, writer, current_lr)
                    done = True

                episode_rewards[-1] += reward
                if done or self.start_training:
                    self.start_training = False
                    if not (isinstance(self.env, VecEnv) or is_teleop_env):
                        obs = self.env.reset()

                    print("Episode finished. Reward: {:.2f} {} Steps".format(
                        episode_rewards[-1], ep_len))
                    episode_rewards.append(0.0)
                    ep_len = 0
                    mb_infos_vals = self.optimize(step, writer, current_lr)

                    # Refresh obs when using TeleopEnv
                    if is_teleop_env:
                        print("Waiting for teleop")
                        obs = self.env.wait_for_teleop_reset()

                # Log losses and entropy, useful for monitor training
                if len(mb_infos_vals) > 0:
                    infos_values = np.mean(mb_infos_vals, axis=0)

                if len(episode_rewards[-101:-1]) == 0:
                    mean_reward = -np.inf
                else:
                    mean_reward = round(
                        float(np.mean(episode_rewards[-101:-1])), 1)

                num_episodes = len(episode_rewards)
                if self.verbose >= 1 and done and log_interval is not None and len(
                        episode_rewards) % log_interval == 0:
                    fps = int(step / (time.time() - start_time))
                    logger.logkv("episodes", num_episodes)
                    logger.logkv("mean 100 episode reward", mean_reward)
                    logger.logkv(
                        'ep_rewmean',
                        safe_mean([ep_info['r'] for ep_info in ep_info_buf]))
                    logger.logkv(
                        'eplenmean',
                        safe_mean([ep_info['l'] for ep_info in ep_info_buf]))
                    logger.logkv("n_updates", self.n_updates)
                    logger.logkv("current_lr", current_lr)
                    logger.logkv("fps", fps)
                    logger.logkv('time_elapsed',
                                 "{:.2f}".format(time.time() - start_time))
                    if len(infos_values) > 0:
                        for (name, val) in zip(self.infos_names, infos_values):
                            logger.logkv(name, val)
                    logger.logkv("total timesteps", step)
                    logger.dumpkvs()
                    # Reset infos:
                    infos_values = []
            if is_teleop_env:
                self.env.is_training = False
            # Use last batch
            print("Final optimization before saving")
            self.env.reset()
            mb_infos_vals = self.optimize(step, writer, current_lr)

            plt.figure(1)
            plt.plot(episode_rewards)
            plt.title('Episode Rewards')
            plt.ylabel("Reward")
            plt.xlabel("Epoch")
            filename = "training" + str(random.random()) + ".png"
            plt.savefig(filename)
            plt.show()
        return self
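
Note the throttle damping in the warm-start branch: the pilot's throttle is divided by 6.0, presumably to keep early episodes slow while the replay buffer fills, while its steering prediction is used unchanged.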