Example #1
	action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=0.02 * np.ones(n_actions), theta=0.15, dt=0.01, initial_noise=None)
	model = TD3(MlpPolicyTD3,
				env,
				action_noise=action_noise,
				verbose=1,
				policy_kwargs=dict(layers=[400, 300]),
				tensorboard_log=workDirectory + "/log")

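	# Separate evaluation environment attached to the model (used by the monitoring callback)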
	model.test_env = DummyVecEnv([lambda: e.AidaBulletEnv(commands,
											render=False,
											on_rack=False,
											default_reward=args.default_reward,
											height_weight=args.height_weight,
											orientation_weight=args.orientation_weight,
											direction_weight=args.direction_weight,
											speed_weight=args.speed_weight,
											mimic_weight=args.mimic_weight,
											consistancy_weight=args.consistancy_weight,
											logReward=True)
											])
	if normalize:
		model.test_env = VecNormalize(model.test_env, gamma=args.gamma)

	def callback(_locals, _globals):
		"""
		Callback for monitoring learning progress.
		:param _locals: (dict)
		:param _globals: (dict)
		:return: (bool) If False: stop training
		"""
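The excerpt above is truncated by the source listing and relies on names defined elsewhere in the original script (`args`, `commands`, `n_actions`, `normalize`, `workDirectory`, and the module `e` providing `AidaBulletEnv`). A minimal sketch of the imports it appears to assume, using the stable-baselines 2.x package:

import numpy as np

from stable_baselines import TD3
from stable_baselines.td3.policies import MlpPolicy as MlpPolicyTD3
from stable_baselines.common.noise import OrnsteinUhlenbeckActionNoise
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

# The AidaBulletEnv module is project-specific; this import path is hypothetical.
import aida_env as e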
Example #2
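This example is an Optuna objective built around stable-baselines SAC; in the original file it is nested inside an enclosing optimisation routine that also defines `hyperparams`, `sample_sac_params`, `evaluate_interval`, `n_test_steps`, `n_timesteps` and the environment module `e`, which are assumed here. A sketch of the imports it appears to need (stable-baselines 2.x with the TensorFlow 1.x summary API):

import os
from copy import deepcopy

import numpy as np
import optuna
import tensorflow as tf

from stable_baselines import SAC
from stable_baselines.sac.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize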
    def objective(trial):

        kwargs = hyperparams.copy()

        trial.model_class = None

        kwargs.update(sample_sac_params(trial))

        def callback(_locals, _globals):
            """
            Callback for monitoring learning progress.
            :param _locals: (dict)
            :param _globals: (dict)
            :return: (bool) If False: stop training
            """
            self_ = _locals['self']
            trial = self_.trial

            # Initialize variables
            if not hasattr(self_, 'is_pruned'):
                self_.is_pruned = False
                self_.last_mean_test_reward = -np.inf
                self_.last_time_evaluated = 0
                self_.eval_idx = 0

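            # Skip evaluation until evaluate_interval timesteps have passed since the last one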
            if (self_.num_timesteps -
                    self_.last_time_evaluated) < evaluate_interval:
                return True

            self_.last_time_evaluated = self_.num_timesteps

            # Evaluate the trained agent on the test env
            rewards = []
            n_steps_done, reward_sum = 0, 0.0

            # Sync the observation normalization statistics if using VecNormalize
            # NOTE: this does not cover all the possible cases
            if isinstance(self_.test_env, VecNormalize):
                self_.test_env.obs_rms = deepcopy(self_.env.obs_rms)
                self_.test_env.ret_rms = deepcopy(self_.env.ret_rms)
                # Do not normalize reward
                self_.test_env.norm_reward = False

            obs = self_.test_env.reset()
            while n_steps_done < n_test_steps:
                # Use default value for deterministic
                action, _ = self_.predict(obs)
                obs, reward, done, _ = self_.test_env.step(action)
                reward_sum += reward
                n_steps_done += 1

                if done:
                    rewards.append(reward_sum)
                    reward_sum = 0.0
                    obs = self_.test_env.reset()
            rewards.append(reward_sum)
            mean_reward = np.mean(rewards)
            summary = tf.Summary(value=[
                tf.Summary.Value(tag='evaluation', simple_value=mean_reward)
            ])
            _locals['writer'].add_summary(summary, self_.num_timesteps)
            self_.last_mean_test_reward = mean_reward
            self_.eval_idx += 1

            # Report the best or the current mean reward?
            # Report num_timesteps or elapsed time?
            trial.report(-1 * mean_reward, self_.eval_idx)
            # Prune trial if needed
            if trial.should_prune(self_.eval_idx):
                self_.is_pruned = True
                return False

            return True

        commands = [[1, 0], [2, 0], [3, 0]]
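        # Training environment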
        env = DummyVecEnv([
            lambda: e.AidaBulletEnv(commands,
                                    render=False,
                                    on_rack=False,
                                    default_reward=2,
                                    height_weight=5,
                                    orientation_weight=3,
                                    direction_weight=2,
                                    speed_weight=4)
        ])

        model = SAC(MlpPolicy,
                    env,
                    gamma=kwargs['gamma'],
                    learning_rate=kwargs['learning_rate'],
                    batch_size=kwargs['batch_size'],
                    buffer_size=kwargs['buffer_size'],
                    learning_starts=kwargs['learning_starts'],
                    train_freq=kwargs['train_freq'],
                    gradient_steps=kwargs['gradient_steps'],
                    ent_coef=kwargs['ent_coef'],
                    target_entropy=kwargs['target_entropy'],
                    policy_kwargs=kwargs['policy_kwargs'],
                    tensorboard_log="./optimisationSAC/logOPTI")

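        # Separate evaluation environment used by the callback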
        model.test_env = DummyVecEnv([
            lambda: e.AidaBulletEnv(commands,
                                    render=False,
                                    on_rack=False,
                                    default_reward=2,
                                    height_weight=5,
                                    orientation_weight=3,
                                    direction_weight=2,
                                    speed_weight=4)
        ])

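        # Expose the trial to the callback through the model instance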
        model.trial = trial

        try:
            model.learn(n_timesteps,
                        callback=callback,
                        tb_log_name="SAC_" + str(trial.number))
            # Free memory
            model.env.close()
            model.test_env.close()
        except AssertionError:
            # Sometimes, random hyperparams can generate NaN
            # Free memory
            model.env.close()
            model.test_env.close()
            raise
        is_pruned = False
        cost = np.inf
        if hasattr(model, 'is_pruned'):
            is_pruned = model.is_pruned
            cost = -1 * model.last_mean_test_reward
        try:
            os.mkdir("./optimisationSAC/resultats/" + str(trial.number))
        except FileExistsError:
            print("Directory already exists")

        model.save("./optimisationSAC/resultats/" + str(trial.number) + "/" +
                   str(trial.number))

        del model.env, model.test_env
        del model

        if is_pruned:
            try:
                # Optuna >= 0.19.0
                raise optuna.exceptions.TrialPruned()
            except AttributeError:
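                # Optuna < 0.19.0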
                raise optuna.structs.TrialPruned()

        return cost
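For context, a minimal sketch of how such an objective is usually driven by an Optuna study, assuming `objective` is reachable where the study is created; the pruner choice and trial budget are placeholders, not taken from the original code:

# objective() returns a cost (negative mean test reward), so the default
# 'minimize' direction of create_study is what we want.
study = optuna.create_study(pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=50)
print("Best trial:", study.best_trial.number, "with cost", study.best_value)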