Exemplo n.º 1
0
    agent.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))

    # During training epochs, we want to train the agent after every [parameters.update_frequency] actions it takes.
    # In addition, after each training episode (which is not the same as after every training update) we want to
    # display the average Bellman residual and the average of the V values obtained during the last episode, hence
    # the last two arguments.
    agent.attach(
        bc.TrainerController(evaluate_on='action',
                             periodicity=parameters.update_frequency,
                             show_episode_avg_V_value=True,
                             show_avg_Bellman_residual=True))

    # At the end of every epoch, the learning rate can be modified using a LearningRateController. Here we
    # update the learning rate after every training epoch (periodicity=1), according to the parameters given.
    agent.attach(
        bc.LearningRateController(
            initial_learning_rate=parameters.learning_rate,
            learning_rate_decay=parameters.learning_rate_decay,
            periodicity=1))

    # Same for the discount factor.
    agent.attach(
        bc.DiscountFactorController(
            initial_discount_factor=parameters.discount,
            discount_factor_growth=parameters.discount_inc,
            discount_factor_max=parameters.discount_max,
            periodicity=1))

    # As with the discount factor and the learning rate, one can periodically update the parameter of the
    # epsilon-greedy policy implemented by the agent. This controller has a few more capabilities, as it allows one
    # to choose more precisely when to update epsilon: after every X actions, episodes or epochs. This parameter can
    # also be reset every episode or epoch (or never, hence the reset_every='none').
    agent.attach(
Exemplo n.º 2
0
                        replay_memory_size=min(
                            int(args.epochs[0] * args.epochs[1] * 1.1),
                            100000),
                        batch_size=32,
                        random_state=rng)
    agent.setDiscountFactor(0.95)
    agent.attach(bc.FindBestController(validationID=0,
                                       unique_fname=args.fname))
    agent.attach(bc.VerboseController())
    agent.attach(bc.TrainerController())
    agent.attach(
        bc.EpsilonController(initial_e=0.8,
                             e_decays=args.epochs[0] * args.epochs[1],
                             e_min=0.2))
    agent.attach(
        bc.LearningRateController(args.learning_rate[0], args.learning_rate[1],
                                  args.learning_rate[2]))
    agent.attach(
        bc.InterleavedTestEpochController(epoch_length=1000,
                                          controllers_to_disable=[1, 2, 3, 4]))
elif args.network == 'DDPG':
    network = MyACNetwork(environment=env,
                          batch_size=32,
                          double_Q=True,
                          freeze_interval=args.epochs[1],
                          random_state=rng)
    agent = NeuralAgent(
        env,
        network,
        train_policy=GaussianNoiseExplorationPolicy(
            network, env.nActions(), rng, .5) if args.exploration == 'gauss'
        else EpsilonGreedyPolicy(network, env.nActions(), rng, 0.1),
Exemplo n.º 3
0
     evaluateOn='epoch', 
     periodicity=1))
 
 # During training epochs, we want to train the agent after every [parameters.update_frequency] actions it takes.
 # In addition, after each training episode (which is not the same as after every training update) we want to
 # display the average Bellman residual and the average of the V values obtained during the last episode, hence
 # the last two arguments.
 agent.attach(bc.TrainerController(
     evaluateOn='action', 
     periodicity=parameters.update_frequency, 
     showEpisodeAvgVValue=True, 
     showAvgBellmanResidual=True))
 
 # At the end of every epoch, the learning rate can be modified using a LearningRateController. Here we
 # update the learning rate after every training epoch (periodicity=1), according to the parameters given.
 agent.attach(bc.LearningRateController(
     initialLearningRate=parameters.learning_rate, 
     learningRateDecay=parameters.learning_rate_decay,
     periodicity=1))
 
 # Same for the discount factor.
 agent.attach(bc.DiscountFactorController(
     initialDiscountFactor=parameters.discount, 
     discountFactorGrowth=parameters.discount_inc, 
     discountFactorMax=parameters.discount_max,
     periodicity=1))
 
 # As with the discount factor and the learning rate, one can periodically update the parameter of the
 # epsilon-greedy policy implemented by the agent. This controller has a few more capabilities, as it allows one
 # to choose more precisely when to update epsilon: after every X actions, episodes or epochs. This parameter can
 # also be reset every episode or epoch (or never, hence the resetEvery='none').
 agent.attach(bc.EpsilonController(
     initialE=parameters.epsilon_start, 
Exemplo n.º 4
0
    def __init__(self,
                 inputDims,
                 q_network,
                 actions,
                 file='',
                 replay_memory_size=Parameters.REPLAY_MEMORY_SIZE,
                 replay_start_size=Parameters.BATCH_SIZE,
                 batch_size=Parameters.BATCH_SIZE,
                 random_state=np.random.RandomState(),
                 exp_priority=0,
                 batch_type='sequential',
                 train_policy=None,
                 test_policy=None,
                 only_full_history=True,
                 reward_as_input=False):
        """Initialise the learner: controllers, replay dataset, state buffers and policies.

        The constructor first delegates to ``CompleteLearner.__init__``, opens
        a policy log file, attaches a fixed list of controllers (in a fixed
        order), builds the replay ``DataSet``, zero-initialises the current
        state, selects the train/test policies and finally calls
        ``self.initEpisode()``.

        Args:
            inputDims: Sequence of shapes, one per input. Forwarded to
                ``DataSet`` as ``inputDimensions``; each entry is also used as
                the shape of a zero-filled array in the initial state.
            q_network: Network object; stored as ``self._network`` and handed
                to the default epsilon-greedy policies.
            actions: Collection of actions. Forwarded to
                ``CompleteLearner.__init__``; its length is used as the number
                of actions for the dataset and the default policies.
            file: Forwarded to ``CompleteLearner.__init__``. The policy log is
                opened at ``self.file + 'policy.txt'``.
            replay_memory_size: Passed to ``DataSet`` as ``max_size``.
            replay_start_size: Number of observations to gather before
                learning; only stored here.
            batch_size: Stored as ``self._batch_size``.
            random_state: Random generator shared with the dataset and the
                default policies.
            exp_priority: Stored as ``self._exp_priority``.
            batch_type: Passed to ``DataSet`` as ``batch_type``.
            train_policy: Policy used for training. When ``None``, an
                ``EpsilonGreedyPolicy`` built with ``0.05`` is used.
            test_policy: Policy used for testing. When ``None``, an
                ``EpsilonGreedyPolicy`` built with ``0.`` is used.
            only_full_history: Passed to ``DataSet`` as ``only_full_history``.
            reward_as_input: Stored as ``self.reward_as_input``.
        """
        # NOTE(review): the ``random_state`` default above is evaluated once,
        # when the function is defined, so every instance created without an
        # explicit ``random_state`` shares the same generator object.

        CompleteLearner.__init__(self, actions, file)
        # NOTE(review): ``self.file`` is presumably set by
        # CompleteLearner.__init__ -- confirm. The handle stays open on the
        # instance; nothing in this method closes it.
        self.polfile = open(self.file + 'policy.txt', "w")
        # if replay_start_size == None:
        #     replay_start_size = max(inputDims[i][0] for i in range(len(inputDims)))
        # elif replay_start_size < max(inputDims[i][0] for i in range(len(inputDims))):
        #     raise AgentError("Replay_start_size should be greater than the biggest history of a state.")

        self._controllers = []

        # --- Bind controllers to the agent ---
        # For comments, please refer to run_toy_env.py
        # The attachment order is kept as-is: the (disabled) test controller
        # below refers to controllers by their position in this list.
        self.attach(bc.VerboseController(evaluate_on='epoch', periodicity=1))

        self.attach(
            bc.TrainerController(evaluate_on='action',
                                 periodicity=Parameters.UPDATE_FREQUENCY,
                                 show_episode_avg_V_value=True,
                                 show_avg_Bellman_residual=True))

        self.attach(
            bc.LearningRateController(
                initial_learning_rate=Parameters.LEARNING_RATE,
                learning_rate_decay=Parameters.LEARNING_RATE_DECAY,
                periodicity=10000))

        self.attach(
            bc.DiscountFactorController(
                initial_discount_factor=Parameters.DISCOUNT,
                discount_factor_growth=Parameters.DISCOUNT_INC,
                discount_factor_max=Parameters.DISCOUNT_MAX,
                periodicity=10000))

        self.attach(
            bc.EpsilonController(initial_e=Parameters.EPSILON_START,
                                 e_decays=Parameters.EPSILON_DECAY,
                                 e_min=Parameters.EPSILON_MIN,
                                 evaluate_on='action',
                                 periodicity=1000,
                                 reset_every='none'))

        # self.attach(bc.InterleavedTestEpochController(
        #     id=0,
        #     epoch_length=Parameters.STEPS_PER_TEST,
        #     controllers_to_disable=[0, 1, 2, 3, 4],
        #     periodicity=2,
        #     show_score=True,
        #     summarize_every=-1))

        self.obs = []
        self.reward_as_input = reward_as_input
        self._network = q_network
        self._replay_memory_size = replay_memory_size
        self._replay_start_size = replay_start_size  # make sure you gather this many observations before learning
        self._batch_size = batch_size
        self._batch_type = batch_type
        self._random_state = random_state
        self._exp_priority = exp_priority
        self._only_full_history = only_full_history

        # Computed once and reused for the dataset and the default policies.
        n_actions = len(actions)

        #inputDimensions, n_actions, observation_type = np.float32, random_state = None, max_size = 1000, batch_type = 'random', only_full_history = True
        self._dataset = DataSet(inputDimensions=inputDims,
                                n_actions=n_actions,
                                max_size=replay_memory_size,
                                random_state=random_state,
                                batch_type=self._batch_type,
                                only_full_history=self._only_full_history)
        self._tmp_dataset = None  # Will be created by startTesting() when necessary
        self._mode = -1
        self._mode_epochs_length = 0
        self._total_mode_reward = 0
        self._training_loss_averages = []
        self._Vs_on_last_episode = []
        #self._in_episode = False

        self._selected_action = -1
        # One zero-filled array per input, shaped by the matching inputDims entry.
        self._state = [np.zeros(dims, dtype=config.floatX)
                       for dims in inputDims]

        # Identity checks: a policy object must never be compared to None
        # through its own __eq__.
        if train_policy is None:
            train_policy = EpsilonGreedyPolicy(q_network, n_actions,
                                               random_state, 0.05)
        self._train_policy = train_policy

        if test_policy is None:
            test_policy = EpsilonGreedyPolicy(q_network, n_actions,
                                              random_state, 0.)
        self._test_policy = test_policy

        self.initEpisode()