Example #1
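The code below comes from a DDPG-style agent script and consists of non-contiguous fragments: the agent's constructor, a make_state helper that stacks the most recent states and actions into one observation vector, and the beginning of the training loop. The class name, the main() header and the make_state signature are not shown on this page; they are reconstructions inferred from the call sites and are marked as assumptions in the comments.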
import numpy as np

import memory  # project-local replay-buffer module (interface inferred from the calls below)


class ActorCritic:
    # NOTE: the class name is an assumption; this page shows only fragments
    # of the agent class and of the surrounding training script.
    def __init__(self, env, actor, critic, DISCOUNT_FACTOR, MINIBATCH_SIZE,
                 REPLAY_MEMORY_SIZE, TARGET_DISCOUNT, continue_execution,
                 MEMORIES):
        # Environment details
        self.env = env
        self.actor = actor
        self.critic = critic
        self.MINIBATCH_SIZE = MINIBATCH_SIZE
        self.DISCOUNT = DISCOUNT_FACTOR
        # TARGET_DISCOUNT is presumably the soft target-update rate (tau in
        # DDPG); the target-network update itself is not part of this fragment.
        self.TARGET_DISCOUNT = TARGET_DISCOUNT
        self.bg_noise = None
        self.action_dim = self.env.action_space.shape[0]

        # Replay memory to store experiences of the model with the environment
        # self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)
        if continue_execution:
            self.replay_memory = memory.Memory(REPLAY_MEMORY_SIZE,
                                               load=continue_execution,
                                               memories=MEMORIES)
        else:
            self.replay_memory = memory.Memory(REPLAY_MEMORY_SIZE)
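

def main():
    # Function header assumed (the fragments on this page are not contiguous):
    # in the full script, env, actor_critic and the hyper-parameters used
    # below (EPISODES, CURRENT_EPISODE, GREEDY_RATE, STEPS) are set up
    # before this point.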
    def make_state(states, actions, new_state, new_action):
        # Signature and window update reconstructed from the call sites
        # below (assumption): slide fixed windows of the last three states
        # and the last two actions, then flatten them into one observation.
        states = states[1:] + [new_state]
        actions = actions[1:] + [new_action]
        _state = []
        for i in range(len(actions)):
            _state += list(states[i]) + list(actions[i])
        _state += list(states[-1])
        return states, actions, np.asarray(tuple(_state))
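
    # Quick shape check of make_state (illustrative only, not from the
    # original script): with 2-dim states and 1-dim actions the flattened
    # observation is [s0, a1, s1, a2, s2], i.e. 2+1+2+1+2 = 8 numbers.
    _s, _a = np.zeros(2), np.zeros(1)
    assert make_state([_s, _s, _s], [_a, _a], _s, _a)[2].shape == (8,)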

    # Start iterating from the current episode (allows resuming a run).
    for episode in range(CURRENT_EPISODE + 1, EPISODES + 1):
        done = False

        # Bootstrap the stacking windows: repeat the initial state and a
        # zero action (hard-coded here for a 3-dim action space) so the
        # stacked observation has its full length from the first step.
        first_state = env.reset()
        first_action = np.array([0, 0, 0])
        states = [first_state, first_state, first_state]
        actions = [first_action, first_action]
        states, actions, cur_state = make_state(states, actions, first_state,
                                                first_action)

        # Per-episode transition buffer (STEPS is presumably the
        # per-episode step limit).
        action_memory = memory.Memory(STEPS)
        episode_reward = 0
        episode_step = 0
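        # new_episode flags the first step of the episode so act() can reset
        # its exploration-noise state (cf. self.bg_noise above); presumably
        # it is cleared later in the loop, outside the shown fragment.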
        new_episode = True
        while not done:
            action, action_step = actor_critic.act(cur_state, new_episode,
                                                   GREEDY_RATE)
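            # act() is assumed to return the policy's raw action (`action`,
            # kept for the stacked state and the replay memory) plus the
            # exploration variant `action_step` actually sent to the
            # environment.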
            _next_state, reward, done, _ = env.step(action_step)

            states, actions, next_state = make_state(states, actions,
                                                     _next_state, action)

            episode_reward += reward

            action_memory.addMemory(cur_state, action, reward, next_state,
                                    done)