Exemplo n.º 1
0
 def test_shape_multilabel_multiple(self):
     '''Predicting on a (10, 10) batch of ones must yield a (10, 10) output.

     The network is built by FFNet from a zero-argument factory that calls
     create_test_model(10, 10).
     '''
     batch = np.ones((10, 10))
     net = FFNet(lambda: create_test_model(10, 10))
     output = net.predict_on_batch(batch)
     self.assertEqual(output.shape, (10, 10))
Exemplo n.º 2
0
 def test_shape_binary(self):
     '''Predicting on a single (1, 10) row of ones must yield a (1, 1) output.

     The network is built by FFNet from a zero-argument factory that calls
     create_test_model(10, 1).
     '''
     single_row = np.ones((1, 10))
     net = FFNet(lambda: create_test_model(10, 1))
     output = net.predict_on_batch(single_row)
     self.assertEqual(output.shape, (1, 1))
Exemplo n.º 3
0
class Agent(object):
    '''An epsilon-greedy agent used in testing reinforcement learning algorithms.

    This class is made with the purpose that it would allow multiple agents to
    be trained concurrently in a single game, so the majority of their work
    is hidden behind this class.

    Each agent keeps a bounded replay memory of
    (state, action, reward, new_state, terminal) tuples, picks one-hot
    actions either at random or from its model's prediction, and fits the
    model on randomly sampled batches of stored transitions.
    '''
    def __init__(self,
                 model,
                 memory_size=1024,
                 Batch_size=32,
                 Gamma=0.99,
                 Epsilon=None,
                 K=1,
                 name='Agent'):
        '''Create Agent from model description file.

        Args:
            model: model description, handed unchanged to FFNet.
            memory_size: maximum number of transitions kept in the replay
                memory; older entries are discarded once it is full.
            Batch_size: number of transitions sampled by each train() call.
            Gamma: discount applied to the best predicted value of the next
                state when building training targets.
            Epsilon: probability of choosing a random action. A number is
                used as a constant. An iterable is treated as a schedule:
                one value is consumed here and one more on every train()
                call. None (the default) builds a fresh
                rangefloat(1.0, 0.1, 1e6) schedule for this agent
                (presumably an anneal from 1.0 to 0.1 -- confirm against
                rangefloat).
            K: a new action is chosen only on time steps divisible by K; on
                the other steps the previous action is repeated.
            name: label stored on the agent as self.name.
        '''
        self.name = name
        self.Memory = deque(maxlen=memory_size)
        self.Batch_size = Batch_size
        self.Gamma = Gamma
        if Epsilon is None:
            # Build the default schedule per instance. As a default argument
            # value it would be created once and shared, so several agents
            # would all advance the same schedule.
            Epsilon = rangefloat(1.0, 0.1, 1e6)
        if isinstance(Epsilon, (int, float)):
            self.Epsilon = Epsilon
            self.Epsilon_gen = None
        else:
            # iter() leaves iterators untouched and lets plain iterables
            # (lists, tuples) act as schedules too.
            self.Epsilon_gen = iter(Epsilon)
            self.Epsilon = next(self.Epsilon_gen)
        self.K = K
        self.current_state = None
        self.current_action = None
        self.model = FFNet(model)
        self.terminal = False

    def initialize(self, current_state, current_action):
        '''Set the starting state and action and clear the terminal flag.'''
        self.current_state = current_state
        self.current_action = current_action
        self.terminal = False

    def _greedy_action(self):
        '''Return a one-hot float array marking the model's largest output.'''
        action = np.zeros(self.current_action.shape)
        prediction = self.model.predict_on_batch(self.current_state)
        # np.argmax yields an index into the flattened prediction; writing
        # through .flat sets exactly one entry whatever the action's rank.
        action.flat[np.argmax(prediction)] = 1
        return action

    def chooseAction(self, time_step):
        '''Choose an action based on the current state.

        On time steps divisible by K a new one-hot uint8 action is picked:
        uniformly at random with probability Epsilon, otherwise the model's
        greedy choice. On all other steps the stored action is returned
        unchanged.
        '''
        if time_step % self.K == 0:
            if random.random() <= self.Epsilon:
                action = np.zeros(self.current_action.shape)
                # A tuple addresses one element; a list would be treated as
                # fancy indexing along the first axis only.
                index = tuple(random.randint(0, i - 1) for i in action.shape)
                action[index] = 1
            else:
                action = self._greedy_action()
            self.current_action = action.astype(np.uint8)
        return self.current_action

    def chooseOptimal(self):
        '''Return the model's greedy one-hot action without storing it.'''
        return self._greedy_action()

    def feedback(self, frame, reward, terminal):
        '''Receive feedback from Game.

        The new frame is placed in front of the current state along axis 3
        and the final slice of the state's last axis is dropped (assumes a
        4-D state whose last axis is the frame history -- confirm with the
        caller). The resulting transition is appended to the replay memory.
        '''
        new_state = np.append(frame, self.current_state[..., 0:-1], axis=3)
        self.Memory.append((self.current_state, self.current_action, reward,
                            new_state, terminal))
        self.current_state = new_state
        self.terminal = terminal

    def isTerminal(self):
        '''Return the terminal flag from the most recent feedback.'''
        return self.terminal

    def save(self, name):
        '''Placeholder: saving the model is not implemented; does nothing.'''
        # TODO: delegate to self.model.save(name) once that is available.
        pass

    def train(self):
        '''Train the Agent.

        Advances the epsilon schedule (if any), samples Batch_size
        transitions from memory and fits the model on them. The target for
        the taken action is reward + Gamma * max(model(next_state)), with
        the bootstrap term removed for terminal transitions.

        Returns:
            Whatever the model's train_on_batch returns.

        Raises:
            ValueError: if fewer than Batch_size transitions are stored.
        '''
        if self.Epsilon_gen is not None:
            # Hold the last value once the schedule runs out rather than
            # letting StopIteration escape from training.
            self.Epsilon = next(self.Epsilon_gen, self.Epsilon)
        batch = random.sample(self.Memory, self.Batch_size)

        pseq_batch = np.concatenate([b[0] for b in batch], axis=0)
        action_batch = np.stack([b[1] for b in batch])
        reward_batch = np.array([b[2] for b in batch])
        seq_batch = np.concatenate([b[3] for b in batch], axis=0)
        term_batch = np.array([b[4] for b in batch])

        out = self.model.predict_on_batch(seq_batch)
        y_batch = self.model.predict_on_batch(pseq_batch)
        # Logical (not bitwise) negation, so 0/1 integer flags behave the
        # same as booleans.
        not_terminal = np.logical_not(term_batch)
        # Only the entry of the action actually taken is overwritten; this
        # relies on each stored action being one-hot.
        y_batch[action_batch == 1] = (
            reward_batch + self.Gamma * np.max(out, axis=1) * not_terminal)
        return self.model.train_on_batch(pseq_batch, y_batch)

    def get_epsilon(self):
        '''Return the current exploration probability.'''
        return self.Epsilon