def test_set_weights(self):
    model = lambda: create_test_model(10, 10)
    model_1 = FFNetAsync(model)
    model_2 = FFNetAsync(model)
    # get_weights() only queues the request; collect() returns the result
    # (the same pattern the agent classes below use).
    model_1.get_weights()
    model_2.get_weights()
    weights_1 = model_1.collect()
    weights_2 = model_2.collect()
    # Build a new list of averaged weights; rebinding the loop variable
    # w_1 would leave weights_1 unchanged.
    weights_1 = [(w_1 + w_2) / 2 for w_1, w_2 in zip(weights_1, weights_2)]
    model_1.set_weights(weights_1)
def __init__(self, model, N=5):
    self.nets = [FFNetAsync(model) for _ in range(N)]
import random

import numpy as np

# FFNetAsync, PrioritizedReplay and rangefloat are project-internal helpers
# assumed to be importable from elsewhere in this repository.


class DDQNAgent(object):
    '''An agent class used in testing reinforcement learning algorithms.

    This class is made with the purpose that it would allow multiple agents
    to be trained concurrently in a single game, so the majority of their
    work should be hidden behind this class.
    '''

    def __init__(self, model, memory_size=1024, Batch_size=32, Gamma=0.99,
                 Epsilon=rangefloat(1.0, 0.1, 1e6), K=1, name='Agent'):
        '''Create Agent from model description file.'''
        self.Memory = PrioritizedReplay(memory_size)
        self.Batch_size = Batch_size
        self.Gamma = Gamma
        # Epsilon may be a constant or a generator that anneals over time.
        if type(Epsilon) is float or type(Epsilon) is int:
            self.Epsilon = Epsilon
            self.Epsilon_gen = None
        else:
            self.Epsilon_gen = Epsilon
            self.Epsilon = next(self.Epsilon_gen)
        self.K = K
        self.current_state = None
        self.current_action = None
        self.model = FFNetAsync(model)
        self.target_model = FFNetAsync(model)
        self.terminal = False

    def initialize(self, current_state, current_action):
        self.current_state = current_state
        self.current_action = current_action
        self.terminal = False

    def chooseAction(self, time_step):
        '''Choose an action based on the current state.'''
        action = np.zeros(self.current_action.shape)
        # Only pick a new action every K steps; otherwise repeat the last one.
        if time_step % self.K == 0:
            if random.random() <= self.Epsilon:
                index = [random.randint(0, i - 1) for i in action.shape]
                action[index] = 1
            else:
                self.model.predict_on_batch(self.current_state)
                x = self.model.collect()
                index = np.argmax(x)
                action[index] = 1
            self.current_action = action.astype(np.uint8)
        return self.current_action

    def chooseOptimal(self):
        action = np.zeros(self.current_action.shape)
        self.target_model.predict_on_batch(self.current_state)
        x = self.target_model.collect()
        index = np.argmax(x)
        action[index] = 1
        return action

    def feedback(self, frame, reward, terminal):
        '''Receive feedback from Game.'''
        self.model.predict_on_batch(self.current_state)
        new_state = np.append(frame, self.current_state[..., 0:-1], axis=3)
        self.target_model.predict_on_batch(new_state)
        # Q of the taken action (one-hot mask) versus the bootstrapped target.
        Q = np.max(self.model.collect().flatten() * self.current_action)
        out = self.target_model.collect().flatten()
        T = reward + self.Gamma * np.max(out)
        # Prioritize transitions by the magnitude of their TD error |Q - T|.
        self.Memory.insert(
            (self.current_state, self.current_action, reward, new_state,
             terminal), abs(Q - T))
        self.current_state = new_state
        self.terminal = terminal

    def isTerminal(self):
        return self.terminal

    def save(self, name):
        #self.model.save(name)
        pass

    def train(self):
        '''Train the Agent.'''
        if self.Epsilon_gen is not None:
            self.Epsilon = next(self.Epsilon_gen)
        batch = self.Memory.batch()
        pseq_batch = np.concatenate([b[0] for b in batch], axis=0)
        action_batch = np.stack([b[1] for b in batch])
        reward_batch = np.array([b[2] for b in batch])
        seq_batch = np.concatenate([b[3] for b in batch], axis=0)
        term_batch = np.array([b[4] for b in batch])
        self.target_model.predict_on_batch(seq_batch)
        #self.model.predict_on_batch(seq_batch)
        self.model.predict_on_batch(pseq_batch)
        out = self.target_model.collect()
        #actions = self.model.collect()
        #out = out[np.arange(len(out)), np.argmax(actions, axis=1)].reshape(-1, 1)
        y_batch = self.model.collect()
        # Q(s_t, a) = r + gamma * max_a' Q_target(s_t+1, a'), or just r when
        # the transition is terminal.
        y_batch[action_batch == 1] = reward_batch + self.Gamma * np.max(
            out, axis=1) * np.invert(term_batch)
        self.model.train_on_batch(pseq_batch, y_batch)
        # Soft-update the target network towards the online network.
        self.model.get_weights()
        self.target_model.get_weights()
        weights = self.model.collect()
        target_weights = self.target_model.collect()
        for i in range(len(target_weights)):
            target_weights[i] = target_weights[i] * 0.8 + weights[i] * 0.2
        self.target_model.set_weights(target_weights)

    def get_epsilon(self):
        return self.Epsilon
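# DDQNAgent's Epsilon default, rangefloat(1.0, 0.1, 1e6), is consumed with
# next() once per training step, so it must behave as an infinite generator.
# The project's rangefloat is not shown in this section; below is a
# hypothetical stand-in that anneals linearly from start to stop over
# `steps` values and then holds at stop. The real implementation may differ.
def rangefloat(start, stop, steps):
    '''Yield `steps` values annealed linearly from start to stop, then
    repeat stop forever (sketch of an assumed helper).'''
    delta = (stop - start) / float(steps)
    value = start
    for _ in range(int(steps)):
        yield value
        value += delta
    while True:
        yield stop


# Example: epsilon decays from 1.0 towards 0.1 over one million steps.
# eps = rangefloat(1.0, 0.1, 1e6)
# next(eps)  # 1.0, then 0.9999991, ...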
# Benchmark script fragment: gens, results, begin and NUM are defined
# earlier in the script.
from time import time

import numpy as np

for g in gens:
    results.append(g.send(False))
    next(g)
print('Time Elapsed: {!s}'.format(time() - begin))

begin = time()
for i in range(10):
    for g in gens:
        results.append(g.send(False))
        next(g)
print('Time Elapsed: {!s}'.format(time() - begin))

# Time asynchronous prediction: queue all the batches first, then collect
# the results.
array = np.random.rand(10000, 1000)
f = lambda: create_test_model(1000, 10)
gens = [FFNetAsync(f) for _ in range(NUM)]

begin = time()
for _ in range(10):
    for g in gens:
        g.predict_on_batch(array)
for _ in range(10):
    results = [g.collect() for g in gens]
print('Time Elapsed: {!s}'.format(time() - begin))

begin = time()
for _ in range(10):
    for g in gens:
        g.predict_on_batch(array)
for _ in range(10):
    results = [g.collect() for g in gens]
print('Time Elapsed: {!s}'.format(time() - begin))
def test_get_weights(self):
    model = lambda: create_test_model(10, 10)
    model = FFNetAsync(model)
    # get_weights() queues the request; collect() returns the weights.
    model.get_weights()
    weights = model.collect()
def test_shape_multilabel_multiple(self):
    model = lambda: create_test_model(10, 10)
    model = FFNetAsync(model)
    model.predict_on_batch(np.ones((10, 10)))
    pred = model.collect()
    self.assertEqual(pred.shape, (10, 10))
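# The tests above rely on a create_test_model(inputs, outputs) helper that
# is not shown in this section. Below is a hypothetical stand-in, assuming a
# Keras model whose two arguments are the input and output sizes (consistent
# with test_shape_multilabel_multiple, where a (10, 10) batch predicts a
# (10, 10) result). The import paths and layer sizes are assumptions; the
# project's actual helper may differ.
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential


def create_test_model(inputs, outputs):
    '''Sketch of an assumed helper: a small dense network mapping `inputs`
    features to `outputs` values.'''
    model = Sequential([
        Dense(32, activation='relu', input_shape=(inputs, )),
        Dense(outputs),
    ])
    model.compile(optimizer='adam', loss='mse')
    return model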
import numpy as np

# BaseDQNet, FFNetAsync and Q_update are project-internal helpers assumed
# to be importable from elsewhere in this repository.


class DQNAgent(BaseDQNet):
    '''An agent class used in testing reinforcement learning algorithms.

    This class is made with the purpose that it would allow multiple agents
    to be trained concurrently in a single game, so the majority of their
    work should be hidden behind this class.
    '''

    def __init__(self, model, replay_size=1024, batch_size=32, gamma=0.99,
                 epsilon=0.1, tau=0.0001, name='Agent', replay_type='simple',
                 with_target=True, previous_frames=4):
        '''Create Agent from model description file.'''
        super().__init__(epsilon, gamma, replay_type, replay_size)
        self.tau = tau
        self.batch_size = batch_size
        self.with_target = with_target
        self.previous_frames = previous_frames
        self.model = FFNetAsync(model)
        # If using a target network, create a duplicate network from the same
        # network architecture file.
        if with_target is True:
            self.target_model = FFNetAsync(model)

    def save(self, name):
        '''Save the networks using the keras save function.'''
        self.model.save('{}.h5'.format(name))
        if self.with_target is True:
            self.target_model.save('{}_target.h5'.format(name))

    def train(self):
        '''Train the Agent.'''
        prev_state, actions, rewards, next_state, terms = self.batch(
            self.batch_size)
        # If not using a target network, use the online network instead.
        if self.with_target is True:
            target_model = self.target_model
        else:
            target_model = self.model
        # Handles the predictions asynchronously.
        target_model.predict_on_batch(next_state)
        self.model.predict_on_batch(prev_state)
        next_Qvalues = target_model.collect()
        current_Qvalues = self.model.collect()
        # Updates the Q value based on current state given the actions
        # Q_online(s_t, a) = r + gamma * Q_target(s_t+1, a)
        # or, if terminal is True,
        # Q_online(s_t, a) = r
        new_Qvalues = Q_update(current_Qvalues, actions, rewards, self.gamma,
                               next_Qvalues, terms)
        self.model.train_on_batch(prev_state, new_Qvalues)
        # If using a target network, update the weights of the target using
        # the online network's weights.
        if self.with_target is True:
            self.update_weights()

    def update_weights(self):
        '''Update the target network using the online network's weights.'''
        self.model.get_weights()
        self.target_model.get_weights()
        weights = self.model.collect()
        target_weights = self.target_model.collect()
        # Polyak averaging: move the target a small step (tau) towards the
        # online network.
        for i in range(len(target_weights)):
            target_weights[i] = target_weights[i] * (
                1 - self.tau) + weights[i] * self.tau
        self.target_model.set_weights(target_weights)

    def Qvalues(self, state, target=False):
        '''Return the Q values for the given state.

        If target is True then use the target network to predict the Q
        values.
        '''
        if target is True and self.with_target is True:
            model = self.target_model
        else:
            model = self.model
        model.predict_on_batch(state)
        x = model.collect()
        return x

    def update_state(self, state):
        '''Create a new state given input.

        First reshapes the state so that it may be appended. The shape is
        (1, width, height, number of states). Currently casting the new
        state as int8 to save space while maintaining compatibility with
        different feature formats (categorical, RGB).
        '''
        reshaped_state = state.reshape((1, ) + state.shape + (1, ))
        return np.append(reshaped_state, self.current_state[..., 0:-1],
                         axis=-1).astype(np.int8)

    def initialize(self, state, action):
        '''Initialize the agent with state and action.'''
        # Stack copies of the state along a new axis so the agent starts
        # with a full frame history.
        stacked_states = [state for _ in range(self.previous_frames)]
        new_state = np.stack(stacked_states, axis=-1)
        # Reshaped state shape is (1, width, height, number of prior states).
        single_state = new_state.reshape((1, ) + new_state.shape)
        self.current_state = single_state
        self.current_action = action
        self.terminal = False
        return True
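# DQNAgent.train delegates the Bellman target computation to a Q_update
# helper that is not shown in this section. Below is a hypothetical stand-in
# implementing the rule documented in the comment inside train(), mirroring
# the in-place target computation in DDQNAgent.train above; the project's
# actual function may differ.
import numpy as np


def Q_update(current_Qvalues, actions, rewards, gamma, next_Qvalues, terms):
    '''Sketch of an assumed helper computing DQN training targets.

    actions is a one-hot (batch, n_actions) array and terms is a boolean
    array marking terminal transitions.
    '''
    targets = current_Qvalues.copy()
    # Q(s_t, a) = r + gamma * max_a' Q_target(s_t+1, a'), or just r when
    # the transition is terminal (np.invert flips the boolean mask).
    targets[actions == 1] = rewards + gamma * np.max(
        next_Qvalues, axis=1) * np.invert(terms)
    return targets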