# Train online network
online_network.fit(replay_state, target, epochs=step, verbose=1,
                   initial_epoch=step - 1, callbacks=[csv_logger, tensorboard])

# Periodically copy online network weights to target network
if step % copy_steps == 0:
    target_network.set_weights(online_network.get_weights())

# And save weights
if step % save_steps == 0:
    online_network.save_weights(
        os.path.join(weights_folder, 'weights_{}.h5f'.format(step)))
    gc.collect()  # also clean the garbage


### BASELINE STRATEGIES FOR COMPARISON
from mini_pacman import test, random_strategy, naive_strategy

random_med = test(strategy=random_strategy, log_file='test_pacman_log_random.json')
naive_med = test(strategy=naive_strategy, log_file='test_pacman_log_naive.json')
custom_med = test(strategy=custom_strategy, log_file='test_pacman_log_custom.json')
print(f'Random Median = {random_med} Naive Median = {naive_med} Custom Median = {custom_med}')
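# The csv_logger and tensorboard callbacks passed to fit() above are assumed to be
# standard Keras callbacks created once before the training loop. A minimal sketch of
# that setup (file and folder names here are illustrative placeholders, not taken from
# the original code):
import os
from keras.callbacks import CSVLogger, TensorBoard

weights_folder = os.path.join('dqn_mini_pacman', 'weights')   # hypothetical path
os.makedirs(weights_folder, exist_ok=True)

csv_logger = CSVLogger('dqn_training_log.csv', append=True)   # per-epoch metrics to CSV
tensorboard = TensorBoard(log_dir='tensorboard_logs')         # loss curves for TensorBoard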
import json

from mini_pacman import PacmanGame
from mini_pacman import test, random_strategy, naive_strategy

# Load the game parameters used for testing and create the environment
with open('test_params.json', 'r') as file:
    read_params = json.load(file)
game_params = read_params['params']
env = PacmanGame(**game_params)

test(strategy=naive_strategy, log_file='test_pacman_log_naive.json')
# Sample a minibatch of transitions (state, action, reward, next_state, done)
minibatch = random.sample(replay_memory, batch_size)
replay_state = np.array([get_state(x[0]) for x in minibatch])
replay_action = np.array([x[1] for x in minibatch])
replay_rewards = np.array([x[2] for x in minibatch])
replay_next_state = np.array([get_state(x[3]) for x in minibatch])
replay_done = np.array([x[4] for x in minibatch], dtype=int)

# Q-learning target: r + gamma * max_a' Q_target(s', a') for non-terminal transitions
target_predict = target_network.predict(replay_next_state)
target_for_action = replay_rewards + (1 - replay_done) * gamma * \
                    np.amax(target_predict, axis=1)

# Only the Q-value of the action actually taken is moved towards the target
target = online_network.predict(replay_state)
target[np.arange(batch_size), replay_action] = target_for_action

online_network.fit(replay_state, target, epochs=step, verbose=1, initial_epoch=step - 1)

# Periodically copy online network weights to the target network
if step % copy_steps == 0:
    target_network.set_weights(online_network.get_weights())


from keras.models import load_model

def test_dqn_strategy(obs):
    q_values = online_network.predict(np.array([get_state(obs)]))[0]
    action = epsilon_greedy(q_values, 0.05, obs['possible_actions'])
    return action + 1

from mini_pacman import test
test(strategy=test_dqn_strategy, log_file='test_pacman_log.json')
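# The epsilon_greedy helper used above is defined elsewhere in this code. A hedged
# sketch consistent with the two call sites (one passes obs['possible_actions'], the
# other passes nb_actions) -- the real helper may differ:
import random
import numpy as np

def epsilon_greedy(q_values, epsilon, actions):
    # `actions` is assumed to be either the number of actions or an iterable of
    # candidate actions; with probability epsilon explore, otherwise act greedily.
    n = actions if isinstance(actions, int) else len(actions)
    if random.random() < epsilon:
        return random.randrange(n)           # explore: random action index
    return int(np.argmax(q_values[:n]))      # exploit: action with the best Q-value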
# save the deep neural network that estimates the Q-values
online_network.save('saved_dqn_mini_pacman_model.h5')

from keras.models import load_model
dqn_model = load_model('saved_dqn_mini_pacman_model.h5')

def dqn_strategy(obs):
    q_values = dqn_model.predict(np.array([get_state(obs)]))[0]
    action = epsilon_greedy(q_values, eps_min, nb_actions)
    return action + 1

# Some sub-optimal strategies are available for comparison: random_strategy moves the
# agent by selecting actions at random, naive_strategy uses some basic heuristics.
from mini_pacman import test, random_strategy, naive_strategy
test(strategy=random_strategy, log_file='test_pacman_log.json')
test(strategy=naive_strategy, log_file='test_pacman_log.json')
test(strategy=dqn_strategy, log_file='test_pacman_log.json')

# To watch the game get played with the DQN strategy
import time

obs = env.reset()
env.render()
state = get_state(obs)
while not obs['end_game']:
    time.sleep(0.1)
    # select the next action from the learned Q-values (epsilon_greedy with eps_min
    # keeps only a small random component)
    action = dqn_strategy(obs)
    obs = env.make_action(action)
    state = get_state(obs)
    env.render()
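# get_state() converts the observation dict returned by the game into the flat numeric
# vector the network expects. Its definition is not shown here; the sketch below is an
# assumption -- the keys 'player', 'monsters', 'diamonds' and 'walls' are guesses at the
# mini_pacman observation format, not taken from it:
import numpy as np

def get_state(obs):
    v = []
    x, y = obs['player']
    v += [x, y]
    for x, y in obs['monsters']:
        v += [x, y]
    for x, y in obs['diamonds']:
        v += [x, y]
    for x, y in obs['walls']:
        v += [x, y]
    return np.array(v, dtype=np.float32)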
import json
import os

from mini_pacman import PacmanGame
from mini_pacman import test, random_strategy, naive_strategy

with open('test_params.json', 'r') as file:
    read_params = json.load(file)
game_params = read_params['params']
env = PacmanGame(**game_params)

DQN = QLearn(env)

train_params = config.training_params_local  # change to rcc for real training
DQN.train(name=train_params['name'],
          n_steps=train_params['n_steps'],
          warmup=train_params['warmup'],
          training_interval=train_params['training_interval'],
          copy_steps=train_params['copy_steps'],
          gamma=train_params['gamma'],
          skip_start=train_params['skip_start'],
          batch_size=train_params['batch_size'],
          double_dqn=train_params['double_dqn'],
          eps_max=train_params['eps_max'],
          eps_min=train_params['eps_min'],
          learning_rate=train_params['learning_rate'])

weights_folder = os.path.join(train_params['name'], 'weights')
DQN.online_network.load_weights(
    os.path.join(weights_folder, 'weights_last.h5f'))

test(strategy=DQN.dqn_strategy, log_file='test_pacman_log_DQN_local_2.json')
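# config.training_params_local above is expected to be a plain mapping of the keyword
# arguments consumed by DQN.train(). A sketch of what such a config entry might contain
# (all values are illustrative placeholders, not the settings used for the real runs):
training_params_local = dict(
    name='dqn_mini_pacman_local',   # hypothetical run name / output folder
    n_steps=10000,                  # short run for a local smoke test
    warmup=1000,                    # experience collected before training starts
    training_interval=4,
    copy_steps=500,                 # how often to sync the target network
    gamma=0.99,
    skip_start=0,
    batch_size=64,
    double_dqn=False,
    eps_max=1.0,
    eps_min=0.05,
    learning_rate=1e-3,
)

# A corresponding 'rcc' entry for full-scale training would use the same keys with
# larger values (e.g. many more steps and a longer warmup).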