def play_round(game: GameWrapper):
    """Play a single round: take the opening jump, then let the greedy policy drive.

    Loops indefinitely, ticking the policy and polling the game's status on
    each iteration; control leaves this function only via an exception.
    """
    policy = GreedyBird(game)
    logger.critical('Starting Game')
    game.jump()
    while True:
        policy.tick()
        game.raise_game_status()
def main():
    """Entry point: play rounds forever, prompting the user to restart after a game over.

    Raises:
        GameOver: re-raised when the user declines to play again.
    """
    game = GameWrapper()
    while True:
        try:
            play_round(game)
        except GameOver:
            # .strip() tolerates stray whitespace around the answer.
            another_game = input('Game over. Try again? [Y/N]')
            if another_game.strip().upper() == 'Y':
                # NOTE(review): play_round() also jumps on entry — confirm this
                # extra jump is needed to clear the game-over screen.
                game.jump()
                continue
            # Bare raise re-raises the active exception with its original
            # traceback intact (idiomatic; `raise e` is redundant here).
            raise
def StartNewGame(game, players):
    """
    Start a New Game

    Wraps the given game and players in a GameWrapper, registers it in the
    module-level `games` mapping under a fresh id, and returns that id.
    """
    # NOTE(review): the module-level counter is named `id`, shadowing the
    # builtin `id()` — consider renaming it (e.g. `next_game_id`) file-wide.
    global games
    global id
    currentId = id
    games[currentId] = GameWrapper(currentId, game, players)
    # Bump the counter so the next game gets a distinct id.
    id += 1
    return currentId
LEARNING_RATE, INPUT_SHAPE, ENV_NAME, BATCH_SIZE, EVAL_LENGTH,
)
from game_wrapper import GameWrapper
from replay_buffer import ReplayBuffer
from agent import Agent

# NOTE(review): this overrides the ENV_NAME imported from config just above —
# confirm hard-coding Breakout here is intentional for this evaluation script.
ENV_NAME = 'BreakoutDeterministic-v4'

# Create environment
game_wrapper = GameWrapper(ENV_NAME, MAX_NOOP_STEPS)
print("The environment has the following {} actions: {}".format(game_wrapper.env.action_space.n, game_wrapper.env.unwrapped.get_action_meanings()))

# Create agent
# MAIN_DQN is built with LEARNING_RATE; TARGET_DQN uses build_q_network's
# default learning rate — NOTE(review): confirm that default is acceptable.
MAIN_DQN = build_q_network(game_wrapper.env.action_space.n, LEARNING_RATE, input_shape=INPUT_SHAPE)
TARGET_DQN = build_q_network(game_wrapper.env.action_space.n, input_shape=INPUT_SHAPE)

replay_buffer = ReplayBuffer(size=MEM_SIZE, batch_size=BATCH_SIZE, input_shape=INPUT_SHAPE)
agent = Agent(MAIN_DQN, TARGET_DQN, replay_buffer, game_wrapper.env.action_space.n, input_shape=INPUT_SHAPE)

# Restore weights from a fixed checkpoint — presumably an evaluation-only run.
print('Loading model...')
agent.load('breakout-saves/save-13383/')
print('Loaded')

terminal = True
eval_rewards = []
    x = Conv2D(64, (3, 3), strides=1, kernel_initializer=VarianceScaling(scale=.2), activation='relu', use_bias=False)(x)
    # NOTE(review): scale=.2 on these Conv2D initializers vs scale=2.0 on the
    # Dense heads below — possibly a typo for 2.0; confirm against the
    # intended architecture.
    x = Conv2D(1024, (7, 7), strides=1, kernel_initializer=VarianceScaling(scale=.2), activation='relu', use_bias=False)(x)

    # Split the channels (axis 3) into two halves: a state-value stream and an
    # advantage stream (dueling-style head).
    val_stream, adv_stream = Lambda(lambda w: tf.split(w, 2, 3))(x)

    val_stream = Flatten()(val_stream)
    val = Dense(1, kernel_initializer=VarianceScaling(scale=2.0))(val_stream)

    adv_stream = Flatten()(adv_stream)
    adv = Dense(n_actions, kernel_initializer=VarianceScaling(scale=2.0))(adv_stream)

    # Combine: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a))
    reduce_mean = Lambda(lambda w: tf.reduce_mean(w, axis=1, keepdims=True))
    q_vals = Add()([val, Subtract()([adv, reduce_mean(adv)])])

    model = Model(model_input, q_vals)
    model.compile(Adam(learning_rate), loss=tf.keras.losses.Huber())
    return model


game_wrapper = GameWrapper(ENV_NAME, MAX_NOOP_STEPS)

network = build_q_network(game_wrapper.env.action_space.n)
target_network = build_q_network(game_wrapper.env.action_space.n)
memory = ReplayBuffer(size=MEM_SIZE, input_shape=INPUT_SHAPE, use_per=USE_PER)
# NOTE(review): n_actions is hard-coded to 4 here, while the network above
# uses game_wrapper.env.action_space.n — confirm these always agree.
agent = Agent(network, target_network, memory, 4, input_shape=INPUT_SHAPE, batch_size=BATCH_SIZE, use_per=USE_PER)

# Either start training from scratch or resume state from a saved checkpoint.
if LOAD_FROM is None:
    frame_number = 0
    rewards = []
    loss_list = []
else:
    meta = agent.load(LOAD_FROM, LOAD_REPLAY_BUFFER)
    frame_number = meta['frame_number']
    rewards = meta['rewards']
    loss_list = meta['loss_list']

try:
"""Diese Methode wird aufgerufen, wenn der Server ein Zustands-Update sendet.""" raise NotImplementedError() def surrender(self): """ACHTUNG: Mit dieser Methode gibt die KI auf""" self.send("SURRENDER") raise RuntimeError("SURRENDERED") def add_output(self, d, o): """Diese Methode nimmt eine Antwort und Output und hängt das Output an die Antwort.""" raise NotImplementedError() if __name__ == '__main__': from game_wrapper import GameWrapper print("encoding:", sys.getdefaultencoding()) print(sys.argv) # __name__ aifile propfile with open(sys.argv[2], "r") as f: props = properties_to_dict(f.read()) print("properties:") pprint(props) usermodule = import_module(".".join(sys.argv[1].split(".")[:-1])) print("Nutzer-Modul importiert") if not hasattr(usermodule, "AI"): ##TODO CRASH senden raise RuntimeError("No AI class in " + sys.argv[1]) print("Lasse GameWrapper laufen") gw = GameWrapper(usermodule.AI, props) gw.run()
import time
import numpy as np
from config import *
from game_wrapper import GameWrapper
import tensorflow as tf
from network import build_q_network
from prioritized_replay_buffer import ReplayBuffer
from agent import Agent

# Environment wrapper; ENV_NAME/MAX_NOOP_STEPS/HISTORY_LENGTH are presumably
# supplied by the star-import from config above.
game_wrapper = GameWrapper(ENV_NAME, MAX_NOOP_STEPS, history_length=HISTORY_LENGTH)
print("The environment has the following {} actions: {}".format(
    game_wrapper.env.action_space.n, game_wrapper.env.unwrapped.get_action_meanings()))

# TensorBoard writer
writer = tf.summary.create_file_writer(TENSORBOARD_DIR)

# Build main and target networks
# MAIN_DQN receives LEARNING_RATE; TARGET_DQN is built with build_q_network's
# default learning rate — NOTE(review): confirm that default is acceptable.
MAIN_DQN = build_q_network(game_wrapper.env.action_space.n, LEARNING_RATE,
    input_shape=INPUT_SHAPE, history_length=HISTORY_LENGTH)
TARGET_DQN = build_q_network(game_wrapper.env.action_space.n,
    input_shape=INPUT_SHAPE, history_length=HISTORY_LENGTH)

replay_buffer = ReplayBuffer(size=MEM_SIZE, input_shape=INPUT_SHAPE, use_per=USE_PER,