Example #1
import logging

logger = logging.getLogger(__name__)  # assumed setup: the snippet uses `logger` without defining it


def play_round(game: GameWrapper):
    algo = GreedyBird(game)
    logger.critical('Starting Game')
    game.jump()
    # No explicit exit: the loop runs until raise_game_status() raises GameOver (see Example #2)
    while True:
        algo.tick()
        game.raise_game_status()
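A minimal, hypothetical stub showing how that exit works; the `crashed` flag is an assumption, only `GameOver` and the two methods appear in the original:

class GameOver(Exception):
    """Raised to end a round."""

class GameWrapper:
    def __init__(self):
        self.crashed = False  # hypothetical state flag

    def jump(self):
        pass  # send a flap/jump command to the game

    def raise_game_status(self):
        if self.crashed:  # game state becomes control flow
            raise GameOver()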
Example #2
def main():
    game = GameWrapper()
    while True:
        try:
            play_round(game)
        except GameOver:
            another_game = input('Game over. Try again? [Y/N] ')
            if another_game.upper() == 'Y':
                game.jump()
                continue
            else:
                raise  # re-raise with the original traceback
Example #3
def StartNewGame(game, players):
    """Start a new game and return its id."""
    global games
    global id  # note: shadows the built-in id()

    currentId = id
    games[currentId] = GameWrapper(currentId, game, players)
    id += 1
    return currentId
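A module-level `id` both shadows the built-in and can race under concurrent callers. A sketch of one alternative, assuming nothing beyond the standard library: `itertools.count` hands out ids without a mutable global, and `next()` on it is effectively atomic under CPython's GIL.

import itertools

games = {}
_next_id = itertools.count()  # hypothetical replacement for the global counter

def start_new_game(game, players):
    """Variant of StartNewGame without the mutable global id."""
    current_id = next(_next_id)
    games[current_id] = GameWrapper(current_id, game, players)
    return current_id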
Example #4
from config import (MEM_SIZE,  # reconstructed opening: the original snippet is cut mid-import
                    MAX_NOOP_STEPS,
                    LEARNING_RATE,
                    INPUT_SHAPE,
                    ENV_NAME,
                    BATCH_SIZE,
                    EVAL_LENGTH)
from game_wrapper import GameWrapper
from replay_buffer import ReplayBuffer
from network import build_q_network  # assumed module path (matches Example #7); the snippet calls build_q_network without importing it
from agent import Agent


ENV_NAME = 'BreakoutDeterministic-v4'

# Create environment
game_wrapper = GameWrapper(ENV_NAME, MAX_NOOP_STEPS)
print("The environment has the following {} actions: {}".format(game_wrapper.env.action_space.n, game_wrapper.env.unwrapped.get_action_meanings()))

# Create agent
MAIN_DQN = build_q_network(game_wrapper.env.action_space.n, LEARNING_RATE, input_shape=INPUT_SHAPE)
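# No learning rate for the target network: it is never trained directly; its weights are copied over from MAIN_DQN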
TARGET_DQN = build_q_network(game_wrapper.env.action_space.n, input_shape=INPUT_SHAPE)

replay_buffer = ReplayBuffer(size=MEM_SIZE, batch_size=BATCH_SIZE, input_shape=INPUT_SHAPE)
agent = Agent(MAIN_DQN, TARGET_DQN, replay_buffer, game_wrapper.env.action_space.n, input_shape=INPUT_SHAPE)

print('Loading model...')
agent.load('breakout-saves/save-13383/')
print('Loaded')

terminal = True
eval_rewards = []
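The excerpt stops just before its evaluation loop. A rough sketch of what such a loop might look like; `game_wrapper.reset()`, `game_wrapper.state`, `game_wrapper.step()`, and `agent.get_action()` are assumptions about this codebase's API, not confirmed by the excerpt:

for _ in range(EVAL_LENGTH):
    if terminal:
        game_wrapper.reset(evaluation=True)  # assumed signature
        episode_reward = 0
        terminal = False
    action = agent.get_action(0, game_wrapper.state, evaluation=True)  # assumed Agent API
    _, reward, terminal = game_wrapper.step(action)  # assumed return values
    episode_reward += reward
    if terminal:
        eval_rewards.append(episode_reward)

print('Average evaluation reward:', sum(eval_rewards) / max(len(eval_rewards), 1))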
Example #5
# The snippet begins mid-function; the signature and input layers below are
# reconstructed assumptions so that the dueling-DQN example reads end to end.
def build_q_network(n_actions, learning_rate=0.00001, input_shape=(84, 84), history_length=4):
    model_input = Input(shape=(input_shape[0], input_shape[1], history_length))
    x = Lambda(lambda layer: layer / 255)(model_input)  # normalize pixel values
    x = Conv2D(32, (8, 8), strides=4, kernel_initializer=VarianceScaling(scale=2.0), activation='relu', use_bias=False)(x)
    x = Conv2D(64, (4, 4), strides=2, kernel_initializer=VarianceScaling(scale=2.0), activation='relu', use_bias=False)(x)
    x = Conv2D(64, (3, 3), strides=1, kernel_initializer=VarianceScaling(scale=2.0), activation='relu', use_bias=False)(x)
    x = Conv2D(1024, (7, 7), strides=1, kernel_initializer=VarianceScaling(scale=2.0), activation='relu', use_bias=False)(x)
    # Split into separate value and advantage streams (dueling architecture)
    val_stream, adv_stream = Lambda(lambda w: tf.split(w, 2, 3))(x)

    val_stream = Flatten()(val_stream)
    val = Dense(1, kernel_initializer=VarianceScaling(scale=2.0))(val_stream)

    adv_stream = Flatten()(adv_stream)
    adv = Dense(n_actions, kernel_initializer=VarianceScaling(scale=2.0))(adv_stream)

    # Q(s, a) = V(s) + (A(s, a) - mean over actions of A(s, a))
    reduce_mean = Lambda(lambda w: tf.reduce_mean(w, axis=1, keepdims=True))
    q_vals = Add()([val, Subtract()([adv, reduce_mean(adv)])])
    model = Model(model_input, q_vals)
    model.compile(Adam(learning_rate), loss=tf.keras.losses.Huber())
    return model
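The closing lines implement the dueling aggregation Q(s, a) = V(s) + (A(s, a) - mean over actions of A(s, a)). A tiny NumPy sketch of the same arithmetic, with made-up values:

import numpy as np

val = np.array([[2.0]])             # V(s): shape (batch, 1)
adv = np.array([[1.0, -1.0, 0.0]])  # A(s, a): shape (batch, n_actions)
q = val + (adv - adv.mean(axis=1, keepdims=True))
print(q)  # -> [[ 3.  1.  2.]]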
game_wrapper = GameWrapper(ENV_NAME, MAX_NOOP_STEPS)
network = build_q_network(game_wrapper.env.action_space.n)
target_network = build_q_network(game_wrapper.env.action_space.n)
memory = ReplayBuffer(size=MEM_SIZE, input_shape=INPUT_SHAPE, use_per=USE_PER)
agent = Agent(network, target_network, memory, 4, input_shape=INPUT_SHAPE, batch_size=BATCH_SIZE, use_per=USE_PER)  # note: hard-codes 4 actions where the lines above use game_wrapper.env.action_space.n
if LOAD_FROM is None:
    frame_number = 0
    rewards = []
    loss_list = []
else:
    meta = agent.load(LOAD_FROM, LOAD_REPLAY_BUFFER)
    frame_number = meta['frame_number']
    rewards = meta['rewards']
    loss_list = meta['loss_list']

try:
Example #6
		"""Diese Methode wird aufgerufen, wenn der Server ein Zustands-Update sendet."""
		raise NotImplementedError()

	def surrender(self):
		"""ACHTUNG: Mit dieser Methode gibt die KI auf"""
		self.send("SURRENDER")
		raise RuntimeError("SURRENDERED")

	def add_output(self, d, o):
		"""Diese Methode nimmt eine Antwort und Output und hängt das Output an die Antwort."""
		raise NotImplementedError()


if __name__ == '__main__':
	from game_wrapper import GameWrapper
	print("encoding:", sys.getdefaultencoding())
	print(sys.argv)
	# __name__ aifile propfile
	with open(sys.argv[2], "r") as f:
		props = properties_to_dict(f.read())
	print("properties:")
	pprint(props)
	usermodule = import_module(".".join(sys.argv[1].split(".")[:-1]))
	print("Nutzer-Modul importiert")
	if not hasattr(usermodule, "AI"):
		##TODO CRASH senden
		raise RuntimeError("No AI class in " + sys.argv[1])
	print("Lasse GameWrapper laufen")
	gw = GameWrapper(usermodule.AI, props)
	gw.run()
示例#7
0
        """Diese Methode wird aufgerufen, wenn der Server ein Zustands-Update sendet."""
        raise NotImplementedError()

    def surrender(self):
        """ACHTUNG: Mit dieser Methode gibt die KI auf"""
        self.send("SURRENDER")
        raise RuntimeError("SURRENDERED")

    def add_output(self, d, o):
        """Diese Methode nimmt eine Antwort und Output und hängt das Output an die Antwort."""
        raise NotImplementedError()


if __name__ == '__main__':
    from game_wrapper import GameWrapper
    print("encoding:", sys.getdefaultencoding())
    print(sys.argv)
    # __name__ aifile propfile
    with open(sys.argv[2], "r") as f:
        props = properties_to_dict(f.read())
    print("properties:")
    pprint(props)
    usermodule = import_module(".".join(sys.argv[1].split(".")[:-1]))
    print("Nutzer-Modul importiert")
    if not hasattr(usermodule, "AI"):
        ##TODO CRASH senden
        raise RuntimeError("No AI class in " + sys.argv[1])
    print("Lasse GameWrapper laufen")
    gw = GameWrapper(usermodule.AI, props)
    gw.run()
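The `__main__` block only requires the user's module to expose a class named `AI`. A minimal, hypothetical user module satisfying that contract; the base class and the state-update method's real name are not visible in this excerpt, so the stub implements just the documented pieces:

# my_ai.py (hypothetical), run as: python wrapper.py my_ai.py game.properties
class AI:
    def add_output(self, d, o):
        # Append the output to the response, as the docstring above describes
        d["output"] = o
        return d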
Example #7
import time
import numpy as np
from config import *
from game_wrapper import GameWrapper
import tensorflow as tf
from network import build_q_network
from prioritized_replay_buffer import ReplayBuffer
from agent import Agent

game_wrapper = GameWrapper(ENV_NAME,
                           MAX_NOOP_STEPS,
                           history_length=HISTORY_LENGTH)
print("The environment has the following {} actions: {}".format(
    game_wrapper.env.action_space.n,
    game_wrapper.env.unwrapped.get_action_meanings()))

# TensorBoard writer
writer = tf.summary.create_file_writer(TENSORBOARD_DIR)

# Build main and target networks
MAIN_DQN = build_q_network(game_wrapper.env.action_space.n,
                           LEARNING_RATE,
                           input_shape=INPUT_SHAPE,
                           history_length=HISTORY_LENGTH)
TARGET_DQN = build_q_network(game_wrapper.env.action_space.n,
                             input_shape=INPUT_SHAPE,
                             history_length=HISTORY_LENGTH)

replay_buffer = ReplayBuffer(size=MEM_SIZE,
                             input_shape=INPUT_SHAPE,
                             use_per=USE_PER,