Пример #1
0
    def __init__(self, make_video=False, replay=False, live_preview=False):
        """Create a world with one user-driven agent and a spare rule-based agent.

        Args:
            make_video: Run with the GUI and set ``args.make_video``. Takes
                precedence over ``live_preview``. The original author marked
                this mode as not working yet (ffmpeg issue).
            replay: If truthy, set ``args.save_replay`` so that the world is
                asked to store a replay.
            live_preview: Run with the GUI and remember that every step
                should be rendered (``self._live_preview``).
        """
        # Local import: the surrounding file's import block is not visible here.
        from argparse import Namespace

        self._actions = self.ACTIONS
        self.ROWS, self.COLS = s.ROWS, s.COLS
        self._live_preview = False

        # Every option gets a concrete value up front. Previously a namedtuple
        # *class* was used as a namespace, so options that were never assigned
        # (``replay`` always; ``fps``, ``turn_based`` and ``update_interval``
        # in headless mode) were left as descriptor objects rather than values.
        args = Namespace(
            no_gui=True,
            fps=15,
            log_dir="agent_code/koetherminator",
            turn_based=False,
            update_interval=0.1,
            save_replay=bool(replay),
            replay=False,
            make_video=False,
            continue_without_training=False,
        )

        if make_video:  # not working yet (ffmpeg issue)
            args.no_gui = False
            args.make_video = True
        elif live_preview:
            self._live_preview = True
            args.no_gui = False
            # Slower stepping than the 0.1 default while previewing.
            args.update_interval = 1

        # agents = [("user_agent", True)] + [("rule_based_agent", False)] * (s.MAX_AGENTS-1)
        agents = [("user_agent", True)
                  ] + [("peaceful_agent", False)] * (s.MAX_AGENTS - 1)

        # pygame is only needed when something is going to be drawn.
        if not args.no_gui:
            pygame.init()

        self._world = BombeRLeWorld(args, agents)
        # The trainable "user_agent" is first in the agent list built above.
        self._agent = self._world.agents[0]

        # A stand-alone rule-based agent with its own, already started backend.
        # It is not added to the world here.
        rb_agent_cfg = {"color": "blue", "name": "rule_based_agent"}
        rb_agent_backend = SequentialAgentBackend(False, rb_agent_cfg['name'],
                                                  rb_agent_cfg['name'])
        rb_agent_backend.start()
        self._rb_agent = Agent(rb_agent_cfg['color'],
                               rb_agent_cfg['name'],
                               rb_agent_cfg['name'],
                               train=False,
                               backend=rb_agent_backend)
Пример #2
0
def main(argv=None):
    """Parse the command line, then play or replay Bomberman games.

    Two sub-commands are available: ``play`` builds a ``BombeRLeWorld`` from
    the chosen agents, ``replay`` builds a ``ReplayWorld`` from a file. The
    game logic runs in a daemon thread; this function handles GUI events,
    rendering and the hand-over between rounds.

    Args:
        argv: Argument list passed to ``parser.parse_args``. ``None`` lets
            argparse fall back to the process arguments.

    Raises:
        ValueError: If a GUI is requested but pygame was not loaded, or if
            the sub-command is unknown.
    """
    parser = ArgumentParser()

    subparsers = parser.add_subparsers(dest='command_name', required=True)

    # Run arguments
    play_parser = subparsers.add_parser("play")
    agent_group = play_parser.add_mutually_exclusive_group()
    agent_group.add_argument("--my-agent", type=str, help="Play agent of name ... against three rule_based_agents")
    agent_group.add_argument("--agents", type=str, nargs="+", default=["rule_based_agent"] * s.MAX_AGENTS, help="Explicitly set the agent names in the game")
    play_parser.add_argument("--train", default=0, type=int, choices=[0, 1, 2, 3, 4],
                             help="First … agents should be set to training mode")
    play_parser.add_argument("--continue-without-training", default=False, action="store_true")
    # play_parser.add_argument("--single-process", default=False, action="store_true")

    play_parser.add_argument("--n-rounds", type=int, default=10, help="How many rounds to play")
    play_parser.add_argument("--save-replay", const=True, default=False, action='store', nargs='?', help='Store the game as .pt for a replay')
    play_parser.add_argument("--no-gui", default=False, action="store_true", help="Deactivate the user interface and play as fast as possible.")

    # Replay arguments
    replay_parser = subparsers.add_parser("replay")
    replay_parser.add_argument("replay", help="File to load replay from")

    # Interaction options shared by both sub-commands
    for sub in [play_parser, replay_parser]:
        sub.add_argument("--fps", type=int, default=15, help="FPS of the GUI (does not change game)")
        sub.add_argument("--turn-based", default=False, action="store_true",
                         help="Wait for key press until next movement")
        sub.add_argument("--update-interval", type=float, default=0.1,
                         help="How often agents take steps (ignored without GUI)")
        sub.add_argument("--log_dir", type=str, default=os.path.dirname(os.path.abspath(__file__)) + "/logs")

        # Video?
        sub.add_argument("--make-video", default=False, action="store_true",
                         help="Make a video from the game")

    args = parser.parse_args(argv)
    if args.command_name == "replay":
        # The replay parser defines neither option, so set them here:
        # a replay is always shown in the GUI and runs for a single round.
        args.no_gui = False
        args.n_rounds = 1

    has_gui = not args.no_gui
    if has_gui:
        if not LOADED_PYGAME:
            raise ValueError("pygame could not loaded, cannot run with GUI")
        pygame.init()

    # Initialize environment and agents
    if args.command_name == "play":
        agents = []
        if args.train == 0 and not args.continue_without_training:
            # Nobody is training, so there is nothing to wait for.
            args.continue_without_training = True
        if args.my_agent:
            agents.append((args.my_agent, len(agents) < args.train))
            args.agents = ["rule_based_agent"] * (s.MAX_AGENTS - 1)
        # The first ``args.train`` agents in the list are put in training mode.
        for agent_name in args.agents:
            agents.append((agent_name, len(agents) < args.train))

        world = BombeRLeWorld(args, agents)
    elif args.command_name == "replay":
        world = ReplayWorld(args)
    else:
        raise ValueError(f"Unknown command {args.command_name}")

    # Emulate Windows process spawning behaviour under Unix (for testing)
    # mp.set_start_method('spawn')

    # Shared with the game logic thread; filled from keyboard events below.
    user_inputs = []

    # Start game logic thread
    t = threading.Thread(target=game_logic, args=(world, user_inputs, args), name="Game Logic")
    t.daemon = True
    t.start()

    # Run one or more games
    for _ in tqdm(range(args.n_rounds)):
        if not world.running:
            world.ready_for_restart_flag.wait()
            world.ready_for_restart_flag.clear()
            world.new_round()

        # First render
        if has_gui:
            world.render()
            pygame.display.flip()

        round_finished = False
        last_frame = time()
        user_inputs.clear()

        # Main game loop
        while not round_finished:
            if has_gui:
                # Grab GUI events
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        if world.running:
                            world.end_round()
                        world.end()
                        return
                    elif event.type == pygame.KEYDOWN:
                        key_pressed = event.key
                        # Same guard as the QUIT branch: only end a round
                        # that is actually running.
                        if key_pressed in (pygame.K_q, pygame.K_ESCAPE) and world.running:
                            world.end_round()
                        # With a GUI, a finished round is left on any key press.
                        if not world.running:
                            round_finished = True
                        # Convert keyboard input into actions
                        action = s.INPUT_MAP.get(key_pressed)
                        if action:
                            if args.turn_based:
                                user_inputs.clear()
                            user_inputs.append(action)

                # Render only once in a while
                if time() - last_frame >= 1 / args.fps:
                    world.render()
                    pygame.display.flip()
                    last_frame = time()
                else:
                    sleep_time = 1 / args.fps - (time() - last_frame)
                    if sleep_time > 0:
                        sleep(sleep_time)
            elif not world.running:
                round_finished = True
            else:
                # Non-gui mode, check for round end in 1ms
                sleep(0.001)

    world.end()
Пример #3
0
def main():
    """Play ``s.n_rounds`` rounds of 'TheImitator' against three simple agents.

    The game logic runs in a daemon thread. This function polls pygame
    events, forwards key presses as actions through ``user_inputs`` and
    renders at most ``s.fps`` frames per second when ``s.gui`` is set.
    Closing the window ends the round and the world and returns early.
    """
    pygame.init()
    # Emulate Windows process spawning behaviour under Unix (for testing)
    # mp.set_start_method('spawn')

    # Initialize environment and agents
    world = BombeRLeWorld([('TheImitator', False), ('simple_agent', False),
                           ('simple_agent', False), ('simple_agent', False)])
    # world = ReplayWorld('Replay 2019-01-30 16:57:42')

    # Shared with the game logic thread; filled from keyboard events below.
    user_inputs = []

    # Start game logic thread
    t = threading.Thread(target=game_logic, args=(world, user_inputs))
    t.daemon = True
    t.start()

    # Run one or more games
    for _ in range(s.n_rounds):
        if not world.running:
            world.ready_for_restart_flag.wait()
            world.ready_for_restart_flag.clear()
            world.new_round()

        # First render
        if s.gui:
            world.render()
            pygame.display.flip()

        round_finished = False
        last_frame = time()
        user_inputs.clear()

        # Main game loop
        while not round_finished:
            # Grab events
            for event in pygame.event.get():
                if event.type == QUIT:
                    world.end_round()
                    world.end()
                    return
                elif event.type == KEYDOWN:
                    key_pressed = event.key
                    if key_pressed in (K_q, K_ESCAPE):
                        world.end_round()
                    # A finished round is left on any key press.
                    if not world.running:
                        round_finished = True
                    # Convert keyboard input into actions
                    action = s.input_map.get(key_pressed)
                    if action:
                        if s.turn_based:
                            user_inputs.clear()
                        user_inputs.append(action)

            # Without a GUI nobody presses a key, so leave as soon as the
            # round is over.
            if not world.running and not s.gui:
                round_finished = True

            # Rendering
            if s.gui and (time() - last_frame >= 1 / s.fps):
                world.render()
                pygame.display.flip()
                last_frame = time()
            else:
                # Wait out the rest of the frame interval.
                sleep_time = 1 / s.fps - (time() - last_frame)
                if sleep_time > 0:
                    sleep(sleep_time)
                if not s.gui:
                    last_frame = time()

    world.end()
Пример #4
0
def main():
    """Play ``s.n_rounds`` rounds with 'cbt_agent' in training mode and time the run.

    The game logic runs in a daemon thread. This function polls pygame
    events, forwards key presses as actions through ``user_inputs`` and
    renders at most ``s.fps`` frames per second when ``s.gui`` is set.
    The start time, every 20th round index, the end time and the total
    duration are printed. Closing the window ends the round and the world
    and returns early, without the closing summary.
    """
    pygame.init()

    # Emulate Windows process spawning behaviour under Unix (for testing)
    # mp.set_start_method('spawn')

    agent_list = [('cbt_agent', True), ('simple_agent', False),
                  ('simple_agent', False), ('simple_agent', False)]

    # Initialize environment and agents
    world = BombeRLeWorld(agent_list)

    # world = ReplayWorld('replay_3')# ('Replay 2019-01-30 16:57:42')

    # Shared with the game logic thread; filled from keyboard events below.
    user_inputs = []

    # Start game logic thread
    t = threading.Thread(target=game_logic, args=(world, user_inputs))
    t.daemon = True
    t.start()

    start_time = time()
    print('Start am {2}.{1}.{0}, um {3:02}:{4:02}:{5:02} Uhr.'.format(
        *time_.localtime(start_time)))
    # Run one or more games
    for i in range(s.n_rounds):
        # Progress output every 20 rounds.
        if i % 20 == 0:
            print(i)
        if not world.running:
            world.ready_for_restart_flag.wait()
            world.ready_for_restart_flag.clear()
            world.new_round()

        # First render
        if s.gui:
            world.render()
            pygame.display.flip()

        round_finished = False
        last_frame = time()
        user_inputs.clear()

        # Main game loop
        while not round_finished:
            # Grab events
            for event in pygame.event.get():
                if event.type == QUIT:
                    world.end_round()
                    world.end()
                    return
                elif event.type == KEYDOWN:
                    key_pressed = event.key
                    if key_pressed in (K_q, K_ESCAPE):
                        world.end_round()
                    # A finished round is left on any key press.
                    if not world.running:
                        round_finished = True
                    # Convert keyboard input into actions
                    action = s.input_map.get(key_pressed)
                    if action:
                        if s.turn_based:
                            user_inputs.clear()
                        user_inputs.append(action)

            # Without a GUI nobody presses a key, so leave as soon as the
            # round is over.
            if not world.running and not s.gui:
                round_finished = True

            # Rendering
            if s.gui and (time() - last_frame >= 1 / s.fps):
                world.render()
                pygame.display.flip()
                last_frame = time()
            else:
                # Wait out the rest of the frame interval.
                sleep_time = 1 / s.fps - (time() - last_frame)
                if sleep_time > 0:
                    sleep(sleep_time)
                if not s.gui:
                    last_frame = time()

    world.end()
    end_time = time()
    print('Ende am {2}.{1}.{0}, um {3:02}:{4:02}:{5:02} Uhr.'.format(
        *time_.localtime(end_time)))
    print('Duration =', end_time - start_time, 's =',
          (end_time - start_time) / 60, 'min')
Пример #5
0
class BombermanGame:
    """Step-by-step wrapper around a ``BombeRLeWorld`` for a single agent.

    The wrapper owns the world, advances it one action at a time, turns the
    agent's events into a scalar reward and encodes the agent's view of the
    game state as a numeric observation array.
    """

    ACTIONS = ['UP', 'DOWN', 'LEFT', 'RIGHT', 'WAIT', 'BOMB']

    def __init__(self, make_video=False, replay=False, live_preview=False):
        """Create a world with one user-driven agent and a spare rule-based agent.

        Args:
            make_video: Run with the GUI and set ``args.make_video``. Takes
                precedence over ``live_preview``. The original author marked
                this mode as not working yet (ffmpeg issue).
            replay: If truthy, set ``args.save_replay`` so that the world is
                asked to store a replay.
            live_preview: Run with the GUI and render after every action.
        """
        # Local import: the surrounding file's import block is not visible here.
        from argparse import Namespace

        self._actions = self.ACTIONS
        self.ROWS, self.COLS = s.ROWS, s.COLS
        self._live_preview = False

        # Every option gets a concrete value up front. Previously a namedtuple
        # *class* was used as a namespace, so options that were never assigned
        # (``replay`` always; ``fps``, ``turn_based`` and ``update_interval``
        # in headless mode) were left as descriptor objects rather than values.
        args = Namespace(
            no_gui=True,
            fps=15,
            log_dir="agent_code/koetherminator",
            turn_based=False,
            update_interval=0.1,
            save_replay=bool(replay),
            replay=False,
            make_video=False,
            continue_without_training=False,
        )

        if make_video:  # not working yet (ffmpeg issue)
            args.no_gui = False
            args.make_video = True
        elif live_preview:
            self._live_preview = True
            args.no_gui = False
            # Slower stepping than the 0.1 default while previewing.
            args.update_interval = 1

        # agents = [("user_agent", True)] + [("rule_based_agent", False)] * (s.MAX_AGENTS-1)
        agents = [("user_agent", True)
                  ] + [("peaceful_agent", False)] * (s.MAX_AGENTS - 1)

        # pygame is only needed when something is going to be drawn.
        if not args.no_gui:
            pygame.init()

        self._world = BombeRLeWorld(args, agents)
        # The trainable "user_agent" is first in the agent list built above.
        self._agent = self._world.agents[0]

        # A stand-alone rule-based agent with its own, already started backend.
        # It is not added to the world here and not used elsewhere in this class.
        rb_agent_cfg = {"color": "blue", "name": "rule_based_agent"}
        rb_agent_backend = SequentialAgentBackend(False, rb_agent_cfg['name'],
                                                  rb_agent_cfg['name'])
        rb_agent_backend.start()
        self._rb_agent = Agent(rb_agent_cfg['color'],
                               rb_agent_cfg['name'],
                               rb_agent_cfg['name'],
                               train=False,
                               backend=rb_agent_backend)

    def actions(self):
        """Return the list of action names available in the game."""
        return self._actions

    def make_action(self, agent_action: str):
        """Advance the world by one step and return the resulting reward.

        In live-preview mode the world is rendered after the step, with the
        action and its reward drawn as text (green for a positive reward,
        red otherwise).

        Args:
            agent_action: Action to be taken, handed to ``world.do_step``.

        Returns:
            The reward for the events of this step as a ``float32`` numpy
            scalar array.
        """
        self._world.do_step(agent_action)
        reward = self.reward(self._agent.events)

        if self._live_preview:
            self._world.render()
            self._world.gui.render_text(f"ACTION: {agent_action}", 800, 490,
                                        (255, 255, 255))
            self._world.gui.render_text(f"REWARD: {reward}", 800, 520,
                                        (50, 255, 50) if reward > 0 else
                                        (255, 50, 50))
            pygame.display.flip()
            # Short pause so that the preview stays watchable.
            sleep(0.03)

        return np.array(reward, dtype=np.float32)

    def get_world_state(self):
        """Return the world's game state as seen by the wrapped agent."""
        return self._world.get_state_for_agent(self._agent)

    def get_observation(self):
        """Return the current world state encoded as an observation array."""
        return self.get_observation_from_state(self.get_world_state())

    @staticmethod
    def reward(events: List[str]) -> float:
        """Sum the rewards of all recognised events.

        Events without an entry in the reward table contribute nothing.

        Args:
            events: Events that occurred for the agent in the last step.

        Returns:
            The total reward; ``0`` if no recognised event occurred.
        """
        game_rewards = {
            e.COIN_COLLECTED: 1,
            e.KILLED_OPPONENT: 5,
            # positive auxiliary rewards
            e.BOMB_DROPPED: 0.001,
            # e.COIN_FOUND: 0.01,
            # e.SURVIVED_ROUND: 0.5,
            e.CRATE_DESTROYED: 0.1,
            e.MOVED_LEFT: 0.001,
            e.MOVED_RIGHT: 0.001,
            e.MOVED_UP: 0.001,
            e.MOVED_DOWN: 0.001,
            # negative auxiliary rewards
            e.INVALID_ACTION: -0.002,
            e.WAITED: -0.002,
            e.GOT_KILLED: -1,
            e.KILLED_SELF: -1,
        }
        return sum(game_rewards[event] for event in events
                   if event in game_rewards)

    @classmethod
    def get_observation_from_state(cls, state):
        """Encode a game state as a single-channel ``(rows, cols, 1)`` array.

        The channel starts as a copy of the field and is then overwritten,
        in this order, at the positions of:

        * revealed coins: ``2``
        * bombs: ``-2`` (the bomb timer is ignored)
        * the agent itself: ``3``
        * other agents: ``-3``

        All positions are ``(x, y)`` pairs and are written at ``[y, x]``.
        ``state['field']`` is taken to be indexed ``[x, y]`` (axis 0 is read
        as the column count below), so it is transposed to line up with the
        other entries. Without the transpose a non-square field could not be
        assigned at all, and a square one would be mirrored relative to the
        coins, bombs and agents.

        Args:
            state: Mapping with the keys ``'field'``, ``'coins'``,
                ``'bombs'``, ``'self'`` and ``'others'``.

        Returns:
            The ``float32`` observation array.
        """
        field = np.asarray(state['field'])
        cols, rows = field.shape[0], field.shape[1]
        observation = np.zeros([rows, cols, 1], dtype=np.float32)

        # write field with crates, transposed from [x, y] to [y, x]
        observation[:, :, 0] = field.T

        # write revealed coins
        if state['coins']:
            coins_x, coins_y = zip(*state['coins'])
            observation[list(coins_y), list(coins_x), 0] = 2

        # write ticking bombs; each entry is ((x, y), timer)
        if state['bombs']:
            bombs_xy, _ = zip(*state['bombs'])
            bombs_x, bombs_y = zip(*bombs_xy)
            observation[list(bombs_y), list(bombs_x), 0] = -2

        # write agents; the position is the fourth item of each agent entry
        if state['self']:
            _, _, _, (self_x, self_y) = state['self']
            observation[self_y, self_x, 0] = 3

        if state['others']:
            _, _, _, others_xy = zip(*state['others'])
            others_x, others_y = zip(*others_xy)
            observation[list(others_y), list(others_x), 0] = -3

        return observation

    def new_episode(self):
        """End the current round if one is running and start a new one."""
        # TODO: end the world/game properly

        if self._world.running:
            self._world.end_round()

        if not self._world.running:
            # Wait for the world's restart flag before starting the next round.
            self._world.ready_for_restart_flag.wait()
            self._world.ready_for_restart_flag.clear()
            self._world.new_round()

    def is_episode_finished(self):
        """Return the world's ``time_to_stop()`` verdict."""
        return self._world.time_to_stop()

    def set_user_input(self, new_user_input):
        """Store ``new_user_input`` as the world's ``user_input``."""
        self._world.user_input = new_user_input
Пример #6
0
def main():
    """Play ``s.n_rounds`` rounds with four 'simple_agent' players.

    The game logic runs in a daemon thread. This function polls pygame
    events, forwards key presses as actions through ``user_inputs`` and
    renders at most ``s.fps`` frames per second when ``s.gui`` is set.
    Closing the window ends the round and the world and returns early.
    """
    pygame.init()

    # Emulate Windows process spawning behaviour under Unix (for testing)
    # mp.set_start_method('spawn')

    # Initialize environment and agents
    world = BombeRLeWorld([('simple_agent', False), ('simple_agent', False),
                           ('simple_agent', False), ('simple_agent', False)])
    # world = ReplayWorld('Replay 2019-01-30 16:57:42')

    # Shared with the game logic thread; filled from keyboard events below.
    user_inputs = []

    # Start game logic thread
    t = threading.Thread(target=game_logic, args=(world, user_inputs))
    t.daemon = True
    t.start()

    # Save agents at the beginning
    # agents = world.agents

    # Run one or more games
    for _ in range(s.n_rounds):

        # if i % 2==0:
        # Variable crate density
        # world.set_crate_density(0)

        # To only use our agent
        # # for agent in agents:
        # # if agent.name == "Nobel":
        # # world.change_agents([agent])
        # else:
        # Variable crate density
        # world.set_crate_density(0.75)

        # To again use all of them
        # # world.change_agents(agents)

        if not world.running:
            world.ready_for_restart_flag.wait()
            world.ready_for_restart_flag.clear()
            world.new_round()

        # First render
        if s.gui:
            world.render()
            pygame.display.flip()

        round_finished = False
        last_frame = time()
        user_inputs.clear()

        # Main game loop
        while not round_finished:
            # Grab events
            for event in pygame.event.get():
                if event.type == QUIT:
                    world.end_round()
                    world.end()
                    return
                elif event.type == KEYDOWN:
                    key_pressed = event.key
                    if key_pressed in (K_q, K_ESCAPE):
                        world.end_round()
                    # A finished round is left on any key press.
                    if not world.running:
                        round_finished = True
                    # Convert keyboard input into actions
                    action = s.input_map.get(key_pressed)
                    if action:
                        if s.turn_based:
                            user_inputs.clear()
                        user_inputs.append(action)

            # Without a GUI nobody presses a key, so leave as soon as the
            # round is over.
            if not world.running and not s.gui:
                round_finished = True

            # Rendering
            if s.gui and (time() - last_frame >= 1 / s.fps):
                world.render()
                pygame.display.flip()
                last_frame = time()
            else:
                # Wait out the rest of the frame interval.
                sleep_time = 1 / s.fps - (time() - last_frame)
                if sleep_time > 0:
                    sleep(sleep_time)
                if not s.gui:
                    last_frame = time()

        # Prints the round counter and the total score for each agent
        # (the round index is needed again if this is re-enabled)
        # score=[]
        # for a in world.agents:
        # score += [a.total_score]
        # print("Round: "+str(i+1)+", Scores: "+str(score))

    world.end()