Example #1
def episode(model, current_models):
    map_size = random.choice([32, 40, 56, 64])
    num_players = random.choice([2, 4])

    if os.path.exists('arena'):
        shutil.rmtree('arena')
    os.mkdir('arena')

    with Timer("playing game", True):
        play_game(map_size, num_players, current_models)
    replay = glob.glob('arena/*.hlt')[0]
    with Timer("generating features", True):
        f, m, r = get_inputs(replay, num_players)
    with Timer("policy update", True):
        model.policy_update(f, m, normalize_rewards(r))
    model.save_model('models/policy_model2.ckpt')
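Every example on this page leans on the player.utils.Timer context manager, whose definition is not part of the listing. A minimal sketch consistent with the call sites above (a label plus an optional boolean that gates logging) might look like the following; the project's actual implementation may differ:

import logging
import time

class Timer:
    """Log how long the enclosed block takes (sketch of assumed behavior).

    The second argument is taken to be a flag that enables logging,
    matching call sites such as Timer("update frame", turn_number < 5).
    """

    def __init__(self, name, enabled=True):
        self.name = name
        self.enabled = enabled

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if self.enabled:
            logging.warning("%s took %.3fs", self.name, time.time() - self.start)
        return False  # never suppress exceptions from the block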
Example #2
    def __init__(self, name, ckpt_file, params, learning=False):
        with Timer("start game"):
            # During init phase: initialize the model and compile it
            with Timer('Initialize Model'):
                my_model = MovementModel(cached_model=ckpt_file, params_file=params)

            # Get the initial game state
            game = Game()
            self.my_model = my_model
            self.game = game
            self.last_move = {}
            self.avoid = set()
            self.learning = learning
            if self.learning:
                # move_file is a module-level filename template (defined elsewhere)
                self.move_file = open(move_file.format(self.game.my_id), 'w')
            self.warmup()
            game.ready(name)
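Note the ordering here: the model is built and warmed up before game.ready(name) is sent. Halite III allows bots extra time during initialization, so doing the expensive TensorFlow setup first keeps it off the per-turn clock that the run loop in Example #9 guards with its 1.7-second check.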
Example #3
    def warmup(self, game_state):
        # Run one throwaway prediction so TensorFlow builds and caches its
        # graph up front; the output itself is discarded.
        feature_list = []
        feature_list.append(game_state.center_shift())
        feature_map = np.stack(feature_list, axis=0)

        with Timer("Generate Prediction"):
            feed_dict = {self.x: feature_map, self.training: False}
            self.session.run([self.predictions], feed_dict=feed_dict)
Example #4
    def generate_prob_move(self, game_state, ship_id):
        feature_list = []
        feature_list.append(game_state.feature_shift(ship_id))
        feature_map = np.stack(feature_list, axis=0)

        with Timer("Generate Prediction"):
            feed_dict = {self.x: feature_map, self.training: False}
            action = self.session.run([self.action], feed_dict=feed_dict)[0]
        move = action[0][0]
        return OUTPUT_TO_MOVE[move]
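Examples #4, #7, and #9 convert network outputs through OUTPUT_TO_MOVE and MOVE_TO_DIRECTION from player.constants, which are not shown here. For a Halite III bot they plausibly map five discrete outputs onto move characters and SDK directions; the ordering and values below are assumptions, not the project's actual tables:

from hlt import positionals  # Halite III starter-kit SDK

# Hypothetical layout: output index -> move character -> SDK direction.
OUTPUT_TO_MOVE = {0: 'o', 1: 'n', 2: 'e', 3: 's', 4: 'w'}
MOVE_TO_DIRECTION = {
    'o': positionals.Direction.Still,
    'n': positionals.Direction.North,
    'e': positionals.Direction.East,
    's': positionals.Direction.South,
    'w': positionals.Direction.West,
}
MOVE_TO_OUTPUT = {move: out for out, move in OUTPUT_TO_MOVE.items()}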
Example #5
    @property
    def game_map(self):
        # Built lazily and cached; accessed as an attribute elsewhere
        # (e.g. self.game.game_map in Examples #9 and #10).
        if not self._map:
            with Timer("Build Game Map"):
                game_map = [[None for _ in range(self.map_size)]
                            for _ in range(self.map_size)]
                for y_position in range(self.map_size):
                    for x_position in range(self.map_size):
                        game_map[y_position][x_position] = MapCell(
                            Position(x_position, y_position),
                            self.frame[y_position][x_position])
                self._map = GameMap(game_map, self.map_size, self.map_size)
        return self._map
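This is a lazy, cached property: the O(map_size²) MapCell construction runs once, on first access, and every later read returns the cached GameMap. The run loop in Example #9 relies on exactly that, e.g.:

game_map = self.game.game_map   # first access builds and caches the map
cell = game_map[ship.position]  # later lookups reuse self._map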
Example #6
    def predict(self, game_state):
        feature_list = []
        feature_list.append(game_state.center_shift())
        feature_map = np.stack(feature_list, axis=0)

        with Timer("Generate Prediction"):
            feed_dict = {self.x: feature_map, self.training: False}
            predictions = self.session.run([self.predictions],
                                           feed_dict=feed_dict)[0]
            log_message(predictions)
        _, moves = predictions
        moves = moves.flatten()
        return bool(moves[0])
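Judging by the commented-out call self.spawn_model.predict(state) in Example #9, this predict() belongs to a spawn-decision model: it flattens the predicted moves and reads the first entry as a spawn / don't-spawn boolean.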
Example #7
    def generate_move(self, game_state, ship_id):
        feature_list = []
        feature_list.append(game_state.feature_shift(ship_id))
        feature_map = np.stack(feature_list, axis=0)

        with Timer("Generate Prediction"):
            feed_dict = {self.x: feature_map, self.training: False}
            predictions = self.session.run([self.predictions],
                                           feed_dict=feed_dict)[0]
        _, moves = predictions
        moves = moves.flatten()
        return [MOVE_TO_DIRECTION[OUTPUT_TO_MOVE[x]] for x in moves]
Example #8
    def feature_map(self):
        if self._feature_map is None:
            with Timer("Generate Feature Map"):
                # One 46-channel feature plane per cell; the channels are
                # annotated below as they are filled in.
                feature_map = np.zeros((self.map_size, self.map_size, 46),
                                       dtype=np.float32)

                # Occupancy sets for channels 0-3.
                ships = {x.position for x in self.ships.values()}
                other_ships = {x.position for x in self.other_ships.values()}
                dropoffs = {x.position for x in self.dropoffs}
                other_dropoffs = {x.position for x in self.other_dropoffs}

                for i, objs in enumerate(
                    [ships, other_ships, dropoffs, other_dropoffs]):
                    for y in range(self.map_size):
                        for x in range(self.map_size):
                            if Position(x=x, y=y) in objs:
                                feature_map[y][x][i] = 1
                i_base = 3  # halite-threshold one-hots fill channels 3-22 (this overlaps channel 3 above)
                for y in range(self.map_size):
                    for x in range(self.map_size):
                        h_amount = self.game_map[Position(x=x,
                                                          y=y)].halite_amount
                        for i, threshold in enumerate(range(0, 1000, 50)):
                            if h_amount <= threshold:
                                feature_map[y][x][i + i_base] = 1
                        feature_map[y][x][23] = h_amount / 1000.  # channel 23: normalized cell halite
                i_base = 24  # per-ship cargo one-hots fill channels 24-43
                for ship_id, our_ship in self.ships.items():
                    h_amount = our_ship.halite_amount
                    for i, threshold in enumerate(range(0, 1000, 50)):
                        if h_amount >= threshold:
                            feature_map[our_ship.position.y][
                                our_ship.position.x][i + i_base] = 1
                    # channel 44: normalized cargo of the ship on this cell
                    feature_map[our_ship.position.y][
                        our_ship.position.x][44] = h_amount / 1000.

                # channel 45: raw (unnormalized) turn number on every cell
                for y in range(self.map_size):
                    for x in range(self.map_size):
                        feature_map[y][x][45] = self.turn_number
                if self.map_size == MAX_BOARD_SIZE:
                    self._feature_map = feature_map
                else:
                    # Tile the toroidal map so smaller boards fill the
                    # fixed-size input the network expects.
                    self._feature_map = np.tile(feature_map, (2, 2, 1))
        return self._feature_map
Example #9
    def run(self):
        # Some minimal state to say when to go home
        go_home = defaultdict(lambda: False)
        while True:
            logging.warning("turn {}".format(self.game.turn_number))
            with Timer("update frame", self.game.turn_number < 5):
                self.game.update_frame()
                turn_start = time.time()
                me = self.game.me  # Here we extract our player metadata from the game state
                game_map = self.game.game_map  # And here we extract the map metadata
                other_players = [p for pid, p in self.game.players.items() if pid != self.game.my_id]


            with Timer("create avoid set", self.game.turn_number < 5):
                self.avoid = set()
                for player in other_players:
                    for ship in player.get_ships():
                        for direction in DIRECTION_ORDER:
                            self.avoid.add(ship.position.directional_offset(direction))

            command_queue = []


            with Timer("generate state", self.game.turn_number < 5):
                state = self.generate_state(game_map, me, other_players, self.game.turn_number)

            for ship in me.get_ships():  # For each of our ships
                if time.time() - turn_start > 1.7:
                    break
                # Did not machine learn going back to base. Manually tell ships to return home
                if ship.position == me.shipyard.position:
                    go_home[ship.id] = False
                elif go_home[ship.id] or ship.halite_amount >= 1000 or (constants.MAX_TURNS - self.game.turn_number <= 25 and ship.halite_amount > 0 and not self.learning):
                    with Timer("go home", self.game.turn_number < 5):
                        go_home[ship.id] = True
                        movement = game_map.get_safe_move(game_map[ship.position], game_map[me.shipyard.position])
                        if movement is not None:
                            game_map[ship.position].mark_safe()
                            game_map[ship.position.directional_offset(movement)].mark_unsafe(ship)
                            send_command(ship.move(movement))
                        else:
                            bulldoze = False
                            has_asshole = game_map[me.shipyard.position].is_occupied and game_map[me.shipyard.position].ship.owner != me.id
                            if (constants.MAX_TURNS - self.game.turn_number <= 25 and ship.halite_amount > 0) or has_asshole:
                                for direction in game_map.get_unsafe_moves(ship.position, me.shipyard.position):
                                    target_pos = ship.position.directional_offset(direction)
                                    if target_pos == me.shipyard.position:
                                        bulldoze = True
                                        send_command(ship.move(direction))
                                        break
                            if not bulldoze:
                                ship.stay_still()
                        continue

                # Use machine learning to get a move
                if self.learning:
                    output = self.my_model.generate_prob_move(state, ship.id)
                    self.move_file.write("{},{},{}\n".format(self.game.turn_number, ship.id, output))
                    self.move_file.flush()
                    ml_move = MOVE_TO_DIRECTION[output]
                    backup = ml_move
                else:
                    with Timer("predict move", self.game.turn_number < 5):
                        ml_move, backup = self.my_model.generate_move(state, ship.id)

                with Timer("make move", self.game.turn_number < 5):
                    if ml_move is not None:
                        if ml_move != positionals.Direction.Still and ship.halite_amount < (game_map[ship.position].halite_amount/10) and not self.learning:
                            ship.stay_still()
                            continue
                        if (game_map[ship.position].has_structure and ship.halite_amount == 0 and (ml_move == positionals.Direction.Still or game_map[ship.position.directional_offset(ml_move)].is_occupied)):
                            for i in DIRECTION_ORDER:
                                if game_map.get_safe_move(game_map[ship.position],
                                                          game_map[ship.position.directional_offset(i)]):
                                    ml_move = i
                                    break
                            if ml_move == positionals.Direction.Still and self.game.turn_number > 20:
                                move = random.choice(DIRECTION_ORDER)
                                game_map[ship.position].mark_safe()
                                game_map[ship.position.directional_offset(move)].mark_unsafe(ship)
                                send_command(ship.move(move))
                                continue


                        if ml_move == positionals.Direction.Still and (game_map[ship.position].halite_amount == 0 or (game_map[ship.position].has_structure and ship.halite_amount == 0)):
                            #logging.warning("Choosing random direction for {}".format(ship.id))
                            ml_move = backup

                        if ml_move != positionals.Direction.Still and self.is_dumb_move(game_map, ship, ml_move):
                            i = DIRECTION_ORDER.index(ml_move)
                            stop = i + 3

                            while self.is_dumb_move(game_map, ship, ml_move) and i < stop:
                                i += 1
                                ml_move = DIRECTION_ORDER[i % 4]

                        movement = game_map.get_safe_move(game_map[ship.position],
                                                          game_map[ship.position.directional_offset(ml_move)])
                        if movement is not None:
                            cell = game_map[ship.position.directional_offset(movement)]
                            game_map[ship.position].mark_safe()
                            cell.mark_unsafe(ship)
                            self.last_move[ship.id] = ship.position
                            send_command(ship.move(movement))
                            continue
                    ship.stay_still()

            # Spawn some more ships
            with Timer("spawn", self.game.turn_number < 5):
                if me.halite_amount >= constants.SHIP_COST and self.game.turn_number <= constants.MAX_TURNS/2 and not game_map[me.shipyard.position].is_occupied:
                    #if self.spawn_model.predict(state):
                    send_command(self.game.me.shipyard.spawn())

            #logging.warning("turn took {}".format(time.time() - turn_start))
            self.game.end_turn(command_queue)  # finish the turn; command_queue stays empty because moves were already emitted via send_command
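send_command is used throughout the loop but never defined in these excerpts, and end_turn() is handed an empty command_queue. One way to reconcile the two, purely as a guess at the missing helper: the Halite III engine accepts one whitespace-separated line of commands per turn, so each command can be streamed as soon as it is decided, and the end_turn() call merely terminates the line.

import sys

def send_command(command):
    # Hypothetical helper: emit each engine command immediately;
    # end_turn([]) later closes the turn's command line.
    sys.stdout.write(command + ' ')
    sys.stdout.flush()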
Example #10
    def warmup(self):
        with Timer("warmup", True):
            frame = [[y.halite_amount for y in x] for x in self.game.game_map._cells]
            s = GameState(0, frame, {}, {}, {}, [], [])
            self.my_model.warmup(s)
Example #11
    if os.path.exists('arena'):
        shutil.rmtree('arena')
    os.mkdir('arena')

    with Timer("playing game", True):
        play_game(map_size, num_players, current_models)
    replay = glob.glob('arena/*.hlt')[0]
    with Timer("generating features", True):
        f, m, r = get_inputs(replay, num_players)
    with Timer("policy update", True):
        model.policy_update(f, m, normalize_rewards(r))
    model.save_model('models/policy_model2.ckpt')


if __name__ == '__main__':
    model = MovementModel(cached_model='models/chosen3_rmrzx_82810.ckpt',
                          params_file='params/rmrzx')
    model.save_model('models/policy_model2.ckpt')
    current_models = [
        'models/policy_model.ckpt', 'models/chosen3_rmrzx_82810.ckpt'
    ]
    i = 0
    while True:
        with Timer('episode {}'.format(i), True):
            episode(model, current_models)
        i += 1
        if i % 50 == 0:
            new_model = 'models/policy_model_{}.ckpt'.format(i)
            model.save_model(new_model)
            current_models.append(new_model)
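Taken together with Example #1, this is a self-play training loop: each episode plays one game against a pool of earlier checkpoints, recovers features, moves, and rewards from the saved replay, runs a policy update on the normalized rewards, and every 50 episodes snapshots the current weights into the opponent pool.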
Example #12
import os
import time
import pickle
import json
import random
import string
import logging

from player.utils import Timer, log_message

with Timer("halite import", True):
    from player.state import GameState
    from player.constants import MAX_BOARD_SIZE, FEATURE_SIZE, OUTPUT_SIZE, MOVE_TO_DIRECTION, OUTPUT_TO_MOVE, MOVE_TO_OUTPUT

with Timer("numpy import", True):
    import numpy as np

with Timer("tf import", True):
    import tensorflow as tf

from player.tf_contrib_copy import fully_connected, variance_scaling_initializer

#with Timer("slim import", True):
#    import tensorflow.contrib.slim as slim


def train_test_split(folder, data_size, split=0.2):
    files = np.array(
        sorted([os.path.join(folder, f) for f in os.listdir(folder)]))
    indices = np.random.permutation(files.shape[0])
    test_size = int(data_size * split)
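The listing truncates train_test_split at this point. A plausible completion, given the shuffled indices and test_size already computed (an assumption, since the original body is not shown):

    test_idx, train_idx = indices[:test_size], indices[test_size:]
    return files[train_idx], files[test_idx]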
Example #13
import os
from player.utils import Timer
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--learning", action="store_true", default=False)
parser.add_argument("--ckpt", default="models/policy_model2.ckpt")
args = parser.parse_args()

with Timer("Import", True):
    #os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    from player.bot import Bot
with Timer("Initializes", True):
    bot = Bot('policybot', args.ckpt, 'params/rmrzx', args.learning)
bot.run()
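This file is the entry point the Halite III engine launches. A match is typically started with something along the lines of ./halite --replay-directory arena "python3 MyBot.py" "python3 MyBot.py --learning" (the script name is a placeholder); passing --learning makes the bot log each turn's sampled move (see Example #2) so a replay can later be paired with the actions that produced it.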