def episode(model, current_models):
    map_size = random.choice([32, 40, 56, 64])
    num_players = random.choice([2, 4])
    if os.path.exists('arena'):
        shutil.rmtree('arena')
    os.mkdir('arena')
    with Timer("playing game", True):
        play_game(map_size, num_players, current_models)
    replay = glob.glob('arena/*.hlt')[0]
    with Timer("generating features", True):
        f, m, r = get_inputs(replay, num_players)
    with Timer("policy update", True):
        model.policy_update(f, m, normalize_rewards(r))
    model.save_model('models/policy_model2.ckpt')
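normalize_rewards is not shown in this snippet. A minimal sketch of what it could look like, assuming the usual policy-gradient practice of mean-centering and variance-scaling the rewards before the update (the body below is an assumption, not the project's actual helper):

import numpy as np

def normalize_rewards(rewards):
    # Hypothetical helper: center and scale rewards so the policy update
    # is not dominated by the absolute halite totals of a single game.
    rewards = np.asarray(rewards, dtype=np.float32)
    centered = rewards - rewards.mean()
    std = centered.std()
    return centered if std == 0 else centered / std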
def __init__(self, name, ckpt_file, params, learning=False):
    with Timer("start game"):
        # During the init phase: initialize the model and compile it
        with Timer('Initialize Model'):
            my_model = MovementModel(cached_model=ckpt_file, params_file=params)
        # Get the initial game state
        game = Game()
        self.my_model = my_model
        self.game = game
        self.last_move = {}
        self.avoid = set()
        self.learning = learning
        if self.learning:
            self.move_file = open(move_file.format(self.game.my_id), 'w')
        self.warmup()
        game.ready(name)
def warmup(self, game_state):
    # Run one throwaway prediction so the per-turn calls don't pay the
    # one-time session start-up cost.
    feature_list = []
    feature_list.append(game_state.center_shift())
    feature_map = np.stack(feature_list, axis=0)
    with Timer("Generate Prediction"):
        feed_dict = {self.x: feature_map, self.training: False}
        predictions = self.session.run([self.predictions], feed_dict=feed_dict)[0]
def generate_prob_move(self, game_state, ship_id):
    # Run the sampled-action op for this ship and map its output to a game move.
    feature_list = []
    feature_list.append(game_state.feature_shift(ship_id))
    feature_map = np.stack(feature_list, axis=0)
    with Timer("Generate Prediction"):
        feed_dict = {self.x: feature_map, self.training: False}
        action = self.session.run([self.action], feed_dict=feed_dict)[0]
    move = action[0][0]
    return OUTPUT_TO_MOVE[move]
def game_map(self):
    if not self._map:
        with Timer("Build Game Map"):
            game_map = [[None for _ in range(self.map_size)] for _ in range(self.map_size)]
            for y_position in range(self.map_size):
                for x_position in range(self.map_size):
                    game_map[y_position][x_position] = MapCell(
                        Position(x_position, y_position),
                        self.frame[y_position][x_position])
            self._map = GameMap(game_map, self.map_size, self.map_size)
    return self._map
def predict(self, game_state):
    # Return a single boolean decision from the model for the current state.
    feature_list = []
    feature_list.append(game_state.center_shift())
    feature_map = np.stack(feature_list, axis=0)
    with Timer("Generate Prediction"):
        feed_dict = {self.x: feature_map, self.training: False}
        predictions = self.session.run([self.predictions], feed_dict=feed_dict)[0]
    log_message(predictions)
    _, moves = predictions
    moves = np.ndarray.flatten(moves)
    return bool(moves[0])
def generate_move(self, game_state, ship_id):
    # Convert the model's ranked move outputs for this ship into game directions.
    feature_list = []
    feature_list.append(game_state.feature_shift(ship_id))
    feature_map = np.stack(feature_list, axis=0)
    with Timer("Generate Prediction"):
        feed_dict = {self.x: feature_map, self.training: False}
        predictions = self.session.run([self.predictions], feed_dict=feed_dict)[0]
    _, moves = predictions
    moves = np.ndarray.flatten(moves)
    return [MOVE_TO_DIRECTION[OUTPUT_TO_MOVE[x]] for x in moves]
def feature_map(self):
    if self._feature_map is None:
        with Timer("Generate Feature Map"):
            # 46 feature planes per cell: unit/structure presence, cell-halite
            # thresholds, normalized halite, ship cargo, and the turn number.
            feature_map = np.zeros((self.map_size, self.map_size, 46), dtype=np.float32)
            ships = set([x.position for x in self.ships.values()])
            other_ships = set([x.position for x in self.other_ships.values()])
            dropoffs = set([x.position for x in self.dropoffs])
            other_dropoffs = set([x.position for x in self.other_dropoffs])
            # Presence planes: our ships, enemy ships, our dropoffs, enemy dropoffs
            for i, objs in enumerate([ships, other_ships, dropoffs, other_dropoffs]):
                for y in range(self.map_size):
                    for x in range(self.map_size):
                        if Position(x=x, y=y) in objs:
                            feature_map[y][x][i] = 1
            # Cell halite: one-hot thresholds in steps of 50, plus a normalized value
            i_base = 3
            for y in range(self.map_size):
                for x in range(self.map_size):
                    h_amount = self.game_map[Position(x=x, y=y)].halite_amount
                    for i, threshold in enumerate(range(0, 1000, 50)):
                        if h_amount <= threshold:
                            feature_map[y][x][i + i_base] = 1
                    feature_map[y][x][23] = h_amount / 1000.
            # Cargo carried by each of our ships, encoded the same way
            i_base = 24
            for ship_id, our_ship in self.ships.items():
                h_amount = our_ship.halite_amount
                for i, threshold in enumerate(range(0, 1000, 50)):
                    if h_amount >= threshold:
                        feature_map[our_ship.position.y][our_ship.position.x][i + i_base] = 1
                feature_map[our_ship.position.y][our_ship.position.x][44] = h_amount / 1000.
            # Broadcast the turn number to every cell
            for y in range(self.map_size):
                for x in range(self.map_size):
                    feature_map[y][x][45] = self.turn_number
            # Smaller maps are tiled up toward the maximum board size
            if self.map_size == MAX_BOARD_SIZE:
                self._feature_map = feature_map
            else:
                self._feature_map = np.tile(feature_map, (2, 2, 1))
    return self._feature_map
def run(self):
    # Some minimal state to say when to go home
    go_home = defaultdict(lambda: False)
    while True:
        logging.warning("turn {}".format(self.game.turn_number))
        with Timer("update frame", self.game.turn_number < 5):
            self.game.update_frame()
        turn_start = time.time()
        me = self.game.me  # Here we extract our player metadata from the game state
        game_map = self.game.game_map  # And here we extract the map metadata
        other_players = [p for pid, p in self.game.players.items() if pid != self.game.my_id]
        with Timer("create avoid set", self.game.turn_number < 5):
            self.avoid = set()
            for player in other_players:
                for direction in DIRECTION_ORDER:
                    for ship in player.get_ships():
                        self.avoid.add(ship.position.directional_offset(direction))
        command_queue = []
        with Timer("generate state", self.game.turn_number < 5):
            state = self.generate_state(game_map, me, other_players, self.game.turn_number)

        for ship in me.get_ships():  # For each of our ships
            if time.time() - turn_start > 1.7:
                break
            # Returning to base was not machine-learned; manually tell ships to head home
            if ship.position == me.shipyard.position:
                go_home[ship.id] = False
            elif go_home[ship.id] or ship.halite_amount >= 1000 or (constants.MAX_TURNS - self.game.turn_number <= 25 and ship.halite_amount > 0 and not self.learning):
                with Timer("go home", self.game.turn_number < 5):
                    go_home[ship.id] = True
                    movement = game_map.get_safe_move(game_map[ship.position], game_map[me.shipyard.position])
                    if movement is not None:
                        game_map[ship.position].mark_safe()
                        game_map[ship.position.directional_offset(movement)].mark_unsafe(ship)
                        send_command(ship.move(movement))
                    else:
                        bulldoze = False
                        has_asshole = game_map[me.shipyard.position].is_occupied and game_map[me.shipyard.position].ship.owner != me.id
                        if (constants.MAX_TURNS - self.game.turn_number <= 25 and ship.halite_amount > 0) or has_asshole:
                            for direction in game_map.get_unsafe_moves(ship.position, me.shipyard.position):
                                target_pos = ship.position.directional_offset(direction)
                                if target_pos == me.shipyard.position:
                                    bulldoze = True
                                    send_command(ship.move(direction))
                                    break
                        if not bulldoze:
                            ship.stay_still()
                continue

            # Use machine learning to get a move
            if self.learning:
                output = self.my_model.generate_prob_move(state, ship.id)
                self.move_file.write("{},{},{}\n".format(self.game.turn_number, ship.id, output))
                self.move_file.flush()
                ml_move = MOVE_TO_DIRECTION[output]
                backup = ml_move
            else:
                with Timer("predict move", self.game.turn_number < 5):
                    ml_move, backup = self.my_model.generate_move(state, ship.id)

            with Timer("make move", self.game.turn_number < 5):
                if ml_move is not None:
                    if ml_move != positionals.Direction.Still and ship.halite_amount < (game_map[ship.position].halite_amount / 10) and not self.learning:
                        ship.stay_still()
                        continue
                    if (game_map[ship.position].has_structure and ship.halite_amount == 0
                            and (ml_move == positionals.Direction.Still
                                 or game_map[ship.position.directional_offset(ml_move)].is_occupied)):
                        for i in DIRECTION_ORDER:
                            if game_map.get_safe_move(game_map[ship.position], game_map[ship.position.directional_offset(i)]):
                                ml_move = i
                                break
                    if ml_move == positionals.Direction.Still and self.game.turn_number > 20:
                        move = random.choice(DIRECTION_ORDER)
                        game_map[ship.position].mark_safe()
                        game_map[ship.position.directional_offset(move)].mark_unsafe(ship)
                        send_command(ship.move(move))
                        continue
                    if ml_move == positionals.Direction.Still and (game_map[ship.position].halite_amount == 0 or (game_map[ship.position].has_structure and ship.halite_amount == 0)):
                        # logging.warning("Choosing random direction for {}".format(ship.id))
                        ml_move = backup
                    if ml_move != positionals.Direction.Still and self.is_dumb_move(game_map, ship, ml_move):
                        i = DIRECTION_ORDER.index(ml_move)
                        stop = i + 3
                        while self.is_dumb_move(game_map, ship, ml_move) and i < stop:
                            i += 1
                            ml_move = DIRECTION_ORDER[i % 4]
                    movement = game_map.get_safe_move(game_map[ship.position], game_map[ship.position.directional_offset(ml_move)])
                    if movement is not None:
                        cell = game_map[ship.position.directional_offset(movement)]
                        game_map[ship.position].mark_safe()
                        cell.mark_unsafe(ship)
                        self.last_move[ship.id] = ship.position
                        send_command(ship.move(movement))
                        continue
                ship.stay_still()

        # Spawn some more ships
        with Timer("spawn", self.game.turn_number < 5):
            if (me.halite_amount >= constants.SHIP_COST
                    and self.game.turn_number <= constants.MAX_TURNS / 2
                    and not game_map[me.shipyard.position].is_occupied):
                # if self.spawn_model.predict(state):
                send_command(self.game.me.shipyard.spawn())

        # logging.warning("turn took {}".format(time.time() - turn_start))
        self.game.end_turn(command_queue)  # Send our moves back to the game environment
def warmup(self):
    with Timer("warmup", True):
        frame = [[y.halite_amount for y in x] for x in self.game.game_map._cells]
        s = GameState(0, frame, {}, {}, {}, [], [])
        self.my_model.warmup(s)
if __name__ == '__main__':
    model = MovementModel(cached_model='models/chosen3_rmrzx_82810.ckpt', params_file='params/rmrzx')
    model.save_model('models/policy_model2.ckpt')
    current_models = [
        'models/policy_model.ckpt',
        'models/chosen3_rmrzx_82810.ckpt'
    ]
    i = 0
    while True:
        with Timer('episode {}'.format(i), True):
            episode(model, current_models)
        i += 1
        if i % 50 == 0:
            new_model = 'models/policy_model_{}.ckpt'.format(i)
            model.save_model(new_model)
            current_models.append(new_model)
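play_game is also defined elsewhere. One plausible sketch, assuming the Halite III engine binary is available locally as ./halite with its usual --replay-directory/--width/--height flags, and that opponents are launched through the MyBot.py entry script shown later via its --ckpt argument (all of this wiring is an assumption, not the project's actual driver):

import random
import subprocess

def play_game(map_size, num_players, current_models):
    # Hypothetical driver: run one game with replays written into arena/,
    # where episode() picks up the resulting .hlt file.
    opponents = random.choices(current_models, k=num_players - 1)
    bots = ['python3 MyBot.py --learning --ckpt models/policy_model2.ckpt']
    bots += ['python3 MyBot.py --ckpt {}'.format(ckpt) for ckpt in opponents]
    cmd = ['./halite', '--replay-directory', 'arena/', '--no-timeout',
           '--width', str(map_size), '--height', str(map_size)] + bots
    subprocess.check_call(cmd)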
import os
import time
import pickle
import json
import random
import string
import logging

from player.utils import Timer, log_message

with Timer("halite import", True):
    from player.state import GameState
    from player.constants import MAX_BOARD_SIZE, FEATURE_SIZE, OUTPUT_SIZE, MOVE_TO_DIRECTION, OUTPUT_TO_MOVE, MOVE_TO_OUTPUT
with Timer("numpy import", True):
    import numpy as np
with Timer("tf import", True):
    import tensorflow as tf
    from player.tf_contrib_copy import fully_connected, variance_scaling_initializer
#with Timer("slim import", True):
#    import tensorflow.contrib.slim as slim


def train_test_split(folder, data_size, split=0.2):
    files = np.array(
        sorted([os.path.join(folder, f) for f in os.listdir(folder)]))
    indices = np.random.permutation(files.shape[0])
    test_size = int(data_size * split)
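train_test_split is cut off above after computing test_size. A minimal completion sketch, reusing the imports above and assuming the intent is to carve the permuted file list into held-out test files and training files (the index slicing and return values are an assumption, not the original code):

def train_test_split(folder, data_size, split=0.2):
    # Hypothetical completion: the first `test_size` permuted files become
    # the test set, the remainder (up to data_size) the training set.
    files = np.array(
        sorted([os.path.join(folder, f) for f in os.listdir(folder)]))
    indices = np.random.permutation(files.shape[0])
    test_size = int(data_size * split)
    test_idx = indices[:test_size]
    train_idx = indices[test_size:data_size]
    return files[train_idx], files[test_idx]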
import os
import argparse

from player.utils import Timer

parser = argparse.ArgumentParser()
parser.add_argument("--learning", action="store_true", default=False)
parser.add_argument("--ckpt", default="models/policy_model2.ckpt")
args = parser.parse_args()

with Timer("Import", True):
    #os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    from player.bot import Bot
with Timer("Initializes", True):
    bot = Bot('policybot', args.ckpt, 'params/rmrzx', args.learning)
bot.run()