                    next_pos[ship.id].x][2]]
            else:
                continue
            # print(f'Stopped ship id {ship.id} to prevent collision')
            next_ships[next_pos[ship.id].y][next_pos[ship.id].x].remove(ship)
            next_pos[ship.id].x = ship.x
            next_pos[ship.id].y = ship.y
            commands[ship.id] = MoveCommand(self.id, ship.id, 'O')
            q.extend(next_ships[ship.y][ship.x])
            next_ships[ship.y][ship.x].append(ship)
        ret = list(commands.values())
        if (len(next_ships[self.shipyard.y][self.shipyard.x]) == 0
                and self.halite >= (1000 if self.pd is None else 5000)
                and game.max_turns - game.turn > 100):
            ret.append(SpawnShipCommand(self.id, None))
        return ret


if __name__ == '__main__':
    bot1 = FastBot()
    bot2 = StandardBot()
    players, cell_data, bank_data, owner_data, collisions = Game.run_game(
        [bot1, bot2], return_replay=True, map_gen='perlin')
    my_replay = Replayer.from_data(players, cell_data, bank_data, owner_data,
                                   collisions)
    my_replay.run()
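
# Note on the collision handling above: 'O' in MoveCommand is assumed to be
# this engine's "stay still" order (alongside the 'N'/'S'/'E'/'W' directions).
# When a ship is stopped, every ship that had been routed through its cell is
# pushed back onto the queue so their moves are re-resolved, which is what
# prevents chained collisions.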
                next_ships[next_pos[ship.id].y][next_pos[
                    ship.id].x].remove(ship)
            next_pos[ship.id].x = ship.x
            next_pos[ship.id].y = ship.y
            commands[ship.id] = MoveCommand(self.id, ship.id, 'O')
            q.extend(next_ships[ship.y][ship.x])
            next_ships[ship.y][ship.x].append(ship)
        ret = list(commands.values())
        if (len(next_ships[self.shipyard.y][self.shipyard.x]) == 0
                and self.halite >= 1000 and rem > 100
                and np.sum(game.cells[:, :, 0]) * 3 > self.map_starting_halite):
            ret.append(SpawnShipCommand(self.id, None))
        return ret

    def __repr__(self):
        return (f'{self.__class__.__name__}(id={self.id}, eps={self.eps}, '
                f'training={self.memory is not None})')


epsilon = MAX_EPSILON
model = create_unet()
model.load_weights('./checkpoints/cp5000_02.ckpt')
mem = Memory(10000)
for step_num in range(500):
    epsilon = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * math.exp(
        -LAMBDA * step_num)
    rl_bot = RLBot(model, memory=mem, eps=epsilon)
    Game.run_game([rl_bot, FastBot()], map_width=32, verbosity=0)
    print(f'Loss: {rl_bot.total_loss}')
    if step_num % 20 == 19:
        model.save_weights(f'./checkpoints/RL_checkpoint_{step_num}')
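
# The epsilon schedule in the loop above anneals exploration exponentially:
#     epsilon(t) = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * exp(-LAMBDA * t)
# For illustration (assumed values; the real constants are defined elsewhere
# in the repo): with MAX_EPSILON = 1.0, MIN_EPSILON = 0.05 and LAMBDA = 0.01,
# epsilon starts at 1.0 at step 0 and decays to roughly 0.4 by step 100.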
NUM_TRAIN = 1000  # number of full training games
SPARSE_FREQ = 0.02
DENSE_FREQ = 0.02
EPS = 0.05
NUM_VAL = 50  # number of full validation games
TRAIN_RATIO = 0.2  # proportion of training turns saved (number of games run adjusted so total turns saved is the same)
VAL_RATIO = 0.1  # proportion of validation turns saved

for i in range(3100, int(NUM_TRAIN / TRAIN_RATIO)):
    x = random.random()
    if x < SPARSE_FREQ:
        print(
            f'Creating sparse training game {i + 1:03}/{int(NUM_TRAIN / TRAIN_RATIO)}'
        )
        Game.run_game([FastBot(eps=EPS), FastBot(eps=EPS)],
                      map_width=32,
                      map_gen='sparse',
                      save_name=f'data/game{i}',
                      save_split=TRAIN_RATIO,
                      verbosity=0)
    elif x < SPARSE_FREQ + DENSE_FREQ:
        print(
            f'Creating dense training game {i + 1:03}/{int(NUM_TRAIN / TRAIN_RATIO)}'
        )
        Game.run_game([FastBot(eps=EPS), FastBot(eps=EPS)],
                      map_width=32,
                      map_gen='dense',
                      save_name=f'data/game{i}',
                      save_split=TRAIN_RATIO,
                      verbosity=0)
    else:
        print(
            f'Creating perlin training game {i + 1:03}/{int(NUM_TRAIN / TRAIN_RATIO)}'