    def __init__(self,
                 score_target=80,
                 out_of=100,
                 debug=False,
                 step_r=0.5,
                 lose_r=0.,
                 draw_r=0.5,
                 win_r=1.):
        # Set the env
        # score_target = number of wins required out of the last out_of games
        self.env = ke.make("connectx", debug=debug)
        self.configuration = self.env.configuration

        # (lose, draw, win) tuple
        self.standard_rewards = self.env.specification.reward.enum

        self.step_r = step_r
        self.lose_r = lose_r
        self.draw_r = draw_r
        self.win_r = win_r

        self.score_target = score_target
        self.out_of = out_of

        # Training config
        self.pair = [None, "random"]
        self.agent_position = 0
        self.trainer = self.env.train(self.pair)

        # Define required gym fields (examples):
        self.action_space = gym.spaces.Discrete(self.configuration.columns)

        self.observation_space = gym.spaces.Discrete(
            self.configuration.columns * self.configuration.rows)
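
    # Hypothetical step() sketch (not in the original snippet): the trainer
    # returned by env.train() follows the usual reset/step protocol, and the
    # shaped rewards defined above can be mapped onto the standard ones.
    def step(self, action):
        obs, reward, done, info = self.trainer.step(int(action))
        if done:
            # standard_rewards is the (lose, draw, win) tuple stored above
            lose_std, draw_std, win_std = self.standard_rewards
            if reward == win_std:
                reward = self.win_r
            elif reward == draw_std:
                reward = self.draw_r
            else:
                reward = self.lose_r
        else:
            reward = self.step_r
        return obs, reward, done, info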
    def test_convert_inputs(self):
        env = make("halite", debug=True)
        agent_count = 4
        env.reset(agent_count)
        obs = env.state[0].observation
        converted_tensor = convert_inputs(obs)
        #test_tensor = torch.tensor(4,21,21)
        ship1_index = obs['players'][0][2]['0-1'][0]
        ship2_index = obs['players'][1][2]['0-2'][0]
        ship3_index = obs['players'][2][2]['0-3'][0]
        ship4_index = obs['players'][3][2]['0-4'][0]

        col1, row1 = get_col_row(21, ship1_index)
        col2, row2 = get_col_row(21, ship2_index)
        col3, row3 = get_col_row(21, ship3_index)
        col4, row4 = get_col_row(21, ship4_index)

        self.assertEqual(converted_tensor[0, 2, row1, col1].item(), 1)
        self.assertEqual(converted_tensor[0, 2, row2, col2].item(), 2)
        self.assertEqual(converted_tensor[0, 2, row3, col3].item(), 3)
        self.assertEqual(converted_tensor[0, 2, row4, col4].item(), 4)

        #no shipyards should exist, and no ships should be holding any halite
        shipyard_equal_to_zero = torch.eq(converted_tensor[0, 1, :, :],
                                          torch.zeros(1, 1, 21, 21))
        ship_halite_equal_to_zero = torch.eq(converted_tensor[0, 3, :, :],
                                             torch.zeros(1, 1, 21, 21))
        self.assertEqual(torch.all(shipyard_equal_to_zero), True)
        self.assertEqual(torch.all(ship_halite_equal_to_zero), True)
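
# get_col_row is imported from the Halite environment code in the original
# test; a minimal sketch of what it is assumed to do, using the usual flat
# index convention pos = row * size + col:
def get_col_row(size, pos):
    return pos % size, pos // size
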
def run_main(agent_file1, agent_file2, num_episodes):
    """Load one or two agents from filenames 

    Load one or two agents from filenames and run
    against each other. Agent file 2 can be a kaggle
    provided agent.

    """

    agent1 = load_agent(agent_file1)

    if os.path.exists(agent_file2):
        agent2 = load_agent(agent_file2)
    else:
        # assume it's a kaggle-compatible string
        agent2 = agent_file2

    print("Make env")
    env = ke.make("connectx", debug=True)
    env.reset()
    print("Run a single game against random (test, not recorded)")
    run_agent_game(env, agent1, "random", render=True)
    print("\nNote: If there was an error in running above,\n script"
          " probably won't fail but agent is likely kaput.\n")

    print("Do the full evaluation vs agent2")
    env.reset()
    mean_scores = compare_agents(env,
                                 agent1,
                                 agent2,
                                 num_episodes=num_episodes)
    return mean_scores
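
# load_agent is not shown above; a minimal sketch using kaggle_environments'
# submission-loading utilities, assuming the agent is the last callable
# defined in the file:
def load_agent(path):
    from kaggle_environments import utils
    raw = utils.read_file(path)
    return utils.get_last_callable(raw)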
Example #4
def replay_match(path, step=0):
    """Replay game to a specific step - necessary for recreating stateful values."""
    with open(path, encoding='ascii') as f:
        match = json.load(f)
    env = make("halite", configuration=match['configuration'], steps=match['steps'])
    myid = [pid for pid, name in enumerate(match['info']['TeamNames']) if name == "Ready Salted"][0]
    config = env.configuration
    # env already done - can write out full replay
    t = env.render(mode='html', return_obj=True, width=800, height=800, header=False, controls=True)
    write_html(t, 'replay.html')
    # If agent carries state across turns, need to run through all steps, or can directly index into a step otherwise
    # check that we are correct player
    # print('My Id: ', board.current_player_id, board.current_player)
    print(f'Running for:\n\t{path}\n\t{agent.__module__}\n\tID = {myid}\n')
    actTime = 0
    for step in range(len(match['steps'])):
        state = match['steps'][step][0]  # list of length 1 for each step
        obs = state['observation']  # observations at this step
        obs['player'] = myid  # change the player to the one we want to inspect
        obs = structify(obs)  # turn the dicts into structures with attributes
        icon = '\\|/-'[(obs.step + 1) % 4]  # simple progress spinner
        t0 = time.time()
        ret = agent(obs, config)
        actTime = time.time() - t0
        print(f'{icon} step: {obs.step + 1} step time: {round(actTime, 2)}')  # , end="\r", flush=True
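
# write_html is not defined in this snippet; a minimal sketch, assuming it
# simply dumps the rendered HTML string to disk:
def write_html(html, filename):
    with open(filename, 'w') as f:
        f.write(html)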
Example #5
def run_episode(AI,
                board_size: int = 21,
                max_turns: int = 400,
                starting_halite: int = 5000,
                agent_count: int = 4):
    """
    Runs a complete episode with a given AI and game settings.
    :param AI: The AI function to call
    :param board_size: The size of the board to use
    :param max_turns: The turn at which the scores will be totaled & the winner decided
    :param starting_halite: The amount of Halite that each agent starts with
    :param agent_count: How many agents should be simulated
    :return: The final scores of each player
    """
    environment = make("halite",
                       configuration={
                           "size": board_size,
                           "startingHalite": starting_halite
                       })
    environment.reset(num_agents=agent_count)
    state = environment.state[0]
    board = helpers.Board(raw_observation=state.observation,
                          raw_configuration=environment.configuration)

    environment.run(["random", "random"])
    out = environment.render(mode="html", width=500, height=450)

    f = open("halite.html", "w")
    f.write(out)
    f.close()
Example #6
def test_threshold_tie():
    env = make("rps", configuration={"episodeSteps": 3, "tieRewardThreshold": 4})
    env.run([rock, paper])
    assert env.render(mode='ansi') == "Round 1: Rock vs Paper, Score: -1.0 to 1.0\nRound 2: Rock vs Paper, Score: 0 to 0\nGame ended on round 2, final score: 0 to 0\n"
    json = env.toJSON()
    assert json["rewards"] == [0, 0]
    assert json["statuses"] == ["DONE", "DONE"]
Example #7
def selfplay(agent, against, num):
    training_data = []
    for game in range(num):
        turn = np.random.randint(2)
        env = make("connectx")
        if turn == 0:
            trainer = env.train([None, against])
        else:
            trainer = env.train([against, None])
        done = False
        states = []
        observation = trainer.reset()

        root = None
        while not done:
            policy = mcts(observation, env.state, agent, against)
            # print(observation)
            action = int(np.random.choice(range(7), p=policy))
            states.append([processObservation(observation), policy])
            # print(turn, processObservation(observation).reshape(6, 7), policy, sep='\n')
            observation, reward, done, info = trainer.step(action)
            if reward is None:
                reward = -1
        print("|", end='')
        training_data.append({'states': states, 'result': reward})
    return training_data
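
# processObservation is defined elsewhere; a minimal sketch, assuming it just
# turns the flat ConnectX board list into a numpy array (the commented-out
# debug line above reshapes it to 6x7):
def processObservation(observation):
    return np.array(observation['board'], dtype=np.float32)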
Example #8
def main(runs: int):
    # submission = utils.read_file("rf_sub.py")
    env = make(
        "football",
        configuration={
            "save_video": True,
            "scenario_name": "11_vs_11_hard_stochastic",
            # "scenario_name": "11_vs_11_easy_stochastic",
            # "scenario_name": "11_vs_11_stochastic",
            "running_in_notebook": False,
            "dump_full_episodes": True,
            "logdir": "../cache/runs/",
            "render": True
        },
        debug=True)

    rewards = []
    for _ in range(runs):
        # output = env.run(["main.py", "do_nothing"])[-1]
        # output = env.run(["main.py", "run_right"])[-1]
        output = env.run(["main.py", "builtin_ai"])[-1]
        # output = env.run(["main.py", "main.py"])[-1]
        print('Left player: reward = %s, status = %s, info = %s' %
              (output[0]['reward'], output[0]['status'], output[0]['info']))
        rewards.append(output[0]['reward'])
    # print('Right player: reward = %s, status = %s, info = %s' %
    #       (output[1]['reward'], output[1]['status'], output[1]['info']))
    # env.render(mode="human", width=800, height=600)
    print(rewards)
    print(np.mean(rewards))
Example #9
    def _reset(self):
        self.last_reward = 0
        self.turns_counter = 0
        self.previous_ship_count = 0
        self.episode_ended = False
        self.total_reward = 0
        self.turns_not_moved = 0
        self.action_history = []
        # initialize game
        self.environment = make("halite",
                                configuration={
                                    "size": self._board_size,
                                    "startingHalite": 1000,
                                    "episodeSteps": self._max_turns
                                })
        self.environment.reset(self._agent_count)
        # get board
        self.board = self.get_board()
        self.state = np.zeros(
            [self._channels, self._board_size, self._board_size])
        self.state_history = [self.state] * self._frames

        self.prime_board()
        return_object = ts.restart(np.array(self.state_history,
                                            dtype=float))  # np.float is removed in newer NumPy
        return return_object
Example #10
    def __init__(self, board_size=5, startingHalite=1000):

        self.agent_count = 1
        self.board_size = board_size
        self.max_nb_ships = 1

        self.environment = make("halite",
                                configuration={
                                    "size": board_size,
                                    "startingHalite": startingHalite
                                })
        self.environment.reset(self.agent_count)

        state = self.environment.state[0]
        self.board = Board(state.observation, self.environment.configuration)

        #self.max_steps = 400
        #self.current_step = 0

        # Ship actions in order:
        # [Hold, North, East, South, West, Convert]
        self.ship_action_conversion_dict = {
            0: None,
            1: ShipAction.NORTH,
            2: ShipAction.EAST,
            3: ShipAction.SOUTH,
            4: ShipAction.WEST,
            5: ShipAction.CONVERT
        }
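
    # Hypothetical helper (not in the original snippet) showing how the
    # mapping above would typically be used with a kaggle_environments Ship:
    def apply_ship_action(self, ship, action_id):
        ship.next_action = self.ship_action_conversion_dict[action_id]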
Example #11
def simulate_one_game(agent_names):

    agent_0 = get_agent_class(agent_names[0])()
    agent_1 = get_agent_class(agent_names[1])()

    def action_0(obs):
        return agent_0.action_wrapper(obs)

    def action_1(obs):
        return agent_1.action_wrapper(obs)

    env = make(
        environment="football",
        configuration={
            "save_video": False,
            "scenario_name": "11_vs_11_kaggle",
            # "episodeSteps": 10,
        },
    )
    env.reset()
    env.run([action_0, action_1])
    score = env.state[0]["observation"]["players_raw"][0]["score"]

    if agent_names[0] in GREENLIST_TO_SAVE:
        save_score_and_log(agent_0, score)
    if agent_names[1] in GREENLIST_TO_SAVE:
        save_score_and_log(agent_1, score[::-1])

    return score
Example #12
    def __init__(self, hidden_dim, buffer_size, gamma, batch_size, device, writer):
        self.env = make("connectx", debug=True)
        self.device = device
        self.policy = Net(self.env.configuration.columns * self.env.configuration.rows, hidden_dim,
                          self.env.configuration.columns).to(
            device)

        self.target = Net(self.env.configuration.columns * self.env.configuration.rows, hidden_dim,
                          self.env.configuration.columns).to(
            device)
        self.enemyNet = Net(self.env.configuration.columns * self.env.configuration.rows, hidden_dim,
                            self.env.configuration.columns).to(
            device)
        self.target.load_state_dict(self.policy.state_dict())
        self.target.eval()
        self.buffer = ExperienceReplay(buffer_size)
        self.enemy = "random"
        self.trainingPair = self.env.train([None, self.enemy])
        self.loss_function = nn.MSELoss()
        self.optimizer = optim.Adam(params=self.policy.parameters(), lr=0.001)
        self.gamma = gamma
        self.batch_size = batch_size

        self.first = True
        self.player = 1
        self.writer = writer
Example #13
def create_board(size=3, starting_halite=0, agent_count=2):
    env = make("halite",
               configuration={
                   "size": size,
                   "startingHalite": starting_halite
               })
    return Board(env.reset(agent_count)[0].observation, env.configuration)
def play(left_player,
         right_player,
         print_details=True,
         save_video=False,
         debug=True) -> List[Tuple[Dict[str, Any], Dict[str, Any]]]:
    env = make("football",
               debug=debug,
               configuration={
                   "save_video": save_video,
                   "scenario_name": "11_vs_11_kaggle"
               })

    output = env.run([left_player, right_player])

    if print_details:
        for s, (left, right) in enumerate(output):
            print(f"\nStep {s}")

            print(f"Left player ({left_player}): \n"
                  f"actions taken: {left['action']}, "
                  f"reward: {left['reward']}, "
                  f"status: {left['status']}, "
                  f"info: {left['info']}")

            print(f"Right player ({right_player}): \n"
                  f"actions taken: {right['action']}, "
                  f"reward: {right['reward']}, "
                  f"status: {right['status']}, "
                  f"info: {right['info']}\n")

        print(
            f"Final score: {output[-1][0]['reward']} : {output[-1][1]['reward']}"
        )

    return output
Example #15
    def __init__(self):
        halite_env = make('halite', configuration=GAME_CONFIG, debug=True)
        self.env = halite_env.train(GAME_AGENTS)

        self.config = halite_env.configuration

        self.action_space = spaces.MultiDiscrete([N_SHIP_ACTIONS] * MAX_SHIPS +
                                                 [N_YARD_ACTIONS] * MAX_YARDS)
        self.action_space = gym_wrapper.spec_from_gym_space(space=self.action_space, name='action')

        self.observation_space = spaces.Box(low=0, high=1,
                                            shape=(self.config.size,
                                                   self.config.size,
                                                   N_FEATURES),
                                            dtype=np.float32)
        self.observation_space = gym_wrapper.spec_from_gym_space(space=self.observation_space, name='observation')

        self.observation_space = array_spec.BoundedArraySpec(
            shape=(self.config.size, self.config.size, N_FEATURES), dtype=np.int32, minimum=0,
            maximum=1, name='observation')


        self.reward_range = (REWARD_LOST, REWARD_WON)

        self.obs = None
        self.last_obs = None

        self.spec = None
        self.metadata = None
Example #16
def _run_inner(lh_source: str, rh_source: str) -> SimulateResult:
    env = make("mab", debug=True)
    start_time = datetime.datetime.now()
    result = env.run([lh_source, rh_source])
    duration = datetime.datetime.now() - start_time

    return SimulateResult(duration, result, env)
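
# SimulateResult is defined elsewhere; a minimal sketch, assuming it simply
# bundles the wall-clock duration, the raw run output and the environment:
import collections
SimulateResult = collections.namedtuple("SimulateResult",
                                         ["duration", "result", "env"])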
Example #17
    def __init__(self,
                 switch_prob=0.5,
                 use_random_training=True,
                 random_agent=False,
                 test_mode=False):
        self.env = make('connectx', debug=True)
        if use_random_training:
            if random.uniform(0, 1) < 0.6:
                self.pair = [None, 'negamax']
                print('create negamax agent')
            else:
                self.pair = [None, 'random']
                print('create random agent')
        else:
            self.pair = [None, 'negamax']

        #test setup
        if random_agent and test_mode:
            self.pair = [None, 'random']
        elif test_mode:
            self.pair = [None, 'negamax']

        self.trainer = self.env.train(self.pair)
        self.switch_prob = switch_prob

        # Define required gym fields (examples):
        config = self.env.configuration
        self.action_space = gym.spaces.Discrete(config.columns)
        self.observation_space = gym.spaces.Discrete(config.columns *
                                                     config.rows)
Example #18
    def __init__(self,
                 switch_prob=0.5,
                 opponent='random',
                 invalid_action=-100):
        self.env = make('connectx')

        if opponent not in self.env.agents:
            raise InvalidArgument(f"Agent must be in {self.env.agents}")

        self.pair = [None, opponent]
        self.trainer = self.env.train(self.pair)
        self.switch_prob = switch_prob

        config = self.env.configuration
        self.action_space = spaces.Discrete(config.columns)
        self.observation_space = spaces.Box(low=0,
                                            high=2,
                                            dtype=np.uint8,
                                            shape=(config.columns *
                                                   config.rows, ))
        self.reward_range = [-1, 1]

        if invalid_action < self.reward_range[0]:
            self.reward_range[0] = invalid_action
        elif invalid_action > self.reward_range[1]:
            self.reward_range[1] = invalid_action

        self.invalid_action = invalid_action
Example #19
def run_game(weights):
    critic = Critic([64, 64, 64, 64, 32, 32, 32, 32, 16, 16], NUM_ACT, STOCK_X)
    critic(critic.stock)
    critic.set_weights(pickle.loads(weights))

    geese = [Goose(critic) for _ in range(NUM_GEESE)]
    env = make('hungry_geese')
    steps = env.run(geese)

    for i, step in enumerate(steps):
        if i <= 0:
            continue

        obs = step[0]['observation']

        for ii, goose in enumerate(obs['geese']):
            if goose:
                geese[ii].cs[i - 1].r += 1 + len(goose)
            elif steps[i - 1][0]['observation']['geese'][ii]:
                geese[ii].cs[i - 1].r -= 10

    dat = list()

    for goose in geese:
        for c in goose.cs[:-1]:
            c.ss = True

        if goose.cs[-1].r >= 0.:
            goose.cs[-1].r /= (1. - NUM_LAMBDA) * 2.

        dat.extend(goose.cs)

    return dat
Example #20
def playGame(pnet, nnet, args, player):
    """
    Executes one episode of a game.
    """
    env = make("hungry_geese",
               configuration={
                   "rows": args.boardSize[0],
                   "columns": args.boardSize[1]
               },
               debug=False)
    env.reset(args.numAgents)

    prev_actions = [None] * args.numAgents

    while env.state[player]['status'] == 'ACTIVE' and not env.done:
        board = get_board(env.state[0].observation, prev_actions, args)

        # predict players' action
        actions = []
        pis, _ = pnet.predicts(board, player % args.n_gpus)
        for i, pi in enumerate(pis):
            # this player uses nnet
            if i == player:
                pi, _ = nnet.predict(board, player, player % args.n_gpus)

            action = select_action(pi, prev_actions[i])
            actions.append(action)

        env.step(actions)

        prev_actions = actions

    reward = get_reward(env.state[0].observation, player, args.numAgents)
    length = env.state[0].observation.step
    return reward, length
Example #21
def train_agent(episodes=20, steps_per_episode=50):
    env = make("halite", debug=True)
    # env.run(["random"])
    # env.render(mode="ipython", width=800, height=600)

    print('configuration')
    print(env.configuration)

    ship_state_wrapper = ShipStateWrapper(radius=4,
                                          max_frames=1,
                                          map_size=int(
                                              env.configuration['size']))

    shipyard_state_wrapper = ShipYardStateWrapper(
        radius=4, max_frames=1, map_size=int(env.configuration['size']))

    print(env.configuration)

    print("Initialized state wrappers")

    ship_agent = Agent(alpha=0.99,
                       gamma=0.75,
                       n_actions=6,
                       batch_size=64,
                       epsilon=.9,
                       input_dims=ship_state_wrapper.state_size)

    shipyard_agent = Agent(alpha=0.99,
                           gamma=0.75,
                           n_actions=2,
                           batch_size=64,
                           epsilon=.9,
                           input_dims=shipyard_state_wrapper.state_size)

    print("Initialized agents")

    trainer = env.train([None, "random", "random", "random"])
    observation = trainer.reset()

    print("Initialized trainer")

    halite_env = HaliteEnv(opponents=3,
                           ship_state_wrapper=ship_state_wrapper,
                           shipyard_state_wrapper=shipyard_state_wrapper,
                           radius=4,
                           trainer=trainer)

    all_rewards = []

    for i in range(episodes):
        episode_rewards = play_episode(ship_agent=ship_agent,
                                       shipyard_agent=shipyard_agent,
                                       env=halite_env,
                                       configuration=env.configuration,
                                       episode_number=i,
                                       training=True,
                                       n_steps=steps_per_episode)
        all_rewards.append(episode_rewards)

    return all_rewards
Example #22
def before_each(state=None, configuration=None):
    global env
    steps = [] if state is None else [state]
    env = make("football",
               steps=steps,
               configuration=configuration,
               debug=True)
Example #23
def mcts(observation, state, agent, against, root=None):
    probs, value = agent(observation)
    root = Node(value, probs)
    current_n = root.number

    for i in range(MCTS_WAVES):
        current_n += 1
        env = make("connectx", debug=False)
        if observation['mark'] == 1:
            trainer = env.train([None, against])
        else:
            trainer = env.train([against, None])
        env.state = state.copy()
        wave(root, agent, trainer, current_n)

    policy = []
    for child in root.links:
        if child is None:
            policy.append(0)
        else:
            policy.append(child.number)
    policy = np.array(policy)
    policy = policy**(1 / TEMPERATURE)
    policy = policy / policy.sum()

    return policy
Example #24
def test_run_timeout():
    env = make("tictactoe",
               debug=True,
               configuration={
                   "agentTimeout": 10,
                   "actTimeout": 10,
                   "runTimeout": 6
               })
    state = env.run([custom1, custom3])[-1]
    assert state == [
        {
            "action": 0,
            "reward": 0,
            "info": {},
            "observation": {
                "board": [1, 2, 1, 2, 1, 2, 0, 0, 0],
                "mark": 1
            },
            "status": "ACTIVE",
        },
        {
            "action": 5,
            "reward": 0,
            "info": {},
            "observation": {
                "mark": 2
            },
            "status": "INACTIVE",
        },
    ]
Example #25
def test_agents_can_timeout_on_act():
    env = make("tictactoe",
               debug=True,
               configuration={
                   "agentTimeout": 5,
                   "actTimeout": 1
               })
    state = env.run([custom1, custom3])[-1]
    assert state == [
        {
            "action": 0,
            "reward": 0,
            "info": {},
            "observation": {
                "board": [1, 2, 1, 0, 0, 0, 0, 0, 0],
                "mark": 1
            },
            "status": "DONE",
        },
        {
            "action": None,
            "reward": None,
            "info": {},
            "observation": {
                "mark": 2
            },
            "status": "TIMEOUT",
        },
    ]
Example #26
def test_win():
    env = make("rps", configuration={"episodeSteps": 2})
    env.run([paper, rock])
    json = env.toJSON()
    print(json)
    assert json["rewards"] == [1, -1]
    assert json["statuses"] == ["DONE", "DONE"]
Example #27
def test_halite_exception_action_has_error_status():
    env = make("halite", debug=True)
    def error_agent(obs, config):
        raise Exception("An exception occurred!")
    env.run([error_agent, random_agent])
    json = env.toJSON()
    assert json["name"] == "halite"
    assert json["statuses"] == ["ERROR", "DONE"]
Example #28
    def __init__(self):
        self.env = make("connectx", debug=True)
        self.trainer = self.env.train([None, "random"])

        # Define required gym fields (examples):
        config = self.env.configuration
        self.action_space = gym.spaces.Discrete(config.columns)
        self.observation_space = gym.spaces.Discrete(config.columns * config.rows)
Example #29
def get_sample_board(board_size, agent_count):
    environment = make(
        "halite", configuration={"size": board_size, "startingHalite": 1000}
    )
    environment.reset(agent_count)
    state = environment.state[0]
    board = Board(state.observation, environment.configuration)
    return board
Example #30
def eval_model(model, board_size=20):
    environment = make("halite",
                       configuration={
                           "size": board_size,
                           "startingHalite": 1000
                       })
    environment.run([agent(model), extra_agent, extra_agent, extra_agent])
    return eval_env(environment)