Example No. 1
def get_win_rate(agent1,
                 agent2,
                 environment: str,
                 configuration={},
                 episodes=100):
    rewards = evaluate(
        environment=environment,
        agents=[agent1, agent2],
        configuration=configuration,
        num_episodes=episodes // 2,
    )
    rewards += [[b, a] for [a, b] in evaluate(
        environment=environment,
        agents=[agent2, agent1],
        configuration=configuration,
        num_episodes=episodes - episodes // 2,
    )]

    agent_1_win_rate = np.round(rewards.count([1, -1]) / len(rewards),
                                decimals=2)
    print(f"Agent 1 Win Rate: {agent_1_win_rate}")
    agent_2_win_rate = np.round(rewards.count([-1, 1]) / len(rewards),
                                decimals=2)
    print(f"Agent 2 Win Rate: {agent_2_win_rate}")

    agent_1_invalid_games = rewards.count([None, 0])
    print(f"Agent 1 Invalid games: {agent_1_invalid_games}")
    agent_2_invalid_games = rewards.count([0, None])
    print(f"Agent 2 Invalid games: {agent_2_invalid_games}")
Example No. 2
def get_win_percentage(agent1, agent2, n_rounds=50):
    config = {'rows': 6, 'columns': 7, 'inarow': 4}
    # Each agent goes first for (roughly) half of the rounds
    outcomes = evaluate("connectx", [agent1, agent2], config, [], n_rounds//2)
    outcomes += [[b,a] for [a,b] in evaluate("connectx", [agent2, agent1], config, [], n_rounds-n_rounds//2)]
    exercise_agent_win_percentage = np.round(outcomes.count([1,-1])/len(outcomes), 2)
    print(f"Your Agent's Win Percentage (in {n_rounds} game rounds):", exercise_agent_win_percentage)
    print(f"Tutorial Agent's Win Percentage (in {n_rounds} game rounds):", np.round(outcomes.count([-1,1])/len(outcomes), 2))
    return exercise_agent_win_percentage
Example No. 3
def get_win_percentages(agent1, agent2, n_rounds=100):
    # Use default Connect Four setup
    config = {'rows': 6, 'columns': 7, 'inarow': 4}
    # Agent 1 goes first (roughly) half the time          
    outcomes = evaluate("connectx", [agent1, agent2], config, [], n_rounds//2)
    # Agent 2 goes first (roughly) half the time      
    outcomes += [[b,a] for [a,b] in evaluate("connectx", [agent2, agent1], config, [], n_rounds-n_rounds//2)]
    print("Agent 1 Win Percentage:", np.round(outcomes.count([1,-1])/len(outcomes), 2))
    print("Agent 2 Win Percentage:", np.round(outcomes.count([-1,1])/len(outcomes), 2))
    print("Number of Invalid Plays by Agent 1:", outcomes.count([None, 0]))
    print("Number of Invalid Plays by Agent 2:", outcomes.count([0, None]))
Example No. 4
def get_win_percentages_and_score(agent1, agent2, n_rounds=100, silent=False):
    # Use default Connect Four setup
    config = {'rows': 6, 'columns': 7, 'inarow': 4}
    # Agent 1 goes first (roughly) half the time          
    outcomes = evaluate("connectx", [agent1, agent2], config, [], n_rounds//2)
    # Agent 2 goes first (roughly) half the time      
    outcomes += [[b,a] for [a,b] in evaluate("connectx", [agent2, agent1], config, [], n_rounds-n_rounds//2)]
    if not silent:
        print("Agent 1 Win Percentage:", np.round(outcomes.count([1,-1])/len(outcomes), 2))
        print("Agent 2 Win Percentage:", np.round(outcomes.count([-1,1])/len(outcomes), 2))
        print("Number of Invalid Plays by Agent 1:", outcomes.count([None, 0]))
        print("Number of Invalid Plays by Agent 2:", outcomes.count([0, None]))
        print("Number of Draws (in {} game rounds):".format(n_rounds), outcomes.count([0, 0]))
    return 3 * outcomes.count([1,-1]) + outcomes.count([0, 0])
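A sketch of how the returned score could be used to rank several candidates against a fixed baseline. rank_candidates is a hypothetical helper, not part of the source; "random" and "negamax" are built-in connectx agents, and np and evaluate are assumed to be imported as above.

# Hypothetical helper: rank candidate agents by the 3-points-per-win score above.
def rank_candidates(candidates, baseline, n_rounds=20):
    scored = [(name, get_win_percentages_and_score(agent, baseline,
                                                   n_rounds=n_rounds, silent=True))
              for name, agent in candidates]
    return sorted(scored, key=lambda pair: pair[1], reverse=True)

print(rank_candidates([("random", "random"), ("negamax", "negamax")], "random"))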
Example No. 5
def one_against_three(agents):
    n_agents = len(agents)

    scores = np.zeros((n_agents, n_agents), dtype=int)

    print("Simulation of battles. It can take some time...")

    for ind_1 in range(n_agents):
        for ind_2 in range(n_agents):
            print(
                f"LOG: {agents[ind_1]} vs 3 X {agents[ind_2]}",
                end="\r"
            )

            current_score = evaluate(
                "hungry_geese",
                [
                    agents[ind_1],
                    agents[ind_2],
                    agents[ind_2],
                    agents[ind_2],
                ],
                num_episodes=100,
            )

            episode_winners = np.argmax(current_score, axis=1)
            episode_winner_counts = collections.Counter(episode_winners)

            scores[ind_1, ind_2] = episode_winner_counts.get(0, 0)

        print()

    return scores
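The win-count matrix returned by one_against_three can be labelled for easier reading. This is only a sketch: it assumes pandas is installed, that np, collections and evaluate are imported as in the source, and the agent list below is a placeholder ("greedy" is a built-in hungry_geese agent). Note that it runs the full 100-episode evaluation for every pair, so it is slow.

import pandas as pd

agent_names = ["greedy", "greedy"]  # placeholder list for illustration
scores = one_against_three(agent_names)
# Rows: the lone agent; columns: the opponent fielded three times.
print(pd.DataFrame(scores, index=agent_names, columns=agent_names))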
Example No. 6
def one_on_one_with_two_simple(agents):
    n_agents = len(agents)

    scores = np.zeros((n_agents, n_agents), dtype=int)

    print("Simulation of battles. It can take some time...")

    for ind_1 in range(n_agents):
        for ind_2 in range(ind_1 + 1, n_agents):
            print(
                f"LOG: {agents[ind_1]} vs {agents[ind_2]} vs 2 X simple_toward",
                end="\r"
            )

            current_score = evaluate(
                "hungry_geese",
                [
                    agents[ind_1],
                    agents[ind_2],
                    "simple_toward.py",
                    "simple_toward.py",
                ],
                num_episodes=100,
            )

            episode_winners = np.argmax(current_score, axis=1)
            episode_winner_counts = collections.Counter(episode_winners)

            scores[ind_1, ind_2] = episode_winner_counts.get(0, 0)
            scores[ind_2, ind_1] = episode_winner_counts.get(1, 0)

        print()

    return scores
Example No. 7
def eval():
    scores = evaluate('hungry_geese', ['greedy', 'submission.py', 'greedy', 'greedy'], num_episodes=100)
    scoreboard = [0, 0, 0, 0]
    for score in scores:
        winner = np.argmax(score)
        scoreboard[winner] += 1
    print(scores)
    print(scoreboard)
    print()
Example No. 8
def winPercentage(self, episode):
    # print("vs " + str(self.trainer.enemy))
    env = make("connectx", debug=True)
    env.render()
    env.reset()
    config = {'rows': 6, 'columns': 7, 'inarow': 4}
    outcomes = evaluate("connectx", [self.agent, self.trainer.enemy],
                        config, [], self.rounds // 2)
    # Agent 2 goes first (roughly) half the time
    outcomes += [[
        b, a
    ] for [a, b] in evaluate("connectx", [self.trainer.enemy, self.agent],
                             config, [], self.rounds - self.rounds // 2)]
    self.trainer.writer.add_scalar(
        'win_percentage_agent',
        np.round(outcomes.count([1, -1]) / len(outcomes), 2), episode)
    self.trainer.writer.add_scalar(
        'win_percentage_random',
        np.round(outcomes.count([-1, 1]) / len(outcomes), 2), episode)
Example No. 9
def compare_agents(env, agent1, agent2, num_episodes=10):
    """
    Args:
        env: the kaggle_environments-made env to run in
        agent1 (str|kaggle): either a Kaggle standard agent 
            loaded with load_agent or a path to your own 
            custom agent
        agent2 (str): as above, to compete with agent1
        num_episodes: How many times to average the game over
    """
    rewards = ke.evaluate("connectx", [agent1, agent2],
                          num_episodes=num_episodes)
    try:
        mean_rewards = np.mean(rewards, axis=0)
    except TypeError as te:
        raise TypeError(f"{te}None-reward likely means your submission"
                        f" file isn't runnable i.e. has an error in it.")
    print("mean reward of agent1 vs agent2:", mean_rewards[0], ":",
          mean_rewards[1])
    return mean_rewards
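A sketch of invoking the helper with the built-in connectx baselines, assuming ke is kaggle_environments and np is NumPy, as the snippet implies.

import numpy as np
import kaggle_environments as ke

env = ke.make("connectx", debug=True)
mean_rewards = compare_agents(env, "random", "negamax", num_episodes=4)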
Example No. 10
def get_result(match_settings):
    start = datetime.now()
    outcomes = kaggle_environments.evaluate(
        'rps', [match_settings[0], match_settings[1]],
        num_episodes=match_settings[2],
        configuration={'debug': True})
    won, lost, tie, avg_score = 0, 0, 0, 0.
    for outcome in outcomes:
        score = outcome[0]
        if score > 0: won += 1
        elif score < 0: lost += 1
        else: tie += 1
        avg_score += score
    elapsed = datetime.now() - start
    opponent_name = os.path.basename(match_settings[1])
    opponent_name = os.path.splitext(opponent_name)[0]
    print(
        f'... vs {opponent_name:<30} --- {won:2d}/{tie:2d}/{lost:2d} --- {avg_score}'
    )
    return match_settings[1], won, lost, tie, elapsed, float(
        avg_score) / float(match_settings[2])
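A sketch of how get_result might be driven for several opponents. The file names below are placeholders rather than files from the source, and datetime, os and kaggle_environments are assumed to be imported as in the original module.

# Hypothetical driver: one bot against several opponent files, a few episodes each.
my_bot = "my_bot.py"                             # placeholder path
opponents = ["opponent_a.py", "opponent_b.py"]   # placeholder paths
for opponent in opponents:
    get_result((my_bot, opponent, 3))            # (agent, opponent, num_episodes)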
Example No. 11
# **      'signs': int 
# **      'tieRewardThreshold': int

# Making the environment
configuration = { 
      "actTimeout" : 1,
      "agentTimeout": 60,
      "runTimeout" : 1200 
   }

env = kg.make("rps", debug=True, configuration = configuration)

# Making agents from classes
ewa_agent = EWAAgent(sample_mode=False)
ewa_agent_sample = EWAAgent(sample_mode=True)

# Loading agent from file 
random_agent = "main.py"

# Evaluate 
agents = [ewa_agent_sample.play, ewa_agent.play]

steps = 1000
num_episodes = 10
results = kg.evaluate('rps', agents, configuration, num_episodes=num_episodes)
agent_0_wins = [1 if rewards[0] > rewards[1] else 0 for rewards in results]
agent_0_draws = [1 if rewards[0] == rewards[1] else 0 for rewards in results]
print(f"""(Agent 0) Results.
******Wins: {sum(agent_0_wins)/num_episodes * 100:.2f}% of episodes
      Draws: {sum(agent_0_draws)/num_episodes * 100:.2f}%""")
print(f"(Agent 0) Average rewards: {sum([rewards[0] for rewards in results])/num_episodes:.2f}.")
Example No. 12
def action_evaluate(args):
    return json.dumps(
        evaluate(args.environment, args.agents, args.configuration, args.steps,
                 args.episodes))
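A sketch of the kind of argument object this handler expects. The argparse.Namespace below is an assumption about the caller, not taken from the source; evaluate and json are assumed to be imported as in the original module.

import argparse

# Hypothetical args object mirroring the attributes action_evaluate reads.
args = argparse.Namespace(environment="connectx", agents=["random", "random"],
                          configuration={}, steps=[], episodes=2)
print(action_evaluate(args))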
Example No. 13
while not env.done:
    my_action = my_agent(observation, env.configuration)
    print("My Action", my_action)
    observation, reward, done, info = trainer.step(my_action)
    # env.render(mode="ipython", width=100, height=90, header=False, controls=False)
env.render()


def mean_reward(rewards):
    return sum(r[0] for r in rewards) / float(len(rewards))


# Run multiple episodes to estimate its performance.
print(
    "My Agent vs Random Agent:",
    mean_reward(evaluate("connectx", [my_agent, "random"], num_episodes=1000)))
print(
    "My Agent vs Negamax Agent:",
    mean_reward(evaluate("connectx", [my_agent, "negamax"], num_episodes=10)))

import inspect
import os

os.chdir('E:\\Projects\\04_ConnectX')


def write_agent_to_file(function, file):
    with open(file, "a" if os.path.exists(file) else "w") as f:
        f.write(inspect.getsource(function))
        print(function, "written to", file)
Example No. 14
# import submission module to test as agent function
from submissions.submission_REINFORCE import agent_function
from kaggle_environments import evaluate, make, utils


# Adapted to new reward structure
# Count number of wins. Draws are losses...
def mean_reward(rewards, first=True):
    return sum(1 if r[0 if first else 1] == 1 else 0 for r in rewards) / float(len(rewards))

# Calculate in each direction and vs each built-in agent
reward_random = mean_reward(evaluate("connectx", [agent_function, "random"], num_episodes=10))
print("Ours vs Random:", reward_random)
reward_negamax = mean_reward(evaluate("connectx", [agent_function, "negamax"], num_episodes=10))
print("Ours vs Negamax:", reward_negamax)
reward_random_inv = mean_reward(evaluate("connectx", ["random", agent_function], num_episodes=10), first=False)
print("Random vs Ours:", reward_random_inv)
reward_negamax_inv = mean_reward(evaluate("connectx", ["negamax", agent_function], num_episodes=10), first=False)
print("Negamax vs Ours:", reward_negamax_inv)
Example No. 15
def eval(agent):
    results = evaluate("connectx", [agent, "negamax"], num_episodes=10)
    final_result = agent_reward(results)
    print("my Agent vs Negamax Agent:", final_result)
Example No. 16
from tqdm import tqdm
from time import sleep

from kaggle_environments import make, evaluate

x = evaluate("rps", ["player_paper_lover.py", "player_random_player.py"],
             configuration={"episodeSteps": 1000})

print(x)

# import os
# print(os.listdir())
Example No. 17
import sys
from kaggle_environments import evaluate
args = sys.argv
agents = [args[1], args[2]]
### Do not edit above

rewards = evaluate("rps", agents=agents, configuration={"episodeSteps": 1000})

### Do not edit below
print('\nD_MATCH_FINISHED')
print(rewards[0])
Example No. 18
def test_can_evaluate():
    rewards = evaluate("connectx", ["random", "random"], num_episodes=2)
    assert (rewards[0][0] + rewards[0][1]
            == 1) and rewards[1][0] + rewards[1][1] == 1
Example No. 19
    env = make(ENV_NAME, configuration={"episodeSteps": NR_STEPS})

    for i in (list_agents):
        if not (os.path.exists(i)):
            raise ImportError('One of the agent paths is not well defined')


    for ind_agent_1 in range(len(list_names)):
        for ind_agent_2 in range(ind_agent_1 + 1, len(list_names)):
            print(f"LOG: {list_names[ind_agent_1]} vs {list_names[ind_agent_2]}", end="\r")
            
            results = []
            for i in range(3):
                current_score = evaluate(
                    ENV_NAME, 
                    [list_agents[ind_agent_1], list_agents[ind_agent_2]], 
                    configuration={"episodeSteps": NR_STEPS}
                )
                if current_score[0][0] is None:
                    results.append(0)
                else:
                    results.append(current_score[0][0])

            
            scores[ind_agent_1, ind_agent_2] = np.mean(results)
            scores[ind_agent_2, ind_agent_1] = -np.mean(results)
            min_scores[ind_agent_1, ind_agent_2] = min(results)
            min_scores[ind_agent_2, ind_agent_1] = -min(results)
            max_scores[ind_agent_1, ind_agent_2] = max(results)
            max_scores[ind_agent_2, ind_agent_1] = -max(results)
        
Example No. 20
    "nash_equilibrium",
    "markov_agent",
    "memory_patterns",
    # "multi_armed_bandit",
    "opponent_transition_matrix",
    "decision_tree_classifier",
    "statistical_prediction",
]
list_agents = [agent_name + ".py" for agent_name in list_names]
simulation_times = 10
scores = np.zeros((len(list_names), simulation_times), dtype=int)

for i in range(simulation_times):
    for ind_agent_1 in range(len(list_names)):
        current_score = evaluate(
            "rps", ["multi_armed_bandit.py", list_agents[ind_agent_1]],
            configuration={"episodeSteps": 1000})
        print(i, list_names[ind_agent_1], current_score[0][0])
        if current_score[0][0] >= 20:
            add_score = 1
        elif current_score[0][0] <= -20:
            add_score = -1
        else:
            add_score = 0
        scores[ind_agent_1, i] = add_score

df_scores = pd.DataFrame(scores)
df_scores.index = list_names
print(df_scores.mean(axis=1))
# print(df_scores.std(axis=1))
print(df_scores.median(axis=1))
Example No. 21
def test_wins_against_4_randoms():
    scores = evaluate("halite", [agent, "random", "random", "random"],
                      num_episodes=1,
                      configuration={"agentExec": "LOCAL"})
    assert scores[0] == max(scores)
Example No. 22
def test_can_evaluate():
    rewards = evaluate("tictactoe", ["random", "reaction"], num_episodes=2)
    assert (rewards[0][0] + rewards[0][1]
            == 0) and rewards[1][0] + rewards[1][1] == 0
Example No. 23
def test_wins_against_random():
    my_score, enemy_score = evaluate("halite", [agent, "random"],
                                     num_episodes=1,
                                     configuration={"agentExec": "LOCAL"})[0]
    assert my_score > enemy_score
Example No. 24
# These lines test it against another AI
# print("My Agent vs Random Agent:", mean_reward(evaluate("connectx", [my_agent, "random"], num_episodes=10)))
# print("My Agent vs Negamax Agent:", mean_reward(evaluate("connectx", [my_agent, "negamax"], num_episodes=10)))

# Play your agent
env.play([my_agent, None], width=500, height=450)

def write_agent_to_file(function, file):
    with open(file, "a" if os.path.exists(file) else "w") as f:
        f.write(inspect.getsource(function))
        print(function, "written to", file)



def mean_reward(rewards):
    return sum(r[0] for r in rewards) / sum(r[0] + r[1] for r in rewards)

write_agent_to_file(my_agent, "submission.py")

out = sys.stdout
submission = utils.read_file("submission.py")
agent = utils.get_last_callable(submission)
sys.stdout = out

env = make("connectx", debug=True)
# Run multiple episodes to estimate its performance.
print("My Agent vs Random Agent:", mean_reward(evaluate("connectx", [agent, "random"], num_episodes=10)))
print("My Agent vs Negamax Agent:", mean_reward(evaluate("connectx", [agent, "negamax"], num_episodes=10)))
print("Success!" if env.state[0].status == env.state[1].status == "DONE" else "Failed...")
import os

from kaggle_environments import make, register, evaluate
from kaggle_environments.envs.football import football

env = make("football", debug=True, configuration={"scenario_name": "test_example_multiagent", "team_1": 1, "team_2": 0, "episodeSteps": 100, "render": False, "save_video": True})
print(env.name, env.version)
print("Default Agents: ", *env.agents)

env.run(["run_right", "run_left"])
print("Video: %s" % env.football_video_path)
football.cleanup(env)
print("Logs stored in /tmp/football/%s" % env.id)


configuration = {"scenario_name": "test_example_multiagent", "team_1": 1, "team_2": 0, "episodeSteps": 100, "render": False, "save_video": True}
agents = ["run_right", "run_left"]
rewards = evaluate("football", agents, configuration, steps=[], num_episodes=10)
## Broken: evaluate looks only on rewards from the last step.
# (or should we finish after a scored goal??)
print(rewards)
football.cleanup_all()