Example #1
def run_model(args):
    env = TankEnv(args.game_path,
                  opp_fp_and_elo=[(args.opp, 1000)],
                  game_port=args.base_port,
                  my_port=args.my_port,
                  image_based=args.image_based,
                  level_path=args.level_path,
                  rand_opp=args.rand_opp,
                  p=args.env_p,
                  opp_p=args.opp_env_p)
    model = None
    if args.p1:
        model = PPO.load(args.p1)
    elif args.p1same:
        model = PPO.load(args.opp)

    score = [0, 0, 0]
    print("Score: [Player1 Wins, Player2 Wins, Ties]")

    obs = env.reset()
    if args.image_based and (args.ai_view or args.rev_ai_view):
        fig = plt.gcf()
        fig.show()
        fig.canvas.draw()
    while True:
        if args.image_based and (args.ai_view or args.rev_ai_view):
            if not args.rev_ai_view:
                plt.imshow(obs, origin="lower")
            else:
                plt.imshow(env.opp_state, origin="lower")
            fig.canvas.draw()
        if model:
            action, _ = model.predict(obs)
        elif args.rand_p1:
            action = np.random.rand(5) * 2 - 1
        else:
            action = np.zeros(5, dtype=np.float32)
        obs, reward, done, info = env.step(action)
        if done:
            score[info["winner"]] += 1
            print("Score:", score)
            obs = env.reset()
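
For reference, a minimal argparse setup that would supply the attributes run_model reads from args might look like the sketch below. The flag names mirror the args.* attributes used above; the types, defaults, and which flags are required are assumptions, not the original script's interface.

# Hypothetical CLI sketch for run_model; flag names follow the args.* attributes above.
import argparse

def build_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument("--game_path", type=str, required=True)
    parser.add_argument("--opp", type=str, required=True)
    parser.add_argument("--base_port", type=int, default=50000)
    parser.add_argument("--my_port", type=int, default=50001)
    parser.add_argument("--level_path", type=str, default=None)
    parser.add_argument("--env_p", type=int, default=3)
    parser.add_argument("--opp_env_p", type=int, default=3)
    parser.add_argument("--p1", type=str, default=None)
    parser.add_argument("--p1same", action="store_true")
    parser.add_argument("--image_based", action="store_true")
    parser.add_argument("--rand_opp", action="store_true")
    parser.add_argument("--rand_p1", action="store_true")
    parser.add_argument("--ai_view", action="store_true")
    parser.add_argument("--rev_ai_view", action="store_true")
    return parser

# run_model(build_parser().parse_args())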
Example #2

class AIMatchmaker(gym.Env):
    metadata = {'render.modes': None}

    def __init__(self,
                 all_stats,
                 all_opps,
                 all_elos,
                 game_path,
                 model_dir,
                 base_port=50000,
                 my_port=50001,
                 image_based=False,
                 level_path=None,
                 env_p=3,
                 starting_elo=None,
                 K=16,
                 D=5.,
                 time_reward=-0.003,
                 matchmaking_mode=0,
                 elo_log_interval=10000,
                 win_loss_ratio=(0, 0)):
        super(AIMatchmaker, self).__init__()

        self.all_stats = combine_winrates(all_stats)
        self.all_opps = all_opps
        self.all_elos = all_elos
        self.model_dir = model_dir

        self.agent_elo = starting_elo if starting_elo is not None else self.all_elos[0]
        self.env = TankEnv(game_path,
                           opp_fp_and_elo=[],
                           game_port=base_port,
                           my_port=my_port,
                           image_based=image_based,
                           level_path=level_path,
                           p=env_p,
                           time_reward=time_reward)
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space

        self.K = K
        self.D = D
        self.my_port = my_port
        self.mm = matchmaking_mode

        self.uncounted_games = np.array([0, 0], dtype=np.uint32)
        self.counted_game_sets = 0
        self.win_loss_ratio = np.array(win_loss_ratio, dtype=np.uint32)

        self.started = False
        self.next_opp()

        self.elo_log_interval = elo_log_interval
        self.num_steps = 0
        self.elo_log = []

    def next_opp(self):
        weights = np.zeros(len(self.all_elos), dtype=np.float32)
        if self.mm == 1:
            # Elo-based matchmaking: opponents whose Elo is closer to the
            # agent's Elo are preferred (but not guaranteed)
            weights += np.array(
                [weight_func(elo - self.agent_elo, self.D) for elo in self.all_elos],
                dtype=np.float32)

        if any(self.win_loss_ratio):
            while all(self.uncounted_games >= self.win_loss_ratio):
                self.uncounted_games -= self.win_loss_ratio
                self.counted_game_sets += 1

            tmp = self.uncounted_games >= self.win_loss_ratio
            if tmp[0] and not tmp[1]:
                # Need more losses
                if self.mm == 1:
                    # Zero the weights of opponents whose Elo is <= the agent's
                    for i, elo in enumerate(self.all_elos):
                        if elo <= self.agent_elo:
                            weights[i] = 0
                    # Fall back to the highest-Elo opponent if the agent's Elo
                    # is at least as high as every opponent's
                    if sum(weights) == 0:
                        weights[self.all_elos.index(max(self.all_elos))] = 1
                else:
                    # Equal probability for opponents whose Elo is > the agent's
                    for i, elo in enumerate(self.all_elos):
                        if elo > self.agent_elo:
                            weights[i] = 1
                    # Fall back to the highest-Elo opponent if the agent's Elo
                    # is at least as high as every opponent's
                    if sum(weights) == 0:
                        weights[self.all_elos.index(max(self.all_elos))] = 1
            elif not tmp[0] and tmp[1]:
                # Need more wins
                if self.mm == 1:
                    # Zero the weights of opponents whose Elo is >= the agent's
                    for i, elo in enumerate(self.all_elos):
                        if elo >= self.agent_elo:
                            weights[i] = 0
                    # Fall back to the lowest-Elo opponent if the agent's Elo
                    # is no higher than any opponent's
                    if sum(weights) == 0:
                        weights[self.all_elos.index(min(self.all_elos))] = 1
                else:
                    # Equal probability for opponents whose Elo is < the agent's
                    for i, elo in enumerate(self.all_elos):
                        if elo < self.agent_elo:
                            weights[i] = 1
                    # Fall back to the lowest-Elo opponent if the agent's Elo
                    # is no higher than any opponent's
                    if sum(weights) == 0:
                        weights[self.all_elos.index(min(self.all_elos))] = 1

        self.current_opp_idx = choice_with_normalization(
            list(range(len(self.all_elos))), weights)
        self.current_opp = self.all_opps[self.current_opp_idx]
        self.current_opp_elo = self.all_elos[self.current_opp_idx]
        #print("thread", self.my_port, "current opp elo:", self.current_opp_elo, "agent elo:", self.agent_elo, flush=True)
        self.env.load_new_opp(0, opp_fp(self.model_dir, self.current_opp),
                              self.current_opp_elo)

    def get_agent_elo(self):
        return self.agent_elo

    def reset(self):
        if self.started:
            # Convert the last game's outcome into an Elo score for the agent:
            # 1 for a win, 0 for a loss, 0.5 for a tie.
            last_winner = self.env.last_winner
            if last_winner == 0:
                win_rate = 1.
                self.uncounted_games[0] += 1
            elif last_winner == 1:
                win_rate = 0.
                self.uncounted_games[1] += 1
            else:
                win_rate = .5

            agent_elo_change, _ = elo_change(self.agent_elo,
                                             self.current_opp_elo, self.K,
                                             win_rate)
            self.agent_elo += int(agent_elo_change)
            #print("THREAD", self.my_port, "CURRENT AGENT ELO:", self.agent_elo, flush=True)
        else:
            self.started = True

        self.next_opp()
        return self.env.reset()

    def step(self, action):
        if self.num_steps % self.elo_log_interval == 0:
            self.elo_log.append(self.agent_elo)
        self.num_steps += 1
        return self.env.step(action)

    def render(self, mode='console'):
        raise NotImplementedError()

    def close(self):
        self.env.close()
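
AIMatchmaker relies on several helpers defined elsewhere in the repository and not shown here: combine_winrates, weight_func, elo_change, choice_with_normalization, and opp_fp. The sketch below is a hedged reconstruction of the two whose behaviour is implied by the class, assuming the standard Elo expected-score formula and a weighted choice that falls back to a uniform pick when every weight is zero (the situation when matchmaking_mode is 0 and no win/loss constraint is active). The real weight_func, which maps an Elo difference and the D parameter to a sampling weight, is deliberately not reproduced.

# Hedged sketch of two helpers used by AIMatchmaker, assuming standard Elo math.
import numpy as np

def elo_change(elo_a, elo_b, K, score_a):
    # Standard Elo update: logistic expected score, then a K-scaled
    # correction toward the actual score (1 win, 0.5 tie, 0 loss).
    expected_a = 1. / (1. + 10. ** ((elo_b - elo_a) / 400.))
    change_a = K * (score_a - expected_a)
    return change_a, -change_a

def choice_with_normalization(options, weights):
    # Sample one option proportionally to its weight; fall back to a
    # uniform choice when every weight is zero.
    weights = np.asarray(weights, dtype=np.float64)
    total = weights.sum()
    if total <= 0:
        return np.random.choice(options)
    return np.random.choice(options, p=weights / total)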
Example #3
if args.image_based:
    model = PPO("CnnPolicy", env, n_steps=64)
else:
    model = PPO("MlpPolicy", env, n_steps=64)
    
print(model.policy)
  
try:
    if args.train:
        model.learn(total_timesteps=args.num_steps)
    else:
        obs = env.reset()
        if args.image_based and args.ai_view:
            fig = plt.gcf()
            fig.show()
            fig.canvas.draw()
        for _ in tqdm(range(args.num_steps)):
            if args.image_based and args.ai_view:
                plt.imshow(obs, origin="lower", interpolation='none')
                fig.canvas.draw()
            if model:
                action, _ = model.predict(obs)
            elif args.rand_p1:
                action = np.random.rand(5) * 2 - 1
            else:
                action = np.zeros(5, dtype=np.float32)
            obs, reward, done, info = env.step(action)
            if done:
                obs = env.reset()
finally:
    env.close()
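
The env in Example #3 is created before this excerpt begins. One way the pieces on this page could fit together, sketched under the assumption that the matchmaking environment is used directly as the training env, is shown below; the all_* lists, args.model_dir, and args.save_path are placeholders rather than the original script's values.

# Hypothetical wiring of AIMatchmaker into PPO training (placeholder inputs).
env = AIMatchmaker(all_stats, all_opps, all_elos,
                   game_path=args.game_path,
                   model_dir=args.model_dir,
                   base_port=args.base_port,
                   my_port=args.base_port + 1,
                   image_based=args.image_based,
                   level_path=args.level_path,
                   env_p=args.env_p)
model = PPO("CnnPolicy" if args.image_based else "MlpPolicy", env, n_steps=64)
model.learn(total_timesteps=args.num_steps)
model.save(args.save_path)  # assumed output path
print("Final agent Elo:", env.get_agent_elo())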
                game_port=args.base_port+port, 
                my_port=args.base_port+port+1,
                level_path=args.level_path,
                image_based=pop_stats[p_idx]["image_based"],
                p=pop_stats[p_idx]["env_p"],
                verbose=True
                )
                
            print("Worker", args.worker_idx, "got here", 4, flush=True)
                
            for i, opp in enumerate(tqdm(pop, file=sys.stdout)):
                env.load_new_opp(0, curr_model_path(args.local_pop_dir, opp, pop_stats[pop.index(opp)]), 0)
                for j in range(args.N):
                    obs = env.reset()
                    side = -1 if args.from_right else 1
                    # Keep resetting until the agent starts on the desired side
                    # (raw_state[0] is assumed to be its horizontal position)
                    while env.raw_state[0] * side > 0:
                        obs = env.reset()

                    done = False
                    for k in range(args.max_len + 1):
                        traj_set[i, j, k, :, :, :] = obs
                        if done or k == args.max_len:
                            info_set[i, j] = k
                            break
                        else:
                            action, _ = p_model.predict(obs)
                            obs, _, done, _ = env.step(action)

            np.savez_compressed(args.model_dir + p + "/" + args.save_name, traj=traj_set, info=info_set)
        finally:
            env.close()
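
The last fragment ends by writing the collected trajectories with np.savez_compressed. A minimal sketch of reading that archive back follows; the file path is a placeholder for the args.model_dir + p + "/" + args.save_name value used above.

# Reading back the trajectory archive saved above (path is a placeholder).
import numpy as np

data = np.load("trajectories.npz")
traj_set = data["traj"]  # observations, indexed [opponent, episode, step, ...]
info_set = data["info"]  # per-episode index of the last recorded step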