def make_env_stack(num_envs, game_path, base_port, game_log_path,
        opp_fp_and_elo, trainee_elo, elo_match=True, survivor=False,
        stdout_path=None, level_path=None, image_based=False,
        time_reward=0., env_p=3):
    if num_envs >= 1:
        envs = []
        for i in range(num_envs):
            # Default arguments freeze the per-env values at lambda creation
            # time; each env gets its own port pair and log files.
            # stdout_path may be None (its default), so only derive a per-env
            # path when one was given.
            envs.append(lambda game_path=game_path, b=base_port + (i * 2),
                c=game_log_path.replace(".txt", "-" + str(i) + ".txt"),
                d=opp_fp_and_elo, e=elo_match, f=trainee_elo, g=survivor,
                h=stdout_path.replace(".txt", "-" + str(i) + ".txt")
                    if stdout_path else None,
                lvl=level_path, j=image_based, k=time_reward:
                TankEnv(game_path, game_port=b, game_log_path=c,
                    opp_fp_and_elo=d, elo_match=e, center_elo=f, survivor=g,
                    stdout_path=h, verbose=True, level_path=lvl,
                    image_based=j, time_reward=k, p=env_p))
        # "fork" is cheaper for a single subprocess; "forkserver" is safer
        # when spawning several.
        if num_envs == 1:
            env_stack = SubprocVecEnv(envs, start_method="fork")
        else:
            env_stack = SubprocVecEnv(envs, start_method="forkserver")
        env_stack.reset()
        return env_stack
    else:
        # num_envs < 1 falls back to a single, unwrapped TankEnv
        env = TankEnv(game_path, game_port=base_port,
            game_log_path=game_log_path, opp_fp_and_elo=opp_fp_and_elo,
            elo_match=elo_match, center_elo=trainee_elo, survivor=survivor,
            stdout_path=stdout_path, level_path=level_path,
            image_based=image_based, time_reward=time_reward, p=env_p)
        env.reset()
        return env
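# A minimal usage sketch (assumptions: TankEnv and SubprocVecEnv are imported
# in this module; the game build and opponent paths below are placeholders):
#
#   env_stack = make_env_stack(
#       num_envs=4,
#       game_path="./builds/tanks.x86_64",        # hypothetical path
#       base_port=50000,                          # env i uses base_port + 2*i
#       game_log_path="gamelog.txt",              # "-<i>" suffix added per env
#       opp_fp_and_elo=[("./models/opp/best", 1000)],  # hypothetical opponent
#       trainee_elo=1000,
#       stdout_path="stdout.txt",
#   )
#   obs = env_stack.reset()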
def run_model(args):
    env = TankEnv(args.game_path,
        opp_fp_and_elo=[(args.opp, 1000)],
        game_port=args.base_port,
        my_port=args.my_port,
        image_based=args.image_based,
        level_path=args.level_path,
        rand_opp=args.rand_opp,
        p=args.env_p,
        opp_p=args.opp_env_p)
    model = None
    if args.p1:
        model = PPO.load(args.p1)
    elif args.p1same:
        model = PPO.load(args.opp)
    score = [0, 0, 0]
    print("Score: [Player1 Wins, Player2 Wins, Ties]")
    obs = env.reset()
    if args.image_based and (args.ai_view or args.rev_ai_view):
        fig = plt.gcf()
        fig.show()
        fig.canvas.draw()
    while True:
        if args.image_based and (args.ai_view or args.rev_ai_view):
            if not args.rev_ai_view:
                plt.imshow(obs, origin="lower")
            else:
                plt.imshow(env.opp_state, origin="lower")
            fig.canvas.draw()
        if model:
            action, _ = model.predict(obs)
        elif args.rand_p1:
            action = np.random.rand(5) * 2 - 1
        else:
            action = np.zeros(5, dtype=np.float32)
        obs, reward, done, info = env.step(action)
        if done:
            score[info["winner"]] += 1
            print("Score:", score)
            obs = env.reset()
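# Note on the action convention used above: actions are 5-dimensional and,
# judging by the random-policy branch (np.random.rand(5) * 2 - 1), each
# component lies in [-1, 1]. For reproducible evaluation, stable-baselines3
# also supports deterministic prediction:
#
#   action, _ = model.predict(obs, deterministic=True)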
def make_ai_matchmaker_eval_stack(game_path, base_port, image_based,
        level_path, env_p, num_envs):
    envs = []
    for i in range(num_envs):
        envs.append(
            lambda a=game_path, b=base_port + (i * 2),
            c=base_port + (i * 2) + 1, d=image_based, e=level_path, f=env_p:
            TankEnv(a,
                opp_fp_and_elo=[],
                game_port=b,
                my_port=c,
                elo_match=False,
                image_based=d,
                level_path=e,
                p=f))
    env_stack = SubprocVecEnv(envs, start_method="fork")
    return env_stack
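# Sketch of driving the eval stack (assumption: opponents are loaded into each
# worker via SubprocVecEnv.env_method, which forwards the call to
# TankEnv.load_new_opp; the build and model paths are placeholders):
#
#   env_stack = make_ai_matchmaker_eval_stack(
#       "./builds/tanks.x86_64", 50000, False, None, 3, num_envs=4)
#   env_stack.env_method("load_new_opp", 0, "./models/opp/best", 1000)
#   obs = env_stack.reset()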
class AIMatchmaker(gym.Env):
    metadata = {'render.modes': None}

    def __init__(self, all_stats, all_opps, all_elos, game_path, model_dir,
            base_port=50000, my_port=50001, image_based=False,
            level_path=None, env_p=3, starting_elo=None, K=16, D=5.,
            time_reward=-0.003, matchmaking_mode=0, elo_log_interval=10000,
            win_loss_ratio=[0, 0]):
        super(AIMatchmaker, self).__init__()
        self.all_stats = combine_winrates(all_stats)
        self.all_opps = all_opps
        self.all_elos = all_elos
        self.model_dir = model_dir
        self.agent_elo = (starting_elo if starting_elo is not None
            else self.all_elos[0])
        self.env = TankEnv(game_path,
            opp_fp_and_elo=[],
            game_port=base_port,
            my_port=my_port,
            image_based=image_based,
            level_path=level_path,
            p=env_p,
            time_reward=time_reward)
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self.K = K
        self.D = D
        self.my_port = my_port
        self.mm = matchmaking_mode
        self.uncounted_games = np.array([0, 0], dtype=np.uint32)
        self.counted_game_sets = 0
        self.win_loss_ratio = np.array(win_loss_ratio, dtype=np.uint32)
        self.started = False
        self.next_opp()
        self.elo_log_interval = elo_log_interval
        self.num_steps = 0
        self.elo_log = []

    def next_opp(self):
        weights = np.zeros((len(self.all_elos)), dtype=np.float32)
        if self.mm == 1:
            # Elo-based matchmaking: Elos closer to the agent's Elo are
            # preferred (but not guaranteed)
            weights += np.array([
                weight_func(elo - self.agent_elo, self.D)
                for elo in self.all_elos
            ], dtype=np.float32)
        if any(self.win_loss_ratio):
            while all(self.uncounted_games >= self.win_loss_ratio):
                self.uncounted_games -= self.win_loss_ratio
                self.counted_game_sets += 1
            tmp = self.uncounted_games >= self.win_loss_ratio
            if tmp[0] and not tmp[1]:
                # Need more losses
                if self.mm == 1:
                    # Zero weights for opponents with Elos <= the agent's
                    for i, elo in enumerate(self.all_elos):
                        if elo <= self.agent_elo:
                            weights[i] = 0
                    # Fall back to the highest-Elo opponent if the agent's
                    # Elo is higher than all opponent Elos
                    if sum(weights) == 0:
                        weights[self.all_elos.index(max(self.all_elos))] = 1
                else:
                    # Equal probability for opponents with Elos > the agent's
                    for i, elo in enumerate(self.all_elos):
                        if elo > self.agent_elo:
                            weights[i] = 1
                    # Fall back to the highest-Elo opponent if the agent's
                    # Elo is higher than all opponent Elos
                    if sum(weights) == 0:
                        weights[self.all_elos.index(max(self.all_elos))] = 1
            elif not tmp[0] and tmp[1]:
                # Need more wins
                if self.mm == 1:
                    # Zero weights for opponents with Elos >= the agent's
                    for i, elo in enumerate(self.all_elos):
                        if elo >= self.agent_elo:
                            weights[i] = 0
                    # Fall back to the lowest-Elo opponent if the agent's
                    # Elo is lower than all opponent Elos
                    if sum(weights) == 0:
                        weights[self.all_elos.index(min(self.all_elos))] = 1
                else:
                    # Equal probability for opponents with Elos < the agent's
                    for i, elo in enumerate(self.all_elos):
                        if elo < self.agent_elo:
                            weights[i] = 1
                    # Fall back to the lowest-Elo opponent if the agent's
                    # Elo is lower than all opponent Elos
                    if sum(weights) == 0:
                        weights[self.all_elos.index(min(self.all_elos))] = 1
        self.current_opp_idx = choice_with_normalization(
            [i for i in range(len(self.all_elos))], weights)
        self.current_opp = self.all_opps[self.current_opp_idx]
        self.current_opp_elo = self.all_elos[self.current_opp_idx]
        #print("thread", self.my_port, "current opp elo:",
        #    self.current_opp_elo, "agent elo:", self.agent_elo, flush=True)
        self.env.load_new_opp(0, opp_fp(self.model_dir, self.current_opp),
            self.current_opp_elo)

    def get_agent_elo(self):
        return self.agent_elo

    def reset(self):
        if self.started:
            last_winner = self.env.last_winner
            if last_winner == 0:
                win_rate = 1.
                self.uncounted_games[0] += 1
            elif last_winner == 1:
                win_rate = 0.
                self.uncounted_games[1] += 1
            else:
                win_rate = .5
            agent_elo_change, _ = elo_change(self.agent_elo,
                self.current_opp_elo, self.K, win_rate)
            self.agent_elo += int(agent_elo_change)
            #print("THREAD", self.my_port, "CURRENT AGENT ELO:",
            #    self.agent_elo, flush=True)
        else:
            self.started = True
        self.next_opp()
        return self.env.reset()

    def step(self, action):
        if self.num_steps % self.elo_log_interval == 0:
            self.elo_log.append(self.agent_elo)
        self.num_steps += 1
        return self.env.step(action)

    def render(self, mode='console'):
        raise NotImplementedError()

    def close(self):
        self.env.close()
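# Minimal training sketch around AIMatchmaker (assumptions: all_stats,
# all_opps, and all_elos come from the population helpers used elsewhere in
# this codebase, and the paths are placeholders):
#
#   env = AIMatchmaker(all_stats, all_opps, all_elos,
#                      game_path="./builds/tanks.x86_64",
#                      model_dir="./models",
#                      matchmaking_mode=1)
#   model = PPO("MlpPolicy", env)
#   model.learn(total_timesteps=100_000)
#   print("Final agent Elo:", env.get_agent_elo())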
if not os.path.isdir(args.model_dir):
    raise FileNotFoundError("Base directory for agent models is not a folder")
if not os.path.exists(args.noun_file_path):
    raise FileNotFoundError("Provided path does not lead to noun file")
if not os.path.exists(args.adj_file_path):
    raise FileNotFoundError("Provided path does not lead to adjective file")

if args.num_envs > 1:
    envs = []
    for i in range(args.num_envs):
        envs.append(
            lambda game_path=args.game_path, b=args.base_port + (i * 2),
            c="gamelog-" + str(i) + ".txt", d=args.level_path,
            e=args.image_based, f=args.env_p:
            TankEnv(game_path,
                game_port=b,
                game_log_path=c,
                level_path=d,
                image_based=e,
                p=f))
    env_stack = DummyVecEnv(envs)
else:
    env_stack = TankEnv(args.game_path,
        game_port=args.base_port,
        game_log_path="gamelog.txt",
        level_path=args.level_path,
        image_based=args.image_based,
        p=args.env_p)

try:
    population = []
    for i in range(args.start):
        agent_name, agent = gen_agent(env_stack,
            args.num_envs,
            args.model_dir,
            args.noun_file_path,
            args.adj_file_path,
            batch_size=args.batch_size,
            image_based=args.image_based,
            image_pretrain=args.image_pretrain,
            env_p=args.env_p)
        population.append(agent_name)
        if args.nem:
parser.add_argument("game_path", type=str, default=None, help="File path of game executable") parser.add_argument("--base_port", type=int, default=50000, help="Base port to be used for game environment") parser.add_argument("--my_port", type=int, default=50500, help="Port to be used on Python side of network socket connection") parser.add_argument("--image_based", action="store_true", help="Indicates that env observation space is image based, and will show those states using matplotlib") parser.add_argument("--level_path", type=str, default=None, help="Path to level file") parser.add_argument("--ai_view", action="store_true", help="Indicates that AI version of game state should be rendered") parser.add_argument("--train", action="store_true", help="Indicates that test should try training model (as opposed to just running eval)") parser.add_argument("--num_steps", type=int, default=128, help="Number of steps to run for") parser.add_argument("--env_p", type=int, default=3, help="p^2 pixels will represent one in-game grid square") args = parser.parse_args() print(args) env = TankEnv(args.game_path, opp_fp_and_elo=[], game_port=args.base_port, my_port=args.my_port, image_based=args.image_based, level_path=args.level_path, rand_opp=True, p=args.env_p) if args.image_based: model = PPO("CnnPolicy", env, n_steps=64) else: model = PPO("MlpPolicy", env, n_steps=64) print(model.policy) try: if args.train: model.learn(total_timesteps=args.num_steps) else: obs = env.reset()
print("Worker", args.worker_idx, "got here", 2, flush=True) for port,p in enumerate(my_pop): p_idx = pop.index(p) p_model = PPO.load(curr_model_path(args.local_pop_dir, p, pop_stats[pop.index(p)])) traj_set = np.full((len(pop), args.N, args.max_len+1, 12*pop_stats[p_idx]["env_p"], 20*pop_stats[p_idx]["env_p"], 3), 255, dtype=np.uint8) info_set = np.full((len(pop), args.N), -1, dtype=np.int16) print("Worker", args.worker_idx, "got here", 3, flush=True) try: env = TankEnv(args.game_path, opp_fp_and_elo=[], game_port=args.base_port+port, my_port=args.base_port+port+1, level_path=args.level_path, image_based=pop_stats[p_idx]["image_based"], p=pop_stats[p_idx]["env_p"], verbose=True ) print("Worker", args.worker_idx, "got here", 4, flush=True) for i,opp in enumerate(tqdm(pop, file=sys.stdout)): env.load_new_opp(0, curr_model_path(args.local_pop_dir, opp, pop_stats[pop.index(opp)]), 0) for j in range(args.N): obs = env.reset() side = -1 if args.from_right else 1 while env.raw_state[0] * side > 0: obs = env.reset()
def human_matchmaking(args):
    WINS = 0
    LOSSES = 1
    GAMES = 2
    pop = load_pop(args.model_dir)
    all_stats = {}
    for p in pop:
        all_stats[p] = load_stats(args.model_dir, p)
    all_opps = sorted_keys(all_stats)
    all_opps.reverse()
    all_elos = []
    for opp in all_opps:
        all_elos.append(int(avg_elo(all_stats[opp], avg_len=args.avg_len)))
    human_stats = get_human_stats(args.human_db)

    current_opp_idx = len(all_elos) // 2
    current_opp = all_opps[current_opp_idx]
    current_opp_elo = all_elos[current_opp_idx]
    human_elo = (human_stats["elo"][-1] if len(human_stats["elo"]) > 0
        else current_opp_elo)
    try:
        env = TankEnv(args.game_path,
            opp_fp_and_elo=[(opp_fp(args.model_dir, current_opp),
                current_opp_elo)],
            game_port=args.base_port,
            my_port=args.my_port,
            image_based=args.image_based,
            level_path=args.level_path,
            p=args.env_p)
        print("Starting matchmaking")
        while human_elo <= all_elos[-1]:
            print("Current opp:", current_opp)
            print("Opp elo:", current_opp_elo)
            print("Human elo:", human_elo)
            score = play_match(env, args.num_games)
            # Map the net score in [-1, 1] to a win rate in [0, 1]
            human_win_rate = ((score[WINS] - score[LOSSES]) / sum(score) + 1) / 2
            K = 16
            human_elo_change, _ = elo_change(human_elo, current_opp_elo, K,
                human_win_rate)
            human_elo += int(human_elo_change)
            human_stats["elo"].append(human_elo)
            if current_opp not in human_stats["win_rate"]:
                human_stats["win_rate"][current_opp] = [0, 0, 0]
            human_stats["win_rate"][current_opp][WINS] += score[WINS]
            human_stats["win_rate"][current_opp][LOSSES] += score[LOSSES]
            human_stats["win_rate"][current_opp][GAMES] += sum(score)
            D = 5.
            current_opp_idx = elo_based_choice(all_elos, human_elo, D)
            current_opp = all_opps[current_opp_idx]
            current_opp_elo = all_elos[current_opp_idx]
            env.load_new_opp(0, opp_fp(args.model_dir, current_opp),
                current_opp_elo)
        print("CONGRATS, YOU ARE BETTER THAN ALL THE AGENTS!")
    finally:
        env.close()
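# elo_change is defined elsewhere in this codebase; a sketch consistent with
# how it is called above (returning a (delta_a, delta_b) pair), assuming the
# standard Elo update with a 400-point logistic scale:
#
#   def elo_change(elo_a, elo_b, k, score_a):
#       # Expected score for player A against player B
#       expected_a = 1.0 / (1.0 + 10 ** ((elo_b - elo_a) / 400.0))
#       delta_a = k * (score_a - expected_a)
#       return delta_a, -delta_a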
parser.add_argument("--env_p", type=int, default=3,
    help="Image-based environment will draw one in-game grid square as p^2 pixels")
args = parser.parse_args()
print(args)

obs_set = np.zeros((args.num_obs, 52), dtype=np.float32)
img_set = np.zeros((args.num_obs, 12 * args.env_p, 20 * args.env_p, 3),
    dtype=np.uint8)
try:
    env = TankEnv(args.game_path,
        opp_fp_and_elo=[],
        game_port=args.base_port,
        my_port=args.my_port,
        rand_opp=True)
    if not args.canvas_game_path:
        args.canvas_game_path = args.game_path
    # A second, image-based env instance used as a rendering canvas
    canvas = TankEnv(args.canvas_game_path,
        opp_fp_and_elo=[],
        game_port=args.base_port + 1,
        my_port=args.my_port + 1,
        image_based=True,
        level_path=args.level_path,
        rand_opp=True,
        p=args.env_p)
    obs = env.reset()