import os
import time

import numpy as np

import rela
import hanalearn

# create_game and create_envs are defined elsewhere in this module.


def create_train_env(
    game_name,
    seed,
    eps,
    max_frame,
    num_thread,
    num_game_per_thread,
    actor_creator,
    *,
    terminal_on_life_loss=False,
    terminal_signal_on_life_loss=True,
):
    context = rela.Context()
    games = []
    actors = []
    for thread_idx in range(num_thread):
        env = rela.VectorEnv()
        for game_idx in range(num_game_per_thread):
            # each game gets a unique seed and its own exploration rate
            game = create_game(
                game_name,
                seed + thread_idx * num_game_per_thread + game_idx,
                eps[thread_idx * num_game_per_thread + game_idx],
                max_frame,
                terminal_on_life_loss,
                terminal_signal_on_life_loss,
            )
            games.append(game)
            env.append(game)
        # one actor drives all games in this thread's vectorized env
        actor = actor_creator(thread_idx)
        thread = rela.BasicThreadLoop(actor, env, False)
        actors.append(actor)
        context.push_env_thread(thread)
    print("Finished creating environments with %d games" % len(games))
    return context, games, actors
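# A minimal usage sketch for create_train_env, assuming a hypothetical
# `make_actor` factory; only the function above is real. eps must hold one
# exploration rate per game (num_thread * num_game_per_thread entries),
# e.g. an Ape-X style schedule:
#
#   num_thread, num_game_per_thread = 4, 20
#   num_game = num_thread * num_game_per_thread
#   eps = [0.4 ** (1 + 7 * i / (num_game - 1)) for i in range(num_game)]
#   context, games, actors = create_train_env(
#       "pong", seed=1, eps=eps, max_frame=108000,
#       num_thread=num_thread, num_game_per_thread=num_game_per_thread,
#       actor_creator=make_actor,  # hypothetical: returns an actor per thread
#   )
#   context.start()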
def create_eval_env(
    game_name,
    num_thread,
    model_locker,
    actor_cls,
    seed,
    max_frame,
    *,
    eval_eps=0,
    terminal_on_life_loss=False,
    terminal_signal_on_life_loss=True,
):
    context = rela.Context()
    games = []
    for i in range(num_thread):
        game = create_game(
            game_name,
            seed + i,
            eval_eps,
            max_frame,
            terminal_on_life_loss,
            terminal_signal_on_life_loss,
        )
        games.append(game)
        env = rela.VectorEnv()
        env.append(game)
        actor = actor_cls(model_locker)
        thread = rela.BasicThreadLoop(actor, env, True)
        context.push_env_thread(thread)
    return context, games
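# Sketch: driving the eval context built above. `MyActor` and `model_locker`
# are hypothetical stand-ins (note actor_cls is called as actor_cls(model_locker)),
# and this assumes the game objects expose last_score() as the Hanabi env does.
# The polling loop mirrors the one used in `evaluate` below.
#
#   context, games = create_eval_env(
#       "pong", 10, model_locker, MyActor, seed=99, max_frame=108000
#   )
#   context.start()
#   while not context.terminated():
#       time.sleep(0.5)
#   print("mean eval score:", np.mean([g.last_score() for g in games]))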
def evaluate(
    agents,
    num_game,
    seed,
    bomb,
    eps,
    sad,
    *,
    hand_size=5,
    runners=None,
    device="cuda:0",
):
    """
    evaluate agents as long as they have an "act" method;
    pass either agents (wrapped in BatchRunners here) or
    pre-built runners, but not both
    """
    assert agents is None or runners is None
    if agents is not None:
        runners = [
            rela.BatchRunner(agent, device, 1000, ["act"]) for agent in agents
        ]
    num_player = len(runners)

    context = rela.Context()
    games = create_envs(
        num_game,
        seed,
        num_player,
        hand_size,
        bomb,
        [eps],
        -1,
        sad,
        False,
        False,
    )

    for g in games:
        # one single-game vectorized env and one actor per player, per game
        env = hanalearn.HanabiVecEnv()
        env.append(g)
        actors = []
        for i in range(num_player):
            actors.append(rela.R2D2Actor(runners[i], 1))
        thread = hanalearn.HanabiThreadLoop(actors, env, True)
        context.push_env_thread(thread)

    for runner in runners:
        runner.start()

    context.start()
    while not context.terminated():
        time.sleep(0.5)
    context.terminate()
    while not context.terminated():
        time.sleep(0.5)

    for runner in runners:
        runner.stop()

    scores = [g.last_score() for g in games]
    num_perfect = np.sum([1 for s in scores if s == 25])
    return np.mean(scores), num_perfect / len(scores), scores, num_perfect
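# Sketch: calling evaluate with two pre-trained agents. `load_agent` is a
# hypothetical stand-in for whatever loads a checkpoint in this codebase;
# the only requirement stated above is that each agent has an "act" method.
#
#   agents = [load_agent("a.pthw"), load_agent("b.pthw")]
#   mean_score, perfect_rate, scores, num_perfect = evaluate(
#       agents, num_game=1000, seed=1, bomb=0, eps=0, sad=True
#   )
#   print("score: %.2f, perfect: %.1f%%" % (mean_score, 100 * perfect_rate))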
def create_train_env(
    method,
    seed,
    num_thread,
    num_game_per_thread,
    actor_cons,
    max_len,
    num_player,
    bomb,
    greedy_extra,
):
    assert method in ["vdn", "iql"]
    context = rela.Context()
    games = []
    actors = []
    threads = []

    print("training with bomb: %d" % bomb)
    for thread_idx in range(num_thread):
        env = rela.VectorEnv()
        for game_idx in range(num_game_per_thread):
            unique_seed = seed + game_idx + thread_idx * num_game_per_thread
            game = hanalearn.HanabiEnv(
                {
                    "players": str(num_player),
                    "seed": str(unique_seed),
                    "bomb": str(bomb),
                },
                max_len,
                greedy_extra,
                False,
            )
            games.append(game)
            env.append(game)

        assert max_len > 0
        if method == "vdn":
            # vdn: a single joint actor controls all players in this thread
            # assert len(actor_cons) == 1
            actor = actor_cons(thread_idx)
            actors.append(actor)
            thread = hanalearn.HanabiVDNThreadLoop(actor, env, False)
        else:
            # iql: one independent actor per player
            assert len(actor_cons) == num_player
            env_actors = []
            for i in range(num_player):
                env_actors.append(actor_cons[i](thread_idx))
            actors.extend(env_actors)
            thread = hanalearn.HanabiIQLThreadLoop(env_actors, env, False)
        threads.append(thread)
        context.push_env_thread(thread)

    print(
        "Finished creating environments with %d games and %d actors"
        % (len(games), len(actors))
    )
    return context, games, actors, threads
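# Sketch of the two actor_cons shapes the method argument expects; both
# `make_vdn_actor` and `make_iql_actor` are hypothetical factories.
#
#   # vdn: a single constructor producing one joint actor per thread
#   context, games, actors, threads = create_train_env(
#       "vdn", 1, 80, 20, lambda tid: make_vdn_actor(tid), 80, 2, 0, False
#   )
#   # iql: a list with one constructor per player
#   cons = [lambda tid, i=i: make_iql_actor(i, tid) for i in range(2)]
#   context, games, actors, threads = create_train_env(
#       "iql", 1, 80, 20, cons, 80, 2, 0, False
#   )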
def create_threads(num_thread, num_game_per_thread, actors, games):
    context = rela.Context()
    threads = []
    for thread_idx in range(num_thread):
        env = hanalearn.HanabiVecEnv()
        for game_idx in range(num_game_per_thread):
            env.append(games[thread_idx * num_game_per_thread + game_idx])
        thread = hanalearn.HanabiThreadLoop(actors[thread_idx], env, False)
        threads.append(thread)
        context.push_env_thread(thread)
    print(
        "Finished creating %d threads with %d games and %d actors"
        % (len(threads), len(games), len(actors))
    )
    return context, threads
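# Sketch: pairing pre-built actors and games. Assumes `games` holds
# num_thread * num_game_per_thread entries created elsewhere, and that
# actors[thread_idx] is whatever HanabiThreadLoop expects for one thread
# (a group of actors per env, as in `evaluate` above).
#
#   context, threads = create_threads(num_thread, num_game_per_thread, actors, games)
#   context.start()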
def create_eval_env(
    seed,
    num_thread,
    model_lockers,
    eval_eps,
    num_player,
    bomb,
    greedy_extra,
    log_prefix=None,
):
    context = rela.Context()
    games = []
    for i in range(num_thread):
        game = hanalearn.HanabiEnv(
            {
                "players": str(num_player),
                "seed": str(seed + i),
                "bomb": str(bomb),
            },
            -1,  # no game-length cap during evaluation
            greedy_extra,
            False,
        )
        games.append(game)

        env = rela.VectorEnv()
        env.append(game)

        env_actors = []
        for j in range(num_player):
            env_actors.append(rela.R2D2Actor(model_lockers[j], 1, eval_eps))

        if log_prefix is None:
            thread = hanalearn.HanabiIQLThreadLoop(env_actors, env, True)
        else:
            # optionally write a per-game text log
            log_file = os.path.join(log_prefix, "game%d.txt" % i)
            thread = hanalearn.HanabiIQLThreadLoop(env_actors, env, True, log_file)
        context.push_env_thread(thread)
    return context, games
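# Sketch: evaluating with per-game logs. `model_lockers` (one per player)
# would come from training code elsewhere; with log_prefix set, thread i
# writes <log_prefix>/game<i>.txt. The polling loop mirrors `evaluate`.
#
#   context, games = create_eval_env(
#       1, 10, model_lockers, 0.0, 2, 0, False, log_prefix="eval_logs"
#   )
#   context.start()
#   while not context.terminated():
#       time.sleep(0.5)
#   print("mean:", np.mean([g.last_score() for g in games]))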