def drive(args):
    with open(args.conf.strip(), 'r') as f:
        config = json.load(f)
    modules = load_modules(config['models'])
    model = load_agent('imitation_learning_agent').load('lstm-2.h5')
    agent = ControlAgent(model, modules)
    experiment_suite = CoRL2017('Town01')
    run_driving_benchmark(agent, experiment_suite)

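# A minimal sketch of how drive() above might be invoked; the --conf flag name and
# the default config path are assumptions, not taken from the original project.
# drive() itself only reads config['models'] and expects the 'lstm-2.h5' weights
# to be available on disk.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--conf", default="driving.json",
                        help="JSON config with a 'models' entry for load_modules")
    drive(parser.parse_args())
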
def __init__(self, url, cookie, model_path, rl):
    # Initialize all class variables
    self.commandHandlers = {}
    self.tables = {}
    self.username = ''
    self.ws = None
    self.games = {}

    # model loading and related
    # NOTE: last_action feature is not implemented at all!
    self.rl = rl
    if rl:
        self.agent, cfgs = load_agent(model_path, {"device": "cpu", "vdn": False})
        self.hide_action = cfgs["hide_action"]
    else:
        self.agent = load_supervised_agent(model_path, "cpu")
        # NOTE: this assumes every clone bot does not hide_action
        self.hide_action = False
    self.rnn_hids = {}
    self.next_moves = {}
    self.scores = []

    # Initialize the Hanabi Live command handlers (for the lobby)
    self.commandHandlers['welcome'] = self.welcome
    self.commandHandlers['warning'] = self.warning
    self.commandHandlers['error'] = self.error
    self.commandHandlers['chat'] = self.chat
    self.commandHandlers['table'] = self.table
    self.commandHandlers['tableList'] = self.table_list
    self.commandHandlers['tableGone'] = self.table_gone
    self.commandHandlers['tableStart'] = self.table_start

    # Initialize the Hanabi Live command handlers (for the game)
    self.commandHandlers['init'] = self.init
    self.commandHandlers['gameAction'] = self.game_action
    self.commandHandlers['gameActionList'] = self.game_action_list
    self.commandHandlers['yourTurn'] = self.your_turn
    self.commandHandlers['databaseID'] = self.database_id

    # Start the WebSocket client
    print('Connecting to "' + url + '".')
    self.ws = websocket.WebSocketApp(
        url,
        on_message=lambda ws, message: self.websocket_message(ws, message),
        on_error=lambda ws, error: self.websocket_error(ws, error),
        on_open=lambda ws: self.websocket_open(ws),
        on_close=lambda ws: self.websocket_close(ws),
        cookie=cookie,
    )
    self.ws.run_forever()

def main_run(plots_path, args, config):
    # Load the trained agent and plot a sampled trajectory, if requested.
    if args.trajectory:
        agent = utils.load_agent(args.logdir)
        trajectory, _, _, _, _, _ = agent.sample_trajectory(1)
        states_repr = agent.environment.system_nn.states_repr()
        plot_trajectory(trajectory, states_repr, path=plots_path)

    # Plot the TensorBoard logs.
    event_acc = EventAccumulator(args.logdir)
    event_acc.Reload()
    plot_tb_logs(TB_PLOTS[config["system_name"]], [event_acc], plots_path=plots_path)

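# A minimal sketch of a caller for main_run() above; the flag names, the config.json
# location, and the plots directory layout are assumptions. The function itself only
# needs args.logdir, args.trajectory, and config["system_name"].
import argparse
import json
import os

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--logdir", required=True,
                        help="training run directory, also read by EventAccumulator")
    parser.add_argument("--trajectory", action="store_true",
                        help="additionally sample and plot one trajectory")
    args = parser.parse_args()

    # Hypothetical layout: the training config is stored as config.json in logdir.
    with open(os.path.join(args.logdir, "config.json")) as f:
        config = json.load(f)

    plots_path = os.path.join(args.logdir, "plots")
    os.makedirs(plots_path, exist_ok=True)
    main_run(plots_path, args, config)
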
def evaluate_saved_model(
    weight_files,
    num_game,
    seed,
    bomb,
    *,
    overwrite=None,
    num_run=1,
    verbose=True,
):
    agents = []
    sad = []
    hide_action = []
    if overwrite is None:
        overwrite = {}
    overwrite["vdn"] = False
    overwrite["device"] = "cuda:0"
    overwrite["boltzmann_act"] = False
    for weight_file in weight_files:
        agent, cfg = utils.load_agent(weight_file, overwrite)
        agents.append(agent)
        sad.append(cfg["sad"] if "sad" in cfg else cfg["greedy_extra"])
        hide_action.append(bool(cfg["hide_action"]))
        hand_size = cfg.get("hand_size", 5)

    assert all(s == sad[0] for s in sad)
    sad = sad[0]
    if all(h == hide_action[0] for h in hide_action):
        hide_action = hide_action[0]
        process_game = None
    else:
        hide_actions = hide_action
        process_game = lambda g: g.set_hide_actions(hide_actions)
        hide_action = False

    scores = []
    perfect = 0
    for i in range(num_run):
        _, _, score, p, _ = evaluate(
            agents,
            num_game,
            num_game * i + seed,
            bomb,
            0,  # eps
            sad,
            hide_action,
            process_game=process_game,
            hand_size=hand_size,
        )
        scores.extend(score)
        perfect += p

    mean = np.mean(scores)
    sem = np.std(scores) / np.sqrt(len(scores))
    perfect_rate = perfect / (num_game * num_run)
    if verbose:
        print(
            "score: %f +/- %f" % (mean, sem),
            "; perfect: %.2f%%" % (100 * perfect_rate),
        )
    return mean, sem, perfect_rate, scores

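# Example call for evaluate_saved_model(); the weight paths are placeholders and
# the game count, seed, and bomb settings are illustrative, not values from the
# original evaluation script.
if __name__ == "__main__":
    weight_files = ["path/to/model0.pthw", "path/to/model1.pthw"]  # placeholder paths
    mean, sem, perfect_rate, scores = evaluate_saved_model(
        weight_files, num_game=1000, seed=1, bomb=0, num_run=1
    )
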
    score = states[0].get_score() if state0.life_tokens > 0 else 0
    print("final score: ", score)
    return score


import os
import sys
import numpy as np

root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(root, 'pyhanabi'))

import r2d2
from utils import load_agent

# weight = '/private/home/hengyuan/HanabiModels/rl1_fix_o/HIDE_ACTION1_PRED0.25_MIN_T0.01_MAX_T0.1_SEEDb/model0.pthw'
weight = '/private/home/hengyuan/HanabiModels/br1_aux_big_cont/HIDE_ACTION1_RNN_HID_DIM768_ACT_BASE_EPS0.1_SEEDa/model0.pthw'

# seed = 8
# deck = '/private/home/hengyuan/NewHanabi/rl/pyhanabi/exps/play2/deck_seed%d.txt' % seed
# ref = '/private/home/hengyuan/NewHanabi/rl/pyhanabi/exps/play2/priv_s%d.pkl' % seed
# agent, _ = load_agent(weight, {"device": "cpu", "vdn": False})
# score = run_game(deck, ref, [agent, agent], False)

# Run 100 self-play games on fixed decks and average the scores.
scores = []
for seed in range(1, 101):
    deck = '/private/home/hengyuan/NewHanabi/rl/pyhanabi/exps/play2/deck_seed%d.txt' % seed
    ref = '/private/home/hengyuan/NewHanabi/rl/pyhanabi/exps/play2/priv_s%d.pkl' % seed
    agent, _ = load_agent(weight, {"device": "cpu", "vdn": False})
    score = run_game(deck, ref, [agent, agent], False)
    print('pass, ', seed)
    scores.append(score)

print('avg score:', np.mean(scores))

from utils import load_agent, CurryEnv, DelayedLoadEnv
from utils import get_env_and_policy_type, make_session, get_trained_kicker_locations
from gather_statistics import get_emperical_score, get_agent_any_type

samples = 20
env, pol_type = get_env_and_policy_type('kick-and-defend')
sess = make_session()
with sess:
    known_agent = load_agent(
        'agent-zoo/kick-and-defend/defender/agent2_parameters-v1.pkl',
        pol_type, "known_policy", env, 1)
    attacked_agent = load_agent(get_trained_kicker_locations()[1], pol_type,
                                "attacked", env, 0)

    # TODO: load_agent should be changed to "load_zoo_agent"
    # TODO: below is a test for delayed start
    newenv = DelayedLoadEnv(env, get_trained_kicker_locations()[1], pol_type,
                            "attacked3", 0, sess)
    # newenv = HackyFixForGoalie(newenv)
    trained_agent = get_agent_any_type('our_mlp', 'rando-ralph', pol_type, env)

    ties, win_loss = get_emperical_score(newenv, [trained_agent], samples,

def train(args):
    with open(args.conf, 'r') as f:
        config = json.load(f)
    agent = load_agent(config['agent']).init(config)
    agent.train()

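# A guess at the smallest config train() above accepts: load_agent(config['agent'])
# only needs the "agent" key, and the remaining keys are whatever the chosen agent's
# init() reads. The agent name is borrowed from the drive() snippet earlier and the
# conf.json path is hypothetical.
import argparse
import json

if __name__ == "__main__":
    with open("conf.json", "w") as f:
        json.dump({"agent": "imitation_learning_agent"}, f)

    parser = argparse.ArgumentParser()
    parser.add_argument("--conf", default="conf.json")
    train(parser.parse_args())
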
def get_agent_any_type(type_opps, name, policy_type, env):
    if type_opps == "zoo":
        return load_agent(name, policy_type, "zoo_ant_policy_2", env, 1)
    elif type_opps == "const":
        trained_agent = constant_agent_sampler()
        trained_agent.load(name)
        return trained_agent
    elif type_opps == "lstm":
        policy = LSTMPolicy(scope="agent_new", reuse=False,
                            ob_space=env.observation_space.spaces[0],
                            ac_space=env.action_space.spaces[0],
                            hiddens=[128, 128], normalize=True)

        def get_action(observation):
            return policy.act(stochastic=True, observation=observation)[0]

        trained_agent = Agent(get_action, policy.reset)

        # Restore the saved weights into the freshly built policy graph.
        # NOTE: `sess` is assumed to be a TF session defined in an enclosing scope.
        with open(name, "rb") as file:
            values_from_save = pickle.load(file)
        for key, value in values_from_save.items():
            var = tf.get_default_graph().get_tensor_by_name(key)
            sess.run(tf.assign(var, value))
        return trained_agent
    elif type_opps == "our_mlp":
        # TODO: DO ANYTHING BUT THIS. THIS IS VERY DIRTY AND SAD :(
        def make_env(id):
            # TODO: seed (not currently supported)
            # TODO: VecNormalize? (typically good for MuJoCo)
            # TODO: baselines logger?
            # TODO: we're loading identical policy weights into different
            # variables; this works around the design choice of Agents
            # having state stored inside of them.
            sess = utils.make_session()
            with sess.as_default():
                multi_env = env
                attacked_agent = constant_agent_sampler(act_dim=8, magnitude=100)
                single_env = Gymify(MultiToSingle(CurryEnv(multi_env, attacked_agent)))
                single_env.spec = gym.envs.registration.EnvSpec('Dummy-v0')
                # TODO: upgrade Gym so we don't have to do this
                single_env.observation_space.dtype = np.dtype(np.float32)
            return single_env
            # TODO: close session?

        # TODO: DO NOT EVEN READ THE ABOVE CODE :'(
        denv = SubprocVecEnv([functools.partial(make_env, 0)])
        model = ppo2.learn(network="mlp", env=denv, total_timesteps=1, seed=0,
                           nminibatches=4, log_interval=1, save_interval=1,
                           load_path=name)
        stateful_model = StatefulModel(denv, model)
        trained_agent = utils.Agent(action_selector=stateful_model.get_action,
                                    reseter=stateful_model.reset)
        return trained_agent

    raise Exception('Agent type unrecognized')

# ties, win_loss = evaluate_agent(attacked_agent, configs.agent_type, configs.agent_to_eval, policy_type, env, configs.samples,
#                                 not configs.no_visuals, silent=configs.nearly_silent)

# Pick the agent to evaluate: an all-zeros constant agent, a random constant
# agent, or a trained/loaded agent of the requested type.
if configs.zero_const:
    if configs.env == "sumo-ants":
        trained_agent = constant_zero_agent(act_dim=8)
    elif configs.env == "kick-and-defend":
        trained_agent = constant_zero_agent(act_dim=17)
    else:
        raise Exception("Unsupported Env")
elif configs.random_const:
    trained_agent = constant_agent_sampler(act_dim=17)
else:
    trained_agent = get_agent_any_type(configs.agent_type, configs.agent_to_eval,
                                       policy_type, env)

attacked_agent = load_agent(pretrained_agent, policy_type, "zoo_ant_policy4", env, 0)

agents = [attacked_agent, trained_agent]
ties, win_loss = get_emperical_score(env, agents, configs.samples,
                                     render=not configs.no_visuals,
                                     silent=configs.nearly_silent)

# print("After {} trials the tiecount was {} and the wincounts were {}".format(samples,
if not configs.csvmode:
    print(
        "[MAGIC NUMBER 87623123] In {} trials {} achieved {} ties and win rates {}"
        .format(configs.samples, configs.agent_to_eval, ties, win_loss))