Example #1
def drive(args):
    with open(args.conf.strip(), 'r') as f:
        config = json.load(f)

    modules = load_modules(config['models'])
    model = load_agent('imitation_learning_agent').load('lstm-2.h5')

    agent = ControlAgent(model, modules)
    experiment_suite = CoRL2017('Town01')
    run_driving_benchmark(agent, experiment_suite)
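This example assumes `json` and the project's own helpers (`load_modules`, `load_agent`, `ControlAgent`, `CoRL2017`, `run_driving_benchmark`) are imported at module level. A minimal, hypothetical command-line entry point for it (the `--conf` flag name is an assumption, not taken from the source project) could look like:

# Hypothetical entry point for the drive() example above; assumes drive()
# is defined in the same module and --conf points to a JSON config file
# with a 'models' section.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--conf', required=True, help='path to the JSON config')
    drive(parser.parse_args())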
Example #2
    def __init__(self, url, cookie, model_path, rl):
        # Initialize all class variables
        self.commandHandlers = {}
        self.tables = {}
        self.username = ''
        self.ws = None
        self.games = {}

        # model loading and related
        # NOTE: last_action feature is not implemented AT ALL!!!
        self.rl = rl
        if rl:
            self.agent, cfgs = load_agent(model_path, {
                "device": "cpu",
                "vdn": False
            })
            self.hide_action = cfgs["hide_action"]
        else:
            self.agent = load_supervised_agent(model_path, "cpu")
            # NOTE: this assumes every clone bot does not hide_action
            self.hide_action = False
        self.rnn_hids = {}
        self.next_moves = {}
        self.scores = []

        # Initialize the Hanabi Live command handlers (for the lobby)
        self.commandHandlers['welcome'] = self.welcome
        self.commandHandlers['warning'] = self.warning
        self.commandHandlers['error'] = self.error
        self.commandHandlers['chat'] = self.chat
        self.commandHandlers['table'] = self.table
        self.commandHandlers['tableList'] = self.table_list
        self.commandHandlers['tableGone'] = self.table_gone
        self.commandHandlers['tableStart'] = self.table_start

        # Initialize the Hanabi Live command handlers (for the game)
        self.commandHandlers['init'] = self.init
        self.commandHandlers['gameAction'] = self.game_action
        self.commandHandlers['gameActionList'] = self.game_action_list
        self.commandHandlers['yourTurn'] = self.your_turn
        self.commandHandlers['databaseID'] = self.database_id

        # Start the WebSocket client
        print('Connecting to "' + url + '".')

        self.ws = websocket.WebSocketApp(
            url,
            on_message=lambda ws, message: self.websocket_message(ws, message),
            on_error=lambda ws, error: self.websocket_error(ws, error),
            on_open=lambda ws: self.websocket_open(ws),
            on_close=lambda ws: self.websocket_close(ws),
            cookie=cookie,
        )
        self.ws.run_forever()
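This constructor comes from a Hanabi Live bot client; the enclosing class name is not shown in the snippet. A hypothetical instantiation (the class name `HanabiClient`, URL, and cookie value below are placeholders, not from the source) might look like:

# Hypothetical usage; 'HanabiClient' stands in for the class this __init__
# belongs to, and the URL/cookie values are placeholders.
client = HanabiClient(
    url='wss://example.com/ws',           # WebSocket endpoint of the Hanabi Live server
    cookie='PLACEHOLDER_SESSION_COOKIE',  # session cookie obtained after logging in
    model_path='model0.pthw',             # weights for load_agent / load_supervised_agent
    rl=True,                              # True: RL agent; False: supervised clone bot
)

Note that the constructor itself calls `self.ws.run_forever()`, so instantiation blocks until the WebSocket connection closes.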
Example #3
def main_run(plots_path, args, config):
    # load trained agent


    # plot trajectory if needed
    if args.trajectory:
        agent = utils.load_agent(args.logdir)
        trajectory, _, _, _, _, _ = agent.sample_trajectory(1)
        states_repr = agent.environment.system_nn.states_repr()
        plot_trajectory(trajectory, states_repr, path=plots_path)

    # plot tensorboard logs
    event_acc = EventAccumulator(args.logdir)
    event_acc.Reload()
    plot_tb_logs(TB_PLOTS[config["system_name"]], [event_acc], plots_path=plots_path)
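A hypothetical call to `main_run`; the paths, the `system_name` value, and the use of `argparse.Namespace` as a stand-in for parsed arguments are all placeholders:

# Hypothetical invocation; 'runs/exp1' and 'cartpole' are placeholders and
# must match an existing TensorBoard log dir and a key in TB_PLOTS.
from argparse import Namespace

main_run(
    plots_path='plots/',
    args=Namespace(logdir='runs/exp1', trajectory=True),
    config={'system_name': 'cartpole'},
)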
Example #4
def evaluate_saved_model(
    weight_files,
    num_game,
    seed,
    bomb,
    *,
    overwrite=None,
    num_run=1,
    verbose=True,
):
    agents = []
    sad = []
    hide_action = []
    if overwrite is None:
        overwrite = {}
    overwrite["vdn"] = False
    overwrite["device"] = "cuda:0"
    overwrite["boltzmann_act"] = False

    for weight_file in weight_files:
        agent, cfg = utils.load_agent(
            weight_file,
            overwrite,
        )
        agents.append(agent)
        sad.append(cfg["sad"] if "sad" in cfg else cfg["greedy_extra"])
        hide_action.append(bool(cfg["hide_action"]))

    hand_size = cfg.get("hand_size", 5)

    assert all(s == sad[0] for s in sad)
    sad = sad[0]
    if all(h == hide_action[0] for h in hide_action):
        hide_action = hide_action[0]
        process_game = None
    else:
        hide_actions = hide_action
        process_game = lambda g: g.set_hide_actions(hide_actions)
        hide_action = False

    scores = []
    perfect = 0
    for i in range(num_run):
        _, _, score, p, _ = evaluate(
            agents,
            num_game,
            num_game * i + seed,
            bomb,
            0,  # eps
            sad,
            hide_action,
            process_game=process_game,
            hand_size=hand_size,
        )
        scores.extend(score)
        perfect += p

    mean = np.mean(scores)
    sem = np.std(scores) / np.sqrt(len(scores))
    perfect_rate = perfect / (num_game * num_run)
    if verbose:
        print(
            "score: %f +/- %f" % (mean, sem), "; perfect: %.2f%%" % (100 * perfect_rate)
        )
    return mean, sem, perfect_rate, scores
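A minimal invocation sketch, assuming a CUDA device is available (the function hard-codes `device` to `cuda:0`); the weight-file paths below are placeholders:

# Hypothetical call to evaluate_saved_model(); the weight paths are placeholders.
mean, sem, perfect_rate, scores = evaluate_saved_model(
    ["model0.pthw", "model1.pthw"],  # trained weight files (placeholder paths)
    num_game=1000,                   # games evaluated per run
    seed=1,
    bomb=0,
    num_run=1,
)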
Example #5
    score = states[0].get_score() if state0.life_tokens > 0 else 0
    print("final score: ", score)
    return score


root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(root, 'pyhanabi'))
import r2d2
from utils import load_agent

# weight = '/private/home/hengyuan/HanabiModels/rl1_fix_o/HIDE_ACTION1_PRED0.25_MIN_T0.01_MAX_T0.1_SEEDb/model0.pthw'
weight = '/private/home/hengyuan/HanabiModels/br1_aux_big_cont/HIDE_ACTION1_RNN_HID_DIM768_ACT_BASE_EPS0.1_SEEDa/model0.pthw'

# seed = 8
# deck = '/private/home/hengyuan/NewHanabi/rl/pyhanabi/exps/play2/deck_seed%d.txt' % seed
# ref = '/private/home/hengyuan/NewHanabi/rl/pyhanabi/exps/play2/priv_s%d.pkl' % seed
# agent, _ = load_agent(weight, {"device": "cpu", "vdn": False})
# score = run_game(deck, ref, [agent, agent], False)

scores = []
for seed in range(1, 101):
    deck = '/private/home/hengyuan/NewHanabi/rl/pyhanabi/exps/play2/deck_seed%d.txt' % seed
    ref = '/private/home/hengyuan/NewHanabi/rl/pyhanabi/exps/play2/priv_s%d.pkl' % seed
    agent, _ = load_agent(weight, {"device": "cpu", "vdn": False})
    score = run_game(deck, ref, [agent, agent], False)
    print('pass, ', seed)
    scores.append(score)

import numpy as np
print('avg score:', np.mean(scores))
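Note that the loop above reloads the same weight file on every iteration; a sketch of the same evaluation with the agent loaded once (assuming `run_game` does not mutate the agent between games) would be:

# Variant of the loop above with load_agent() hoisted out of the loop.
# Assumes run_game() does not mutate the agent between games.
agent, _ = load_agent(weight, {"device": "cpu", "vdn": False})
scores = []
for seed in range(1, 101):
    deck = '/private/home/hengyuan/NewHanabi/rl/pyhanabi/exps/play2/deck_seed%d.txt' % seed
    ref = '/private/home/hengyuan/NewHanabi/rl/pyhanabi/exps/play2/priv_s%d.pkl' % seed
    scores.append(run_game(deck, ref, [agent, agent], False))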
Example #6
from utils import load_agent, CurryEnv, DelayedLoadEnv
from utils import get_env_and_policy_type, make_session, get_trained_kicker_locations

from gather_statistics import get_emperical_score, get_agent_any_type

samples = 20

env, pol_type = get_env_and_policy_type('kick-and-defend')

sess = make_session()
with sess:
    known_agent = load_agent(
        'agent-zoo/kick-and-defend/defender/agent2_parameters-v1.pkl',
        pol_type, "known_policy", env, 1)

    attacked_agent = load_agent(get_trained_kicker_locations()[1], pol_type,
                                "attacked", env, 0)

    #TODO Load Agent should be changed to "load_zoo_agent"

    #TODO Below is test for delayed start

    newenv = DelayedLoadEnv(env,
                            get_trained_kicker_locations()[1], pol_type,
                            "attacked3", 0, sess)
    #newenv = HackyFixForGoalie(newenv)

    trained_agent = get_agent_any_type('our_mlp', 'rando-ralph', pol_type, env)

    ties, win_loss = get_emperical_score(newenv, [trained_agent],
                                         samples,
Example #7
File: train.py  Project: affinis-lab/core
def train(args):
    with open(args.conf, 'r') as f:
        config = json.load(f)

    agent = load_agent(config['agent']).init(config)
    agent.train()
Example #8
def get_agent_any_type(type_opps, name, policy_type, env):
    if type_opps == "zoo":
        return load_agent(name, policy_type, "zoo_ant_policy_2", env, 1)
    elif type_opps == "const":
        trained_agent = constant_agent_sampler()
        trained_agent.load(name)
        return trained_agent
    elif type_opps == "lstm":
        policy = LSTMPolicy(scope="agent_new",
                            reuse=False,
                            ob_space=env.observation_space.spaces[0],
                            ac_space=env.action_space.spaces[0],
                            hiddens=[128, 128],
                            normalize=True)

        def get_action(observation):
            return policy.act(stochastic=True, observation=observation)[0]

        trained_agent = Agent(get_action, policy.reset)

        with open(name, "rb") as file:
            values_from_save = pickle.load(file)

        for key, value in values_from_save.items():
            var = tf.get_default_graph().get_tensor_by_name(key)
            sess.run(tf.assign(var, value))

        return trained_agent
    elif type_opps == "our_mlp":
        #TODO DO ANYTHING BUT THIS.  THIS IS VERY DIRTY AND SAD :(
        def make_env(id):
            # TODO: seed (not currently supported)
            # TODO: VecNormalize? (typically good for MuJoCo)
            # TODO: baselines logger?
            # TODO: we're loading identical policy weights into different
            # variables; this works around the design choice of Agents
            # having state stored inside of them.
            sess = utils.make_session()
            with sess.as_default():
                multi_env = env

                attacked_agent = constant_agent_sampler(act_dim=8,
                                                        magnitude=100)

                single_env = Gymify(
                    MultiToSingle(CurryEnv(multi_env, attacked_agent)))
                single_env.spec = gym.envs.registration.EnvSpec('Dummy-v0')

                # TODO: upgrade Gym so we don't have to do this
                single_env.observation_space.dtype = np.dtype(np.float32)
            return single_env
            # TODO: close session?

        #TODO DO NOT EVEN READ THE ABOVE CODE :'(

        denv = SubprocVecEnv([functools.partial(make_env, 0)])

        model = ppo2.learn(network="mlp",
                           env=denv,
                           total_timesteps=1,
                           seed=0,
                           nminibatches=4,
                           log_interval=1,
                           save_interval=1,
                           load_path=name)

        stateful_model = StatefulModel(denv, model)
        trained_agent = utils.Agent(action_selector=stateful_model.get_action,
                                    reseter=stateful_model.reset)

        return trained_agent
    raise (Exception('Agent type unrecognized'))
Example #9
            #ties, win_loss = evaluate_agent(attacked_agent, configs.agent_type, configs.agent_to_eval, policy_type, env,configs.samples,
            #              not configs.no_visuals, silent=configs.nearly_silent)
            if configs.zero_const:
                if configs.env == "sumo-ants":
                    trained_agent = constant_zero_agent(act_dim=8)
                elif configs.env == "kick-and-defend":
                    trained_agent = constant_zero_agent(act_dim=17)
                else:
                    raise (Exception("Unsupported Env"))
            elif configs.random_const:
                trained_agent = constant_agent_sampler(act_dim=17)
            else:
                trained_agent = get_agent_any_type(configs.agent_type,
                                                   configs.agent_to_eval,
                                                   policy_type, env)
            attacked_agent = load_agent(pretrained_agent, policy_type,
                                        "zoo_ant_policy4", env, 0)

            agents = [attacked_agent, trained_agent]
            ties, win_loss = get_emperical_score(env,
                                                 agents,
                                                 configs.samples,
                                                 render=not configs.no_visuals,
                                                 silent=configs.nearly_silent)

            # print("After {} trials the tiecount was {} and the wincounts were {}".format(samples,

            if not configs.csvmode:
                print(
                    "[MAGIC NUMBER 87623123] In {} trials {} acheived {} Ties and winrates {}"
                    .format(configs.samples, configs.agent_to_eval, ties,
                            win_loss))