Example #1
import tensorflow as tf  # TensorFlow 1.x API (tf.Graph / tf.Session)

import awr_agent                      # project-local AWR modules
from awr_configs import AWR_CONFIGS


def build_agent(env, env_id, agent_configs):
    # Copy so the caller's dict is never mutated; any matching entry in
    # AWR_CONFIGS is merged on top, so its keys take precedence.
    agent_configs = dict(agent_configs)
    if env_id in AWR_CONFIGS:
        agent_configs.update(AWR_CONFIGS[env_id])

    # Each agent gets its own graph and session.
    graph = tf.Graph()
    sess = tf.Session(graph=graph)
    agent = awr_agent.AWRAgent(env=env, sess=sess, **agent_configs)

    return agent
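
A minimal usage sketch for this variant, assuming the AWR repository layout (a Gym environment plus the AWR_CONFIGS table); the override key shown is illustrative, not taken from the source:

import gym

env_id = "HalfCheetah-v2"
env = gym.make(env_id)

# Hypothetical caller-side override; any matching AWR_CONFIGS entry is
# merged on top of it, so the table's values win for overlapping keys.
agent = build_agent(env, env_id, {"actor_stepsize": 5e-5})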
Example #2
import tensorflow as tf  # TensorFlow 1.x API (tf.Graph / tf.Session)

import awr_agent    # project-local AWR modules
import awr_configs


def build_agent(env):
    # The environment id comes from the already-parsed command-line args.
    env_id = arg_parser.env
    agent_configs = {}
    if env_id in awr_configs.AWR_CONFIGS:
        # Copy the entry so the shared config table is never mutated.
        agent_configs = dict(awr_configs.AWR_CONFIGS[env_id])

    graph = tf.Graph()
    sess = tf.Session(graph=graph)
    agent = awr_agent.AWRAgent(env=env, sess=sess, **agent_configs)

    return agent
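
Note that tf.Session exists only in TensorFlow 1.x. Under TensorFlow 2 the same snippet can run through the v1 compatibility layer; a minimal sketch:

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()  # restore graph/session execution semantics

graph = tf.Graph()
sess = tf.Session(graph=graph)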
Example #3
                .format(data_std, data_std), "rb")), args.constraint)
    # (Snippet truncated above: `datas` is loaded here from a file opened
    # in "rb" mode; each element carries per-trajectory arrays, used below
    # as el[0], el[1], el[2].)

    # Build a per-trajectory terminal mask: 1 at every step except the
    # last one, which is marked 0.
    terminals = []
    for el in datas:
        terminal = np.ones(len(el[0]))
        terminal[-1] = 0
        terminals.append(terminal)

    # Re-pack each trajectory with its terminal mask appended.
    datas2 = []
    for el, terminal in zip(datas, terminals):
        datas2.append((el[0], el[1], el[2], terminal))

    # Rebuild the AWR agent (TF1 graph + session) and restore the
    # offline-trained Reacher checkpoint.
    env = gym.make("Reacher-v2")
    graph = tf.Graph()
    sess = tf.Session(graph=graph)
    agent = awr_agent.AWRAgent(env=env, sess=sess, **configs)
    agent.load_model(
        "../output/Reacher-v2_{}_offline/model.ckpt".format(data_std))

    # Q-network for fitted Q evaluation (PyTorch; .cuda() puts it on GPU).
    qnn = Q(agent.get_state_size(), agent.get_action_size(), 0.001).cuda()

    eval_std = args.eval_std  # evaluation-policy std; vary this per run
    num_epochs = args.n_epochs
    FQE = FittedQEvaluation(qnn)
    policy = Policy(agent, eval_std)

    # Fit Q to the offline data under the evaluation policy.
    FQE.fit_Q(policy, datas2, num_epochs, agent._discount)

    # Roll out 100 episodes with the evaluation policy to collect returns.
    vals0 = []
    for _ in tqdm(range(100)):
        path = rollout_path(agent, eval_std)
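
For context, fitted Q evaluation regresses Q(s, a) toward the bootstrapped target r + γ · Q(s', π(s')) over the offline batch. The FittedQEvaluation class itself is not shown above, so the following is a minimal PyTorch sketch of one fitting step, with hypothetical names (qnn(s, a) returning Q-values, policy(s) returning actions); the not_done tensor plays the role of the terminals arrays built above (1 everywhere except 0 at the last step).

import torch
import torch.nn.functional as F

def fqe_step(qnn, q_target, policy, batch, discount, optimizer):
    # batch: (states, actions, rewards, next_states, not_done) tensors
    s, a, r, s2, not_done = batch

    with torch.no_grad():
        a2 = policy(s2)  # action of the policy being evaluated
        target = r + discount * not_done * q_target(s2, a2).squeeze(-1)

    # One regression step toward the bootstrapped target.
    loss = F.mse_loss(qnn(s, a).squeeze(-1), target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()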