Пример #1
0
def trained_disturber(variant):
    """Evaluate a policy against a disturber restored from disk and log the result.

    Builds the environment, a policy and a ``Disturber`` from ``variant``,
    restores the disturber from ``variant['eval_params']['path']``, runs
    ``evaluation`` once, prints the diagnostics on a single line and writes
    them through ``logger`` in csv format.

    Args:
        variant: Experiment configuration mapping. Keys read here:
            ``'env_name'``, ``'env_params'`` (needs ``'disturbance dim'``),
            ``'eval_params'`` (needs ``'path'``), ``'alg_params'``,
            ``'disturber_params'`` (needs ``'disturbance_magnitude'``),
            ``'algorithm_name'`` and ``'log_path'``.

    Returns:
        None.

    Side effects:
        * ``variant['disturber_params']['disturbance_chanel_list']`` is set.
        * ``variant['eval_params']['magnitude']`` is set to ``0``.
        * ``logger`` is reconfigured to write under
          ``<log_path>/eval/trained_disturber``.

    NOTE(review): a second definition with the same name appears later in
    this file and replaces this one at import time.
    """
    env_name = variant['env_name']
    env = get_env_from_name(env_name)
    env_params = variant['env_params']

    eval_params = variant['eval_params']
    policy_params = variant['alg_params']
    disturber_params = variant['disturber_params']
    build_func = get_policy(variant['algorithm_name'])

    if 'Fetch' in env_name or 'Hand' in env_name:
        # These environments expose a dict observation space; the state
        # dimension is the sum of the leading sizes of its three parts.
        spaces = env.observation_space.spaces
        s_dim = sum(
            spaces[part].shape[0]
            for part in ('observation', 'achieved_goal', 'desired_goal'))
    else:
        s_dim = env.observation_space.shape[0]
    a_dim = env.action_space.shape[0]
    d_dim = env_params['disturbance dim']

    # NOTE(review): no weights are loaded into the policy here; presumably
    # evaluation() takes care of that -- confirm.
    policy = build_func(a_dim, s_dim, d_dim, policy_params)

    # Indices (along the first axis) of the non-zero disturbance magnitudes.
    disturber_params['disturbance_chanel_list'] = np.nonzero(
        disturber_params['disturbance_magnitude'])[0]
    disturber = Disturber(d_dim, s_dim, disturber_params)
    disturber.restore(eval_params['path'])

    log_path = variant['log_path'] + '/eval/trained_disturber'
    variant['eval_params'].update({'magnitude': 0})
    logger.configure(dir=log_path, format_strs=['csv'])

    diagnostic_dict, _ = evaluation(variant, env, policy, disturber)

    # One "key:value|" segment per diagnostic, values rounded to 2 decimals.
    print(''.join(
        key + ':' + str(round(value, 2)) + '|'
        for key, value in diagnostic_dict.items()))

    for key, value in diagnostic_dict.items():
        logger.logkv(key, value)
    logger.dumpkvs()
def trained_disturber(variant):
    """Run one evaluation with a restored disturber and record the diagnostics.

    The environment, policy and ``Disturber`` are constructed from
    ``variant``; the disturber is restored from
    ``variant["eval_params"]["path"]``. The diagnostics returned by
    ``evaluation`` are printed as a single ``key:value|`` line (values
    rounded to two decimals) and dumped through ``logger`` as csv.

    Args:
        variant: Experiment configuration mapping. Keys read here:
            ``"env_name"``, ``"env_params"`` (needs ``"disturbance dim"``),
            ``"eval_params"`` (needs ``"path"``), ``"alg_params"``,
            ``"disturber_params"`` (needs ``"disturbance_magnitude"``),
            ``"algorithm_name"`` and ``"log_path"``.

    Returns:
        None.

    Side effects:
        * Adds ``"disturbance_chanel_list"`` to ``variant["disturber_params"]``.
        * Sets ``variant["eval_params"]["magnitude"]`` to ``0``.
        * Reconfigures ``logger`` to write under
          ``<log_path>/eval/trained_disturber``.

    NOTE(review): an earlier definition with the same name exists in this
    file; this one shadows it at import time.
    """
    env_name = variant["env_name"]
    env = get_env_from_name(env_name)
    env_params = variant["env_params"]

    eval_params = variant["eval_params"]
    policy_params = variant["alg_params"]
    disturber_params = variant["disturber_params"]
    build_func = get_policy(variant["algorithm_name"])

    obs_space = env.observation_space
    if "Fetch" in env_name or "Hand" in env_name:
        # Dict observation space: state size is the total of the leading
        # dimensions of its three components.
        goal_keys = ("observation", "achieved_goal", "desired_goal")
        s_dim = sum(obs_space.spaces[name].shape[0] for name in goal_keys)
    else:
        s_dim = obs_space.shape[0]
    a_dim = env.action_space.shape[0]
    d_dim = env_params["disturbance dim"]

    # NOTE(review): the policy is only built here, never restored;
    # presumably evaluation() handles loading -- confirm.
    policy = build_func(a_dim, s_dim, d_dim, policy_params)

    # First-axis indices where the configured disturbance magnitude is non-zero.
    active_channels = np.nonzero(disturber_params["disturbance_magnitude"])[0]
    disturber_params["disturbance_chanel_list"] = active_channels
    disturber = Disturber(d_dim, s_dim, disturber_params)
    disturber.restore(eval_params["path"])

    log_path = variant["log_path"] + "/eval/trained_disturber"
    variant["eval_params"].update({"magnitude": 0})
    logger.configure(dir=log_path, format_strs=["csv"])

    diagnostic_dict, _ = evaluation(variant, env, policy, disturber)

    # Build the console summary with a plain loop instead of a list
    # comprehension used only for its side effects.
    pieces = []
    for key, value in diagnostic_dict.items():
        pieces.extend((key, ":", str(round(value, 2)), "|"))
    print("".join(pieces))

    for key, value in diagnostic_dict.items():
        logger.logkv(key, value)
    logger.dumpkvs()