def get_states(args,
               true_environment,
               length_constraint=50000,
               raws=None,
               dumps=None):
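    """Load recorded rollout states for the edge being trained.

    Builds the option chain for args.train_edge, pops the proxy environment off
    the chain, assembles a GetState extractor from the paired (state_names,
    state_forms), estimates normalization bounds from the recorded rollouts, and
    loads up to length_constraint states from args.record_rollouts. Returns
    (states, resps, num_actions, state_class, environments, raws, dumps).
    """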
    dataset_path = args.record_rollouts
    changepoint_path = args.changepoint_dir
    option_chain = OptionChain(true_environment, changepoint_path,
                               args.train_edge, args)
    environments = option_chain.initialize(args)
    print(environments)
    proxy_environment = environments.pop(-1)
    head, tail = get_edge(args.train_edge)
    # a proxy environment differs from the true environment: its actions are the
    # learned reward functions of the edge rather than primitive actions
    if len(environments) > 1:
        num_actions = len(environments[-1].reward_fns)
    else:
        num_actions = environments[-1].num_actions
    state_class = GetState(head,
                           state_forms=list(
                               zip(args.state_names, args.state_forms)))
    use_raw = 'raw' in args.state_forms
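    # per-dimension (min, max) bounds estimated from the recorded rollouts; downstream
    # code can normalize with, e.g., (state - minmax[0]) / (minmax[1] - minmax[0])
    # (assumed usage -- the normalization itself is not performed in this function)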
    state_class.minmax = compute_minmax(state_class, dataset_path)
    states, resps, raws, dumps = load_states(
        state_class.get_state,
        dataset_path,
        length_constraint=length_constraint,
        use_raw=use_raw,
        raws=raws,
        dumps=dumps)
    return states, resps, num_actions, state_class, environments, raws, dumps
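
# Example call (illustrative only): the attribute names on `args` mirror the fields
# read above (record_rollouts, changepoint_dir, train_edge, state_names, state_forms);
# the real argument parser lives elsewhere in the repo.
#
#   states, resps, num_actions, state_class, environments, raws, dumps = get_states(
#       args, true_environment, length_constraint=30000)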


def setup_training(args, environments, proxy_environment, head, dataset_path,
                   reward_paths):
    # NOTE: reconstructed scaffolding -- this fragment arrived without its enclosing
    # definition or the condition guarding the pretrained-model branch. The function
    # name, the parameter list, and the load_weights flag below are assumptions.
    if args.load_weights:  # assumed flag: reuse models already attached to the proxy environment
        train_models = proxy_environment.models
    else:
        train_models = MultiOption(len(reward_paths), models[args.model_form])
    proxy_chain = environments
    # there is a difference in the properties of a proxy environment and the true environment
    if len(environments) > 1:
        num_actions = len(environments[-1].reward_fns)
    else:
        num_actions = environments[-1].num_actions
    print(args.state_names, args.state_forms)
    state_class = GetState(head,
                           state_forms=list(
                               zip(args.state_names, args.state_forms)))
    state_class.minmax = compute_minmax(state_class,
                                        dataset_path,
                                        filename=args.focus_dumps_name)
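    # optional override: fixed per-form bounds instead of the data-driven minmax above
    # (84 presumably matches the 84x84 frame size used elsewhere in the pipeline)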
    if args.normalize:
        minv = []
        maxv = []
        for f in args.state_forms:
            if f == 'prox':
                minv += [-84, -84]
                maxv += [84, 84]
            elif f == 'bounds':
                minv += [0, 0]
                maxv += [84, 84]
        state_class.minmax = np.stack((np.array(minv), np.array(maxv)))
        print(state_class.minmax)


def setup_behavior_training(args, option_chain, head, dataset_path, reward_classes):
    # NOTE: reconstructed scaffolding -- this second fragment also arrived without its
    # enclosing definition; the function name and parameter list are assumptions, the
    # body below is original.
    behavior_policy = behavior_policies[args.behavior_policy]()
    train_models = MultiOption(len(reward_classes), models[args.model_form])
    environments = option_chain.initialize(args)
    proxy_environment = environments.pop(-1)
    proxy_chain = environments
    # there is a difference in the properties of a proxy environment and the true environment
    if len(environments) > 1:
        num_actions = len(environments[-1].reward_fns)
    else:
        num_actions = environments[-1].num_actions
    print(args.state_names, args.state_forms)
    state_class = GetState(head,
                           state_forms=list(
                               zip(args.state_names, args.state_forms)))
    # data-driven minmax (flagged as buggy in the original), immediately overridden below
    state_class.minmax = compute_minmax(state_class, dataset_path)
    # hard-coded minmax; 84 presumably corresponds to the 84x84 frame dimensions
    minv = []
    maxv = []
    for f in args.state_forms:
        if f == 'prox':
            minv += [-84, -84]
            maxv += [84, 84]
        elif f == 'bounds':
            minv += [0, 0]
            maxv += [84, 84]
    state_class.minmax = np.stack((np.array(minv), np.array(maxv)))
    print("state class minmax", state_class.minmax)

    # give each reward function the dimensionality of the assembled state vector
    for reward_class in reward_classes:
        reward_class.traj_dim = state_class.shape