Example #1
# Core rllab imports; AutoMLPPolicy, MultiMLPPolicy, MultiTRPO, NPO, and
# MultiSampleProcessor come from this project's multiagent rllab extensions
# (their import paths are assumed to be provided elsewhere in the project).
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.envs.gym_env import GymEnv
from rllab.envs.proxy_env import ProxyEnv
from sandbox.rocky.tf.envs.base import TfEnv


def run_task(v):
    record_video = False

    # this import presumably registers the custom 'Peg3d-v0' env with Gym
    import mujoco_envs.pomdp
    main_env = GymEnv('Peg3d-v0', record_video=record_video)
    # main_env = MultiagentEnv(GymEnv("Swimmer-v1", record_video=record_video))

    # replace raw shadow_envs with wrapped envs
    main_env._shadow_envs = [TfEnv(ProxyEnv(env)) for env in main_env.shadow_envs]
    # main_env._shadow_envs = [TfEnv(normalize(env)) for env in main_env.shadow_envs]

    # one sub-policy per shadow env
    sub_policies = [AutoMLPPolicy(
    # sub_policies = [BottleneckAutoMLPPolicy(
        name="sub-policy-%s" % i,
        env_spec=env.spec,
        # two hidden layers, each with 32 hidden units
        hidden_sizes=(32, 32),
    ) for i, env in enumerate(main_env.shadow_envs)]

    # Optionally scale down the initialization to discourage pre-committing
    # to an action:
    # for sp in sub_policies:
    #     sp.get_params()[-3].set_value(sp.get_params()[-3].get_value() * 0.01)
    policy = MultiMLPPolicy(
        name="policy",
        env_spec=[env.spec for env in main_env.shadow_envs],
        policies=sub_policies
    )

    baselines = [LinearFeatureBaseline(env_spec=env.spec) for env in main_env.shadow_envs]

    # TODO(cathywu) Start with large batch sizes (100-1000 trajectories)
    algo = MultiTRPO(
        env=main_env,
        policy=policy,
        baselines=baselines,
        batch_size=25000,
        whole_paths=True,
        max_path_length=250,
        n_itr=700,
        discount=0.995,
        step_size=v["step_size"],
        # Uncomment to enable plotting (also pass plot=True when launching
        # the experiment):
        # plot=True,
        # NPO_cls=ConsensusNPO,
        NPO_cls=NPO,
        sample_processor_cls=MultiSampleProcessor,
        n_vectorized_envs=40,
    )
    algo.train()
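
A run_task like this is normally handed to rllab's launcher rather than called directly. Below is a minimal launch sketch, assuming rllab's run_experiment_lite and VariantGenerator; the experiment prefix, seed, and step-size values are illustrative assumptions, not from the original.

from rllab.misc.instrument import VariantGenerator, run_experiment_lite

# sweep the one hyperparameter run_task reads from the variant dict
vg = VariantGenerator()
vg.add("step_size", [0.01, 0.05])  # hypothetical sweep values

for v in vg.variants():
    run_experiment_lite(
        run_task,
        exp_prefix="peg3d-multi-trpo",  # hypothetical experiment name
        n_parallel=1,
        snapshot_mode="last",
        seed=1,
        variant=v,  # delivered to run_task as its argument
    )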
Example #2
# assumed opening of this truncated snippet: simple_env_classes is a list of
# basic env classes defined earlier, extended here with the MuJoCo-based ones
simple_env_classes.extend([
        Walker2DEnv,
        SwimmerEnv,
        SimpleHumanoidEnv,
        InvertedDoublePendulumEnv,
        HopperEnv,
        HalfCheetahEnv,
        PointGatherEnv,
        SwimmerGatherEnv,
        AntGatherEnv,
        PointMazeEnv,
        SwimmerMazeEnv,
        AntMazeEnv,
    ])

# instantiate each base env, then add one of each wrapper variant
envs = [cls() for cls in simple_env_classes]
envs.append(ProxyEnv(envs[0]))                    # pass-through wrapper
envs.append(IdentificationEnv(CartpoleEnv, {}))   # system-identification wrapper
envs.append(NoisyObservationEnv(CartpoleEnv()))   # adds observation noise
envs.append(DelayedActionEnv(CartpoleEnv()))      # delays applied actions
envs.append(NormalizedEnv(CartpoleEnv()))         # normalizes actions/observations
envs.append(GymEnv('CartPole-v0'))                # Gym-registered version
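
Each wrapper above perturbs or adapts a base env while keeping the same Env interface. As a rough sketch of the delegation pattern they share (a hypothetical minimal wrapper in the spirit of ProxyEnv, not rllab's actual implementation):

class PassthroughEnv(object):
    """Hypothetical minimal wrapper: delegate everything to the wrapped env."""

    def __init__(self, wrapped_env):
        self._wrapped_env = wrapped_env

    @property
    def observation_space(self):
        return self._wrapped_env.observation_space

    @property
    def action_space(self):
        return self._wrapped_env.action_space

    def reset(self):
        return self._wrapped_env.reset()

    def step(self, action):
        # subclasses would perturb `action` here (e.g. delay it) or perturb
        # the returned observation (e.g. add noise)
        return self._wrapped_env.step(action)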


# run this smoke test once per environment (nose2-style params decorator)
@tools.params(*envs)
def test_env(env):
    print("Testing", env.__class__)
    ob_space = env.observation_space
    act_space = env.action_space
    ob = env.reset()
    assert ob_space.contains(ob)
    a = act_space.sample()