示例#1
0
def test_export_rcspysim(env, policy, tmpdir):
    """
    Export a policy as TorchScript and check that the version loaded through rcsenv yields the same actions.

    The policy is scripted in double precision, saved to a file inside `tmpdir`, loaded via rcsenv's
    `ControlPolicy`, and both versions are evaluated on uniformly sampled observations.

    :param env: not referenced in the body of this test
    :param policy: policy to export; the test uses its `double()`, `script()`, and `env_spec`
    :param tmpdir: directory in which the exported policy file is written
    """
    from rcsenv import ControlPolicy

    def assert_same_action():
        # Draw one observation and require both policies to return approximately the same action
        sample = policy.env_spec.obs_space.sample_uniform()
        expected = scripted(to.from_numpy(sample)).numpy()
        actual = cpp(sample, policy.env_spec.act_space.flat_dim)
        assert actual == pytest.approx(expected)

    # Script the policy in double precision (for CPP compatibility) and show its graph
    scripted = policy.double().script()
    print(scripted.graph)

    # Write the scripted policy to disk
    export_file = osp.join(tmpdir, 'policy.pt')
    to.jit.save(scripted, export_file)

    # Load the exported file on the C++ side
    cpp = ControlPolicy('torch', export_file)

    # Both versions must agree on a number of random observations
    num_samples = 50
    for _ in range(num_samples):
        assert_same_action()

    # Nothing more to check for policies that do not offer a reset method
    if not hasattr(scripted, 'reset'):
        return

    # Both versions must still agree after they have been reset
    scripted.reset()
    cpp.reset()
    assert_same_action()
示例#2
0
        raise NotImplementedError

    # Trace the policy
    #     traced_net = trace(net, (to.from_numpy(net.env_spec.obs_space.sample_uniform()), net.init_hidden()))
    #     print(traced_net.graph)
    #     print(traced_net(to.from_numpy(net.env_spec.obs_space.sample_uniform()), None))

    stateful_net = script(StatefulRecurrentNetwork(net))
    print(stateful_net.graph)
    print(stateful_net.reset.graph)
    print(list(stateful_net.named_parameters()))

    stateful_net.save(tmpfile)

    # Load in C++
    cp = ControlPolicy("torch", tmpfile)

    inputs = [
        [1.0, 2.0, 3.0, 4.0],
        [3.0, 4.0, 5.0, 6.0],
    ]

    hid_man = net.init_hidden()
    for inp in inputs:
        # Execute manually
        out_man, hid_man = net(to.tensor(inp), hid_man)
        # Execute script
        out_sc = stateful_net(to.tensor(inp))
        # Execute C++
        out_cp = cp(np.array(inp), 2)
示例#3
0
        raise NotImplementedError

    # Trace the policy
    #     traced_net = trace(net, (to.from_numpy(net.env_spec.obs_space.sample_uniform()), net.init_hidden()))
    #     print(traced_net.graph)
    #     print(traced_net(to.from_numpy(net.env_spec.obs_space.sample_uniform()), None))

    stateful_net = script(StatefulRecurrentNetwork(net))
    print(stateful_net.graph)
    print(stateful_net.reset.graph)
    print(list(stateful_net.named_parameters()))

    stateful_net.save(tmpfile)

    # Load in C++
    cp = ControlPolicy('torch', tmpfile)

    inputs = [
        [1., 2., 3., 4.],
        [3., 4., 5., 6.],
    ]

    hid_man = net.init_hidden()
    for inp in inputs:
        # Execute manually
        out_man, hid_man = net(to.tensor(inp), hid_man)
        # Execute script
        out_sc = stateful_net(to.tensor(inp))
        # Execute C++
        out_cp = cp(np.array(inp), 2)