def test_export_rcspysim(env, policy, tmpdir):
    """Check that a scripted policy and its C++ counterpart agree on actions.

    The policy is converted to double precision, scripted, saved to
    ``policy.pt`` inside ``tmpdir``, and loaded again through rcsenv's
    ``ControlPolicy``. Both versions are then queried with the same uniformly
    sampled observations, and their actions must match up to ``pytest.approx``
    tolerance. If the scripted module exposes ``reset``, both sides are reset
    and compared once more.

    :param env: not referenced in the body (presumably requested as a fixture
                dependency -- TODO confirm)
    :param policy: policy under test; must provide ``double()``, ``script()``
                   and ``env_spec``
    :param tmpdir: directory in which the exported file is written
    """
    from rcsenv import ControlPolicy

    # Script the policy in double precision (needed for CPP compatibility)
    scripted = policy.double().script()
    print(scripted.graph)

    # Write the scripted module to disk
    export_file = osp.join(tmpdir, 'policy.pt')
    to.jit.save(scripted, export_file)

    # Load the exported module on the C++ side
    cpp = ControlPolicy('torch', export_file)

    def check_single_sample():
        # Feed one random observation to both implementations and compare
        sample = policy.env_spec.obs_space.sample_uniform()
        expected = scripted(to.from_numpy(sample)).numpy()
        actual = cpp(sample, policy.env_spec.act_space.flat_dim)
        assert actual == pytest.approx(expected)

    # Compare a couple of inputs
    for _ in range(50):
        check_single_sample()

    # Nothing more to verify if the scripted module cannot be reset
    if not hasattr(scripted, 'reset'):
        return

    # Both sides must still agree after resetting their internal state
    scripted.reset()
    cpp.reset()
    check_single_sample()
raise NotImplementedError # Trace the policy # traced_net = trace(net, (to.from_numpy(net.env_spec.obs_space.sample_uniform()), net.init_hidden())) # print(traced_net.graph) # print(traced_net(to.from_numpy(net.env_spec.obs_space.sample_uniform()), None)) stateful_net = script(StatefulRecurrentNetwork(net)) print(stateful_net.graph) print(stateful_net.reset.graph) print(list(stateful_net.named_parameters())) stateful_net.save(tmpfile) # Load in c cp = ControlPolicy("torch", tmpfile) inputs = [ [1.0, 2.0, 3.0, 4.0], [3.0, 4.0, 5.0, 6.0], ] hid_man = net.init_hidden() for inp in inputs: # Execute manually out_man, hid_man = net(to.tensor(inp), hid_man) # Execute script out_sc = stateful_net(to.tensor(inp)) # Execute C++ out_cp = cp(np.array(inp), 2)
raise NotImplementedError # Trace the policy # traced_net = trace(net, (to.from_numpy(net.env_spec.obs_space.sample_uniform()), net.init_hidden())) # print(traced_net.graph) # print(traced_net(to.from_numpy(net.env_spec.obs_space.sample_uniform()), None)) stateful_net = script(StatefulRecurrentNetwork(net)) print(stateful_net.graph) print(stateful_net.reset.graph) print(list(stateful_net.named_parameters())) stateful_net.save(tmpfile) # Load in c cp = ControlPolicy('torch', tmpfile) inputs = [ [1., 2., 3., 4.], [3., 4., 5., 6.], ] hid_man = net.init_hidden() for inp in inputs: # Execute manually out_man, hid_man = net(to.tensor(inp), hid_man) # Execute script out_sc = stateful_net(to.tensor(inp)) # Execute C++ out_cp = cp(np.array(inp), 2)