def record_demo(use_discrete, num_visual=0, num_vector=1):
    """Record demonstrations from a solved environment and write a .demo file.

    Builds a ``RecordEnvironment`` for ``BRAIN_NAME`` (configured with
    ``n_demos=100``), runs ``solve()`` on it, and writes the collected
    demonstration protos together with a brain-parameters proto to a
    freshly created temporary directory.

    Args:
        use_discrete: Selects the discrete action configuration (action size
            ``[2]``) when truthy, otherwise the continuous one (``[1]``).
        num_visual: Forwarded to ``RecordEnvironment`` as ``num_visual``.
        num_vector: Forwarded to ``RecordEnvironment`` as ``num_vector``.

    Returns:
        The path of the written demo file, as a string.
    """
    recorder = RecordEnvironment(
        [BRAIN_NAME],
        use_discrete=use_discrete,
        num_visual=num_visual,
        num_vector=num_vector,
        n_demos=100,
    )
    # True demos could be produced by solving the env in the usual way;
    # here solve() is simply called to execute the optimal policy.
    recorder.solve()
    recorded_protos = recorder.demonstration_protos[BRAIN_NAME]
    meta_proto = DemonstrationMetaProto()

    # Pick every action-type-dependent value in one place.
    if use_discrete:
        action_size, space_type, action_type = [2], discrete, "Discrete"
    else:
        action_size, space_type, action_type = [1], continuous, "Continuous"

    brain_proto = BrainParametersProto(
        vector_action_size=action_size,
        vector_action_descriptions=[""],
        vector_action_space_type=space_type,
        brain_name=BRAIN_NAME,
        is_training=True,
    )

    # tmpdir_factory is not defined in this function; presumably the pytest
    # fixture captured from an enclosing scope -- confirm against the caller.
    file_name = f"1DTest{action_type}.demo"
    target_dir = tmpdir_factory.mktemp("tmp_demo")
    demo_path = str(target_dir.join(file_name))
    write_demo(demo_path, meta_proto, brain_proto, recorded_protos)
    return demo_path
Exemplo n.º 2
0
 def record_demo(action_sizes, num_visual=0, num_vector=1):
     """Record demonstrations from a solved environment and write a .demo file.

     Builds a ``RecordEnvironment`` for ``BRAIN_NAME`` (configured with
     ``n_demos=100``), runs ``solve()`` on it, and writes the collected
     demonstration protos together with a brain-parameters proto to a
     freshly created temporary directory.

     Args:
         action_sizes: Two-element sequence unpacked as
             ``(continuous_action_size, discrete_action_size)``.
         num_visual: Forwarded to ``RecordEnvironment`` as ``num_visual``.
         num_vector: Forwarded to ``RecordEnvironment`` as ``num_vector``.

     Returns:
         The path of the written demo file, as a string.
     """
     env = RecordEnvironment(
         [BRAIN_NAME],
         action_sizes=action_sizes,
         num_visual=num_visual,
         num_vector=num_vector,
         n_demos=100,
     )
     # If we want to use true demos, we can solve the env in the usual way
     # Otherwise, we can just call solve to execute the optimal policy
     env.solve()
     agent_info_protos = env.demonstration_protos[BRAIN_NAME]
     meta_data_proto = DemonstrationMetaProto()
     continuous_action_size, discrete_action_size = action_sizes
     has_discrete_actions = discrete_action_size > 0
     action_spec_proto = ActionSpecProto(
         num_continuous_actions=continuous_action_size,
         num_discrete_actions=discrete_action_size,
         discrete_branch_sizes=[2] if has_discrete_actions else None,
     )
     brain_param_proto = BrainParametersProto(
         brain_name=BRAIN_NAME,
         is_training=True,
         action_spec=action_spec_proto,
     )
     # A non-empty tuple is always truthy, so testing `action_sizes` itself
     # labelled every demo "Discrete". Decide from the discrete action count
     # instead; specs with any discrete actions keep the "Discrete" label.
     action_type = "Discrete" if has_discrete_actions else "Continuous"
     demo_path_name = "1DTest" + action_type + ".demo"
     # tmpdir_factory is not defined in this function; presumably the pytest
     # fixture captured from an enclosing scope -- confirm against the caller.
     demo_path = str(tmpdir_factory.mktemp("tmp_demo").join(demo_path_name))
     write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)
     return demo_path