def worker(
    parent_conn: Connection, step_queue: Queue, pickled_env_factory: str, worker_id: int
) -> None:
    env_factory: Callable[[int], UnityEnvironment] = cloudpickle.loads(
        pickled_env_factory
    )
    env = env_factory(worker_id)

    def _send_response(cmd_name, payload):
        parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))

    try:
        while True:
            cmd: EnvironmentCommand = parent_conn.recv()
            if cmd.name == "step":
                all_action_info = cmd.payload
                actions = {}
                memories = {}
                texts = {}
                values = {}
                for brain_name, action_info in all_action_info.items():
                    actions[brain_name] = action_info.action
                    memories[brain_name] = action_info.memory
                    texts[brain_name] = action_info.text
                    values[brain_name] = action_info.value
                all_brain_info = env.step(actions, memories, texts, values)
                # The timers in this process are independent from those in the other worker
                # processes and the "main" process, so once the root timer has been sent
                # back we can safely clear them.
                # Note that we could randomly return timers a fraction of the time if we
                # wanted to reduce the data transferred.
                # TODO get gauges from the workers and merge them in the main process too.
                step_response = StepResponse(all_brain_info, get_timer_root())
                step_queue.put(EnvironmentResponse("step", worker_id, step_response))
                reset_timers()
            elif cmd.name == "external_brains":
                _send_response("external_brains", env.external_brains)
            elif cmd.name == "reset_parameters":
                _send_response("reset_parameters", env.reset_parameters)
            elif cmd.name == "reset":
                all_brain_info = env.reset(
                    cmd.payload[0], cmd.payload[1], cmd.payload[2]
                )
                _send_response("reset", all_brain_info)
            elif cmd.name == "close":
                break
    except (KeyboardInterrupt, UnityCommunicationException):
        logger.info(f"UnityEnvironment worker {worker_id}: environment stopping.")
        step_queue.put(EnvironmentResponse("env_close", worker_id, None))
    finally:
        # If this worker has put an item in the step queue that hasn't been processed by
        # the EnvManager, the process will hang until the item is processed. We avoid this
        # behavior by using Queue.cancel_join_thread().
        # See https://docs.python.org/3/library/multiprocessing.html#multiprocessing.Queue.cancel_join_thread
        # for more info.
        logger.debug(f"UnityEnvironment worker {worker_id} closing.")
        step_queue.cancel_join_thread()
        step_queue.close()
        env.close()
        logger.debug(f"UnityEnvironment worker {worker_id} done.")

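# A minimal sketch of the message types the worker above exchanges with the parent
# process. The field names and the payload default below are inferred from how `cmd.name`,
# `cmd.payload`, and the `EnvironmentResponse(...)` constructor are used in the function
# body; the real module may define them differently.
from typing import Any, NamedTuple, Optional


class EnvironmentCommand(NamedTuple):
    name: str  # "step", "reset", "external_brains", "reset_parameters", "close", ...
    payload: Any = None


class EnvironmentResponse(NamedTuple):
    name: str
    worker_id: int
    payload: Any


class StepResponse(NamedTuple):
    all_brain_info: Any  # AllBrainInfo in the surrounding codebase
    timer_root: Optional[Any]  # root timer node returned by get_timer_root()
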
def worker(
    parent_conn: Connection, step_queue: Queue, pickled_env_factory: str, worker_id: int
) -> None:
    env_factory: Callable[[int], UnityEnvironment] = cloudpickle.loads(
        pickled_env_factory
    )
    env = env_factory(worker_id)

    def _send_response(cmd_name, payload):
        parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))

    try:
        while True:
            cmd: EnvironmentCommand = parent_conn.recv()
            if cmd.name == "step":
                all_action_info = cmd.payload
                # When an environment is "global_done" it means automatic agent reset won't
                # occur, so we need to perform an academy reset.
                if env.global_done:
                    all_brain_info = env.reset()
                else:
                    actions = {}
                    memories = {}
                    texts = {}
                    values = {}
                    for brain_name, action_info in all_action_info.items():
                        actions[brain_name] = action_info.action
                        memories[brain_name] = action_info.memory
                        texts[brain_name] = action_info.text
                        values[brain_name] = action_info.value
                    all_brain_info = env.step(actions, memories, texts, values)
                # The timers in this process are independent from those in the other worker
                # processes and the "main" process, so once the root timer has been sent
                # back we can safely clear them.
                # Note that we could randomly return timers a fraction of the time if we
                # wanted to reduce the data transferred.
                # TODO get gauges from the workers and merge them in the main process too.
                step_response = StepResponse(all_brain_info, get_timer_root())
                step_queue.put(EnvironmentResponse("step", worker_id, step_response))
                reset_timers()
            elif cmd.name == "external_brains":
                _send_response("external_brains", env.external_brains)
            elif cmd.name == "reset_parameters":
                _send_response("reset_parameters", env.reset_parameters)
            elif cmd.name == "reset":
                all_brain_info = env.reset(
                    cmd.payload[0], cmd.payload[1], cmd.payload[2]
                )
                _send_response("reset", all_brain_info)
            elif cmd.name == "global_done":
                _send_response("global_done", env.global_done)
            elif cmd.name == "close":
                break
    except (KeyboardInterrupt, UnityCommunicationException):
        logger.info(f"UnityEnvironment worker {worker_id}: environment stopping.")
        step_queue.put(EnvironmentResponse("env_close", worker_id, None))
    finally:
        # Cancel the queue's background feeder thread so an unconsumed "step" item cannot
        # make this process hang on exit (see the matching comment in the variant above).
        step_queue.cancel_join_thread()
        step_queue.close()
        env.close()

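# A sketch of how the parent process might launch the worker above. The environment
# factory is serialized with cloudpickle because it is typically a closure (over the
# executable path, ports, etc.) that the default pickler used by multiprocessing cannot
# handle; `create_worker` itself is illustrative, not the original manager API.
import cloudpickle
from multiprocessing import Pipe, Process, Queue


def create_worker(env_factory, worker_id: int, step_queue: Queue):
    parent_conn, child_conn = Pipe()
    process = Process(
        target=worker,
        args=(child_conn, step_queue, cloudpickle.dumps(env_factory), worker_id),
    )
    process.start()
    # The parent keeps parent_conn to send EnvironmentCommands and read direct replies.
    return process, parent_conn
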
def worker(
    parent_conn: Connection,
    step_queue: Queue,
    pickled_env_factory: str,
    worker_id: int,
    engine_configuration: EngineConfig,
) -> None:
    env_factory: Callable[
        [int, List[SideChannel]], UnityEnvironment
    ] = cloudpickle.loads(pickled_env_factory)
    shared_float_properties = FloatPropertiesChannel()
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_configuration)
    env: BaseEnv = env_factory(
        worker_id, [shared_float_properties, engine_configuration_channel]
    )

    def _send_response(cmd_name, payload):
        parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))

    def _generate_all_brain_info() -> AllBrainInfo:
        all_brain_info = {}
        for brain_name in env.get_agent_groups():
            all_brain_info[brain_name] = step_result_to_brain_info(
                env.get_step_result(brain_name),
                env.get_agent_group_spec(brain_name),
                worker_id,
            )
        return all_brain_info

    def external_brains():
        result = {}
        for brain_name in env.get_agent_groups():
            result[brain_name] = group_spec_to_brain_parameters(
                brain_name, env.get_agent_group_spec(brain_name)
            )
        return result

    try:
        while True:
            cmd: EnvironmentCommand = parent_conn.recv()
            if cmd.name == "step":
                all_action_info = cmd.payload
                for brain_name, action_info in all_action_info.items():
                    if len(action_info.action) != 0:
                        env.set_actions(brain_name, action_info.action)
                env.step()
                all_brain_info = _generate_all_brain_info()
                # The timers in this process are independent from those in the other worker
                # processes and the "main" process, so once the root timer has been sent
                # back we can safely clear them.
                # Note that we could randomly return timers a fraction of the time if we
                # wanted to reduce the data transferred.
                # TODO get gauges from the workers and merge them in the main process too.
                step_response = StepResponse(all_brain_info, get_timer_root())
                step_queue.put(EnvironmentResponse("step", worker_id, step_response))
                reset_timers()
            elif cmd.name == "external_brains":
                _send_response("external_brains", external_brains())
            elif cmd.name == "get_properties":
                reset_params = {}
                for k in shared_float_properties.list_properties():
                    reset_params[k] = shared_float_properties.get_property(k)
                _send_response("get_properties", reset_params)
            elif cmd.name == "reset":
                for k, v in cmd.payload.items():
                    shared_float_properties.set_property(k, v)
                env.reset()
                all_brain_info = _generate_all_brain_info()
                _send_response("reset", all_brain_info)
            elif cmd.name == "close":
                break
    except (KeyboardInterrupt, UnityCommunicationException, UnityTimeOutException):
        logger.info(f"UnityEnvironment worker {worker_id}: environment stopping.")
        step_queue.put(EnvironmentResponse("env_close", worker_id, None))
    finally:
        # If this worker has put an item in the step queue that hasn't been processed by
        # the EnvManager, the process will hang until the item is processed. We avoid this
        # behavior by using Queue.cancel_join_thread().
        # See https://docs.python.org/3/library/multiprocessing.html#multiprocessing.Queue.cancel_join_thread
        # for more info.
        logger.debug(f"UnityEnvironment worker {worker_id} closing.")
        step_queue.cancel_join_thread()
        step_queue.close()
        env.close()
        logger.debug(f"UnityEnvironment worker {worker_id} done.")

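# A sketch of the parent-side protocol implied by the workers above: request/reply
# commands ("reset", "external_brains", "get_properties") travel over each worker's Pipe
# and are answered on the same Pipe, while "step" results from all workers are funneled
# through the shared step_queue so the manager can poll them without blocking on any
# single environment. The helpers below are illustrative, not the original EnvManager
# code; the "reset" payload is a dict of float properties, matching the side-channel
# variant directly above.
from queue import Empty as EmptyQueueException


def reset_all(workers, config):
    # workers: list of (process, parent_conn) pairs as returned by create_worker() above.
    for _, parent_conn in workers:
        parent_conn.send(EnvironmentCommand("reset", config))
    return [parent_conn.recv().payload for _, parent_conn in workers]


def drain_steps(step_queue):
    # Gather every step result currently waiting in the shared queue, without blocking.
    results = []
    while True:
        try:
            results.append(step_queue.get_nowait())
        except EmptyQueueException:
            break
    return results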