def __init__(self, process: Process, worker_id: int, conn: Connection):
    """Track one environment worker subprocess and its communication pipe."""
    # Handles for the worker subprocess and the connection used to reach it.
    self.conn = conn
    self.process = process
    self.worker_id = worker_id
    # No step has been taken yet: seed with an empty placeholder step.
    self.previous_step: EnvironmentStep = EnvironmentStep(None, {}, None)
    self.previous_all_action_info: Dict[str, ActionInfo] = {}
    # True while a request is in flight and no response has been received.
    self.waiting = False
def __init__(self, env: BaseEnv, float_prop_channel: FloatPropertiesChannel):
    """Wrap a single in-process environment plus its float-properties channel."""
    super().__init__()
    self.env = env
    self.shared_float_properties = float_prop_channel
    # Placeholders until the first reset()/step() populates real data.
    self.previous_all_action_info: Dict[str, ActionInfo] = {}
    self.previous_step: EnvironmentStep = EnvironmentStep(None, {}, None)
def _postprocess_steps(
    self, env_steps: List[EnvironmentResponse]
) -> List[EnvironmentStep]:
    """Convert raw worker responses into EnvironmentSteps and merge timers.

    For each response, a new step is built from the worker's previously
    recorded step/action info, and that worker's bookkeeping is advanced.
    Any timing trees returned by workers are folded into a single
    "workers" timer node.
    """
    processed: List[EnvironmentStep] = []
    worker_timers = []
    for response in env_steps:
        payload: StepResponse = response.payload
        worker = self.env_workers[response.worker_id]
        step_info = EnvironmentStep(
            worker.previous_step.current_all_brain_info,
            payload.all_brain_info,
            worker.previous_all_action_info,
        )
        # Advance the worker's record so the next step chains correctly.
        worker.previous_step = step_info
        processed.append(step_info)
        if payload.timer_root:
            worker_timers.append(payload.timer_root)
    if worker_timers:
        # Merge all per-worker timing trees under one parallel node.
        with hierarchical_timer("workers") as workers_node:
            for timer_root in worker_timers:
                workers_node.merge(
                    timer_root, root_name="worker_root", is_parallel=True
                )
    return processed
def reset(
    self, config: Dict[str, float] = None
) -> List[EnvironmentStep]:  # type: ignore
    """Reset the environment, first applying any float-property overrides."""
    if config is not None:
        for key, value in config.items():
            self.shared_float_properties.set_property(key, value)
    # A reset step has no previous brain info and no actions.
    first_step = EnvironmentStep(None, self.env.reset(), None)
    self.previous_step = first_step
    return [first_step]
def reset(self, config: Optional[Dict] = None) -> List[EnvironmentStep]:
    """Reset all worker environments in parallel and return their first steps.

    Pending step responses are drained first so that results produced before
    the reset cannot later be mistaken for post-reset observations.

    :param config: Optional reset configuration forwarded to every worker.
    :return: One EnvironmentStep per worker holding its reset observations.
    """
    from queue import Empty  # local import: only needed for queue draining

    # Drain any in-flight step results. Queue.empty() is unreliable for
    # multiprocessing queues (a TOCTOU race with get_nowait()), so use
    # EAFP: attempt the get and ignore the Empty exception.
    while any(ew.waiting for ew in self.env_workers):
        try:
            step = self.step_queue.get_nowait()
        except Empty:
            continue
        self.env_workers[step.worker_id].waiting = False
    # First enqueue reset commands for all workers so they reset in parallel.
    for ew in self.env_workers:
        ew.send("reset", config)
    # Then (synchronously) collect the reset observations from each worker.
    for ew in self.env_workers:
        ew.previous_step = EnvironmentStep(None, ew.recv().payload, None)
    return [ew.previous_step for ew in self.env_workers]
def reset(
    self,
    config: Dict[str, float] = None,
    train_mode: bool = True,
    custom_reset_parameters: Any = None,
) -> List[EnvironmentStep]:  # type: ignore
    """Reset the wrapped environment and return the initial step."""
    brain_info = self.env.reset(
        config=config,
        train_mode=train_mode,
        custom_reset_parameters=custom_reset_parameters,
    )
    # A reset step carries no previous brain info and no action info.
    initial_step = EnvironmentStep(None, brain_info, None)
    self.previous_step = initial_step
    return [initial_step]
def step(self) -> List[EnvironmentStep]:
    """Advance the environment by one step using the current policy actions."""
    action_info_by_brain = self._take_step(self.previous_step)
    self.previous_all_action_info = action_info_by_brain
    # Push each brain's actions into the environment, then advance it.
    for brain_name, info in action_info_by_brain.items():
        self.env.set_actions(brain_name, info.action)
    self.env.step()
    step_info = EnvironmentStep(
        self.previous_step.current_all_brain_info,
        self._generate_all_brain_info(),
        action_info_by_brain,
    )
    self.previous_step = step_info
    return [step_info]
def step(self) -> List[EnvironmentStep]:
    """Step the environment once, feeding per-brain actions and value estimates."""
    all_action_info = self._take_step(self.previous_step)
    self.previous_all_action_info = all_action_info
    # Split each ActionInfo into the parallel per-brain dicts env.step() expects.
    actions = {name: info.action for name, info in all_action_info.items()}
    values = {name: info.value for name, info in all_action_info.items()}
    new_brain_info = self.env.step(vector_action=actions, value=values)
    step_info = EnvironmentStep(
        self.previous_step.current_all_brain_info, new_brain_info, all_action_info
    )
    self.previous_step = step_info
    return [step_info]
def step(self) -> List[EnvironmentStep]:
    """Step the environment once, passing actions, memories, texts and values."""
    action_info_map = self._take_step(self.previous_step)
    self.previous_all_action_info = action_info_map
    # Unpack each brain's ActionInfo into the four parallel dicts required
    # by the legacy env.step() signature.
    actions = {}
    memories = {}
    texts = {}
    values = {}
    for brain_name, info in action_info_map.items():
        actions[brain_name] = info.action
        memories[brain_name] = info.memory
        texts[brain_name] = info.text
        values[brain_name] = info.value
    new_brain_info = self.env.step(actions, memories, texts, values)
    step_info = EnvironmentStep(
        self.previous_step.current_all_brain_info, new_brain_info, action_info_map
    )
    self.previous_step = step_info
    return [step_info]
def __init__(self, env: BaseUnityEnvironment):
    """Wrap a single in-process Unity environment."""
    super().__init__()
    self.env = env
    # Placeholders; populated by the first reset()/step().
    self.previous_all_action_info: Dict[str, ActionInfo] = {}
    self.previous_step: EnvironmentStep = EnvironmentStep(None, {}, None)