def __init__(self, process: Process, worker_id: int, conn: Connection):
     """Record the worker's handles and initialise its per-step bookkeeping.

     :param process: Process object, stored on ``self.process``.
     :param worker_id: Integer id of this worker, stored on ``self.worker_id``.
     :param conn: Connection object, stored on ``self.conn``.
     """
     # Handles supplied by the caller.
     self.process, self.worker_id, self.conn = process, worker_id, conn

     # The waiting flag starts cleared.
     self.waiting = False

     # Bookkeeping starts out empty: no recorded action info, and a
     # placeholder step built from (None, {}, None).
     self.previous_all_action_info: Dict[str, ActionInfo] = {}
     self.previous_step: EnvironmentStep = EnvironmentStep(None, {}, None)
示例#2
0
 def __init__(self, env: BaseEnv,
              float_prop_channel: FloatPropertiesChannel):
     """Wrap a single environment together with its float-properties channel.

     :param env: Environment instance, stored on ``self.env``.
     :param float_prop_channel: Channel object, stored on
         ``self.shared_float_properties``.
     """
     super().__init__()
     # Step bookkeeping starts out empty: no recorded action info, and a
     # placeholder step built from (None, {}, None).
     self.previous_all_action_info: Dict[str, ActionInfo] = {}
     self.previous_step: EnvironmentStep = EnvironmentStep(None, {}, None)
     # Collaborators supplied by the caller.
     self.env = env
     self.shared_float_properties = float_prop_channel
    def _postprocess_steps(
        self, env_steps: List[EnvironmentResponse]
    ) -> List[EnvironmentStep]:
        """Turn worker responses into ``EnvironmentStep`` objects.

        For each response, a new step is assembled from the originating
        worker's previous ``current_all_brain_info``, the ``all_brain_info``
        carried by the response payload, and the worker's
        ``previous_all_action_info``. The worker's ``previous_step`` is then
        replaced by that new step. Truthy ``timer_root`` values found on the
        payloads are merged into a "workers" timer node.

        :param env_steps: Responses to process; each exposes ``payload`` and
            ``worker_id``.
        :return: One ``EnvironmentStep`` per response, in input order.
        """
        processed = []
        worker_timers = []
        for response in env_steps:
            data: StepResponse = response.payload
            worker = self.env_workers[response.worker_id]
            latest = EnvironmentStep(
                worker.previous_step.current_all_brain_info,
                data.all_brain_info,
                worker.previous_all_action_info,
            )
            # Advance the worker so its next step chains from this one.
            worker.previous_step = latest
            processed.append(latest)

            if data.timer_root:
                worker_timers.append(data.timer_root)

        # Nothing to merge: skip opening the "workers" timer altogether.
        if not worker_timers:
            return processed

        with hierarchical_timer("workers") as merged_root:
            for node in worker_timers:
                merged_root.merge(node, root_name="worker_root", is_parallel=True)

        return processed
示例#4
0
 def reset(
     self, config: Dict[str, float] = None
 ) -> List[EnvironmentStep]:  # type: ignore
     """Apply optional float properties, reset the environment, and restart step tracking.

     :param config: Optional mapping of property name to value; each entry is
         forwarded to ``self.shared_float_properties.set_property``. ``None``
         leaves the properties untouched.
     :return: Single-element list holding the step created from the reset.
     """
     # An absent config means there is nothing to push through the channel.
     overrides = () if config is None else config.items()
     for name, value in overrides:
         self.shared_float_properties.set_property(name, value)

     reset_info = self.env.reset()
     # A reset has no preceding brain info and no action info.
     initial_step = EnvironmentStep(None, reset_info, None)
     self.previous_step = initial_step
     return [initial_step]
 def reset(self, config: Optional[Dict] = None) -> List[EnvironmentStep]:
     """Reset every worker and return their initial steps.

     :param config: Optional value sent unchanged to each worker with the
         "reset" command.
     :return: Each worker's new ``previous_step``, in ``self.env_workers`` order.
     """
     workers = self.env_workers

     # Poll the step queue until no worker is flagged as waiting; each
     # dequeued step clears the flag of the worker it came from.
     while any(worker.waiting for worker in workers):
         if self.step_queue.empty():
             continue
         finished = self.step_queue.get_nowait()
         workers[finished.worker_id].waiting = False

     # Send all reset commands first so the workers reset in parallel ...
     for worker in workers:
         worker.send("reset", config)

     # ... then collect the reset observations from each worker in sequence.
     for worker in workers:
         worker.previous_step = EnvironmentStep(None, worker.recv().payload, None)

     return [worker.previous_step for worker in workers]
示例#6
0
 def reset(
     self,
     config: Dict[str, float] = None,
     train_mode: bool = True,
     custom_reset_parameters: Any = None,
 ) -> List[EnvironmentStep]:  # type: ignore
     """Reset the wrapped environment and restart step tracking.

     All three arguments are forwarded as keyword arguments to
     ``self.env.reset``.

     :param config: Passed through as ``config``.
     :param train_mode: Passed through as ``train_mode``.
     :param custom_reset_parameters: Passed through as
         ``custom_reset_parameters``.
     :return: Single-element list holding the step created from the reset.
     """
     reset_kwargs = dict(
         config=config,
         train_mode=train_mode,
         custom_reset_parameters=custom_reset_parameters,
     )
     reset_info = self.env.reset(**reset_kwargs)
     # A reset has no preceding brain info and no action info.
     initial_step = EnvironmentStep(None, reset_info, None)
     self.previous_step = initial_step
     return [initial_step]
示例#7
0
    def step(self) -> List[EnvironmentStep]:
        """Advance the environment by one step and return the resulting step.

        Action info is obtained from ``self._take_step`` for the previous
        step, each brain's action is handed to ``self.env.set_actions``, the
        environment is stepped, and fresh brain info is gathered via
        ``self._generate_all_brain_info``.

        :return: Single-element list holding the new ``EnvironmentStep``,
            which also becomes ``self.previous_step``.
        """
        decisions = self._take_step(self.previous_step)
        self.previous_all_action_info = decisions

        # Queue every brain's action before stepping the environment.
        for name, decision in decisions.items():
            self.env.set_actions(name, decision.action)
        self.env.step()
        fresh_info = self._generate_all_brain_info()

        # Chain the new step from the brain info of the step before it.
        latest = EnvironmentStep(
            self.previous_step.current_all_brain_info,
            fresh_info,
            self.previous_all_action_info,
        )
        self.previous_step = latest
        return [latest]
示例#8
0
    def step(self) -> List[EnvironmentStep]:
        """Advance the environment by one step and return the resulting step.

        Action info is obtained from ``self._take_step`` for the previous
        step. The per-brain ``action`` and ``value`` fields are then passed to
        ``self.env.step`` as ``vector_action`` and ``value``.

        :return: Single-element list holding the new ``EnvironmentStep``,
            which also becomes ``self.previous_step``.
        """
        decisions = self._take_step(self.previous_step)
        self.previous_all_action_info = decisions

        # Split the action info into one brain-keyed dict per field.
        actions = {name: decision.action for name, decision in decisions.items()}
        values = {name: decision.value for name, decision in decisions.items()}
        fresh_info = self.env.step(vector_action=actions, value=values)

        # Chain the new step from the brain info of the step before it.
        latest = EnvironmentStep(
            self.previous_step.current_all_brain_info,
            fresh_info,
            self.previous_all_action_info,
        )
        self.previous_step = latest
        return [latest]
示例#9
0
    def step(self) -> List[EnvironmentStep]:
        """Advance the environment by one step and return the resulting step.

        Action info is obtained from ``self._take_step`` for the previous
        step. The per-brain ``action``, ``memory``, ``text`` and ``value``
        fields are then passed positionally, in that order, to
        ``self.env.step``.

        :return: Single-element list holding the new ``EnvironmentStep``,
            which also becomes ``self.previous_step``.
        """
        decisions = self._take_step(self.previous_step)
        self.previous_all_action_info = decisions

        # Split the action info into one brain-keyed dict per field.
        actions = {name: decision.action for name, decision in decisions.items()}
        memories = {name: decision.memory for name, decision in decisions.items()}
        texts = {name: decision.text for name, decision in decisions.items()}
        values = {name: decision.value for name, decision in decisions.items()}
        fresh_info = self.env.step(actions, memories, texts, values)

        # Chain the new step from the brain info of the step before it.
        latest = EnvironmentStep(
            self.previous_step.current_all_brain_info,
            fresh_info,
            self.previous_all_action_info,
        )
        self.previous_step = latest
        return [latest]
示例#10
0
 def __init__(self, env: BaseUnityEnvironment):
     """Wrap a single environment and initialise step bookkeeping.

     :param env: Environment instance, stored on ``self.env``.
     """
     super().__init__()
     # Step bookkeeping starts out empty: no recorded action info, and a
     # placeholder step built from (None, {}, None).
     self.previous_all_action_info: Dict[str, ActionInfo] = {}
     self.previous_step: EnvironmentStep = EnvironmentStep(None, {}, None)
     self.env = env