def step(self) -> None:
    """Run one learning step, then record statistics, counts, checkpoints and logs.

    The dictionary returned by ``self._step()`` is updated in place with the
    values returned by the counter and is then passed to the logger.
    """
    # Run the learning step.
    fetches = self._step()

    # Loggers are created on demand, keyed by the names found in the first
    # set of fetches; the flag is cleared so this happens only once.
    if self._require_loggers:
        self._create_loggers(list(fetches))
        self._require_loggers = False

    # Compute statistics (before the counter values are merged in).
    self._compute_statistics(fetches)

    # Compute elapsed time. A falsy previous timestamp (e.g. unset) means
    # there is no earlier step to measure against, so report 0.
    timestamp = time.time()
    elapsed_time = timestamp - self._timestamp if self._timestamp else 0
    self._timestamp: float = timestamp

    # Update our counts and record them.
    counts = self._counter.increment(steps=1, walltime=elapsed_time)
    fetches.update(counts)

    # Checkpoint only when a checkpointer is configured.
    if self._system_checkpointer:
        train_utils.checkpoint_networks(self._system_checkpointer)

    # Write the logs only when a logger is configured.
    if self._logger:
        self._logger.write(fetches)
def step(self) -> None:
    """Trainer step to update the parameters of the agents in the system.

    After the learning step, the returned fetches are updated in place with
    the counter values and the current epsilon, epsilon is decremented, and
    the fetches are written to the logger when one is configured.
    """
    # Run the learning step.
    fetches = self._step()

    # Compute elapsed time. A falsy previous timestamp (e.g. unset) means
    # there is no earlier step to measure against, so report 0.
    timestamp = time.time()
    if self._timestamp:
        elapsed_time = timestamp - self._timestamp
    else:
        elapsed_time = 0
    self._timestamp = timestamp  # type: ignore

    # Update our counts and record them.
    counts = self._counter.increment(steps=1, walltime=elapsed_time)
    fetches.update(counts)

    # Checkpoint only when checkpointing is enabled.
    if self._checkpoint:
        train_utils.checkpoint_networks(self._system_checkpointer)

    # Log the epsilon in effect for this step, then decrement it.
    epsilon = self.get_epsilon()
    fetches["epsilon"] = epsilon
    self._decrement_epsilon()

    # Write the logs only when a logger is configured.
    if self._logger:
        self._logger.write(fetches)
def step(self) -> None:
    """Run one learning step, then update counts, checkpoint and log.

    The dictionary returned by ``self._step()`` is updated in place with the
    values returned by the counter and is then passed to the logger.
    """
    # Run the learning step.
    fetches = self._step()

    # Compute elapsed time. A falsy previous timestamp (e.g. unset) means
    # there is no earlier step to measure against, so report 0.
    timestamp = time.time()
    elapsed_time = timestamp - self._timestamp if self._timestamp else 0
    self._timestamp: float = timestamp

    # Update our counts and record them.
    counts = self._counter.increment(steps=1, walltime=elapsed_time)
    fetches.update(counts)

    # Checkpoint only when a checkpointer is configured.
    if self._system_checkpointer:
        train_utils.checkpoint_networks(self._system_checkpointer)

    # Write the logs only when a logger is configured.
    if self._logger:
        self._logger.write(fetches)