Пример #1
0
    def _worker(self, root_dir, parameters, device_queue):
        """Run one training job with the given hyper-parameters.

        Takes a device id from ``device_queue``, pins the job to that device
        via ``CUDA_VISIBLE_DEVICES``, applies ``parameters`` as gin bindings,
        and runs ``train_eval``. The device id is always returned to the
        queue — even on failure — so sibling workers are never starved.

        Args:
            root_dir (str): directory for training artifacts.
            parameters (dict): gin binding name -> value for this trial.
            device_queue (multiprocessing.Queue): pool of available device ids.

        Raises:
            Exception: re-raises whatever the training run raised, after
                logging the full traceback.
        """
        device = None
        try:
            # Sleep for random seconds to avoid crowded launching.
            time.sleep(random.uniform(0, 3))

            device = device_queue.get()
            if self._conf.use_gpu:
                os.environ["CUDA_VISIBLE_DEVICES"] = str(device)
            else:
                os.environ["CUDA_VISIBLE_DEVICES"] = ""  # run on cpu

            if torch.cuda.is_available():
                alf.set_default_device("cuda")
            logging.set_verbosity(logging.INFO)

            logging.info("Search parameters %s" % parameters)
            with gin.unlock_config():
                gin.parse_config(
                    ['%s=%s' % (k, v) for k, v in parameters.items()])
                gin.parse_config(
                    "TrainerConfig.confirm_checkpoint_upon_crash=False")
            train_eval(FLAGS.ml_type, root_dir)
        except Exception:
            logging.info(traceback.format_exc())
            # Bare `raise` preserves the original traceback for the caller.
            raise
        finally:
            # Return the device to the pool even when training crashed;
            # otherwise the slot would be lost for all remaining trials.
            if device is not None:
                device_queue.put(device)
Пример #2
0
 def delayed_dequeue():
     """Dequeue twice, each after a short delay (ring-buffer test helper).

     The trailing comments track the expected ring buffer contents after
     each step, interleaved with an enqueue happening on another thread.
     """
     # Keep tensors on cpu in the subprocess; otherwise the spawn start
     # method would be needed.
     alf.set_default_device("cpu")
     sleep(0.04)
     ring_buffer.dequeue()  # 6(deleted), 7, 8, 9
     sleep(0.04)  # 10, 7, 8, 9
     ring_buffer.dequeue()  # 10, 7(deleted), 8, 9
Пример #3
0
    def _worker(self, conn, env_constructor, env_id=None, flatten=False):
        """The process waits for actions and sends back environment results.

        Runs a simple request/reply loop over ``conn``: attribute access
        (``_ACCESS``), method calls (``_CALL``), and shutdown (``_CLOSE``).

        Args:
            conn (multiprocessing.connection): Connection for communication to the main process.
            env_constructor (Callable): callable environment creator.
            env_id: optional id forwarded to ``env_constructor``.
            flatten (bool): whether to assume flattened actions and time_steps
              during communication to avoid overhead.

        Raises:
            KeyError: When receiving a message of unknown type.
        """
        try:
            # NOTE(review): presumably forces cpu tensors so results can
            # cross the process boundary — confirm against callers.
            alf.set_default_device("cpu")
            env = env_constructor(env_id)
            action_spec = env.action_spec()
            conn.send(self._READY)  # Ready.
            while True:
                try:
                    # Only block for short times to have keyboard exceptions be raised.
                    if not conn.poll(0.1):
                        continue
                    message, payload = conn.recv()
                except (EOFError, KeyboardInterrupt):
                    break
                if message == self._ACCESS:
                    # Attribute access: payload is the attribute name.
                    name = payload
                    result = getattr(env, name)
                    conn.send((self._RESULT, result))
                    continue
                if message == self._CALL:
                    # Method call: payload is (method_name, args, kwargs).
                    name, args, kwargs = payload
                    if flatten and name == 'step':
                        # Re-nest the flattened action per the action spec.
                        args = [nest.pack_sequence_as(action_spec, args[0])]
                    result = getattr(env, name)(*args, **kwargs)
                    if flatten and name in ['step', 'reset']:
                        result = nest.flatten(result)
                        # The protocol forbids tensors in flattened results
                        # (see the assertion message).
                        assert all([
                            not isinstance(x, torch.Tensor) for x in result
                        ]), ("Tensor result is not allowed: %s" % name)
                    conn.send((self._RESULT, result))
                    continue
                if message == self._CLOSE:
                    assert payload is None
                    env.close()
                    break
                raise KeyError(
                    'Received message of unknown type {}'.format(message))
        except Exception:  # pylint: disable=broad-except
            # Ship the full stacktrace to the parent process instead of
            # dying silently in the child.
            etype, evalue, tb = sys.exc_info()
            stacktrace = ''.join(traceback.format_exception(etype, evalue, tb))
            message = 'Error in environment process: {}'.format(stacktrace)
            logging.error(message)
            conn.send((self._EXCEPTION, stacktrace))
        finally:
            conn.close()
Пример #4
0
 def __init__(self, *args):
     """Build the test's replay data spec; all tensors default to cpu."""
     super().__init__(*args)
     # Run on cpu: using cuda in a forked subprocess would require the
     # spawn start method.
     alf.set_default_device("cpu")

     def _scalar(dtype):
         # Fresh zero-dim spec of the given dtype for each field.
         return alf.TensorSpec(shape=(), dtype=dtype)

     self.data_spec = DataItem(
         env_id=_scalar(torch.int64),
         x=alf.TensorSpec(shape=(self.dim, ), dtype=torch.float32),
         t=_scalar(torch.int32),
         o=dict({
             "a": _scalar(torch.float32),
             "g": _scalar(torch.float32)
         }),
         reward=_scalar(torch.float32))
Пример #5
0
 def delayed_enqueue(ring_buffer, batch):
     """Enqueue ``batch`` after a short delay (ring-buffer test helper).

     Args:
         ring_buffer: buffer under test; receives ``batch`` keyed by its
             ``env_id`` field.
         batch: data item to enqueue; must carry an ``env_id`` attribute.
     """
     # Keep tensors on cpu in the subprocess; otherwise the spawn start
     # method would be needed.
     alf.set_default_device("cpu")
     sleep(0.04)
     ring_buffer.enqueue(batch, batch.env_id)