Exemplo n.º 1
0
def stack_specs(*specs):
    """Combine several matching specs into a single batched spec.

    Every spec after the first is asserted to share the first spec's
    ``shape``, ``dtype`` and ``name``.

    * If the first spec is a ``BoundedArraySpec``, a new
      ``BoundedArraySpec`` is returned whose shape is
      ``(len(specs),) + shape``, whose dtype is the shared dtype, whose
      bounds are the smallest ``minimum`` and largest ``maximum`` found
      across all specs, and whose name is ``'batched_' + name``
      (``'batched_spec'`` when the name is ``None``).
    * Otherwise ``expand_dims(len(specs), axis=0)`` is called on the
      first spec and that same object is returned.
      NOTE(review): the return value of ``expand_dims`` is ignored, so it
      presumably modifies the first spec in place -- confirm.

    Raises:
        IndexError: if called without any spec.
        AssertionError: if the specs disagree on shape, dtype or name
            (only while assertions are enabled).
    """
    first = specs[0]

    # All remaining specs must agree with the first on these attributes.
    for other in specs[1:]:
        for attr in ('shape', 'dtype', 'name'):
            assert getattr(other, attr) == getattr(first, attr)

    if not isinstance(first, BoundedArraySpec):
        first.expand_dims(len(specs), axis=0)
        return first

    # Widen the bounds so that they cover every individual spec.
    lowest, highest = first.minimum, first.maximum
    for member in specs:
        lowest = min(lowest, member.minimum)
        highest = max(highest, member.maximum)

    batched_shape = (len(specs), ) + first.shape
    return BoundedArraySpec(
        batched_shape,
        first.dtype,
        lowest,
        highest,
        name='batched_' + (first.name if first.name is not None else 'spec'))
Exemplo n.º 2
0
 def action_spec(self):
     """Build the ``BoundedArraySpec`` named ``'action_spec'``.

     The spec is ``np.int32`` with values between 0 and
     ``self._max_nodes - 1``. Its shape depends on the configuration:

     * ``config.muldi_actions`` falsy: scalar shape ``()``.
     * ``config.muldi_actions`` truthy: shape ``(max_k,)``, where
       ``max_k`` is the largest entry of ``config.k_schedule.values``
       when ``config.k_schedule.enable`` is set, and ``self.k``
       otherwise.
     """
     cfg = self.config

     # Only the shape differs between the configurations.
     if not cfg.muldi_actions:
         action_shape = ()
     elif cfg.k_schedule.enable:
         action_shape = (max(cfg.k_schedule.values), )
     else:
         action_shape = (self.k, )

     return BoundedArraySpec(action_shape,
                             np.int32,
                             minimum=0,
                             maximum=self._max_nodes - 1,
                             name='action_spec')
Exemplo n.º 3
0
 def _make_step_spec(self, obs_spec):
     """Create the step spec dict, store it on ``self`` and return it.

     The dict is assigned to ``self._step_spec`` and has four entries:

     * ``step_type``: scalar ``np.int8`` ``BoundedArraySpec`` bounded
       by 0 and 2.
     * ``reward``: scalar ``np.float32`` ``ArraySpec``.
     * ``discount``: scalar ``np.float32`` ``ArraySpec``.
     * ``observation``: the ``obs_spec`` argument, stored as given.

     Args:
         obs_spec: spec to place under the ``observation`` key.

     Returns:
         The dict that was stored in ``self._step_spec``.
     """
     step_type_spec = BoundedArraySpec(dtype=np.int8,
                                       shape=(),
                                       minimum=0,
                                       maximum=2,
                                       name='batched_env_step_type_spec')
     reward_spec = ArraySpec(dtype=np.float32,
                             shape=(),
                             name='batched_env_reward_spec')
     discount_spec = ArraySpec(dtype=np.float32,
                               shape=(),
                               name='batched_env_discount_spec')

     self._step_spec = {
         'step_type': step_type_spec,
         'reward': reward_spec,
         'discount': discount_spec,
         'observation': obs_spec,
     }
     return self._step_spec
Exemplo n.º 4
0
 def action_spec(self):
     """Build the scalar ``np.int32`` spec named ``'action_spec'``.

     Values are bounded by 0 and ``len(self._graph_features.nodes) - 1``.
     """
     largest_action = len(self._graph_features.nodes) - 1
     return BoundedArraySpec(
         (),
         np.int32,
         minimum=0,
         maximum=largest_action,
         name='action_spec',
     )
Exemplo n.º 5
0
 def action_spec(self):
   """Build the scalar ``np.int32`` spec named ``'action_spec'``.

   Values are bounded by 0 and ``len(self._variable_nodes) - 1``.
   """
   largest_action = len(self._variable_nodes) - 1
   return BoundedArraySpec(
       (),
       np.int32,
       minimum=0,
       maximum=largest_action,
       name='action_spec',
   )
Exemplo n.º 6
0
 def action_spec(self):
   """Build the scalar ``np.int32`` spec named ``'action_spec'``.

   Values are bounded by 0 and ``len(self._nx_graph) - 1``.
   """
   largest_action = len(self._nx_graph) - 1
   return BoundedArraySpec(
       (),
       np.int32,
       minimum=0,
       maximum=largest_action,
       name='action_spec',
   )
Exemplo n.º 7
0
 def action_spec(self):
     """Build the scalar ``np.int32`` spec named ``'action_spec'``.

     The only permitted values are 0 and 1 (both bounds inclusive is
     assumed from the ``minimum``/``maximum`` naming -- TODO confirm).
     """
     scalar_shape = ()
     return BoundedArraySpec(
         scalar_shape,
         np.int32,
         minimum=0,
         maximum=1,
         name='action_spec',
     )