def action_spec(self) -> t.Tuple[specs.DiscreteArray, specs.DiscreteArray]:
  N = 9  # 1-hop neighbourhood
  return (
      specs.DiscreteArray(num_values=N, dtype=np.int8, name='P-action'),
      specs.DiscreteArray(num_values=N, dtype=np.int8, name='D-action'),
  )
def __init__(self,
             scaling_factor=1.,
             action_layers='agent',
             control_velocity=False,
             momentum=0.):
  """Constructor.

  Args:
    scaling_factor: Scalar. Scaling factor multiplied to the action.
    action_layers: String or iterable of strings. Elements (or itself if
      string) must be keys in the environment state. All sprites in these
      layers will be acted upon by this action space.
    control_velocity: Bool. Whether to control velocity (True) or force
      (False).
    momentum: Float in [0, 1]. Discount factor for previous action. This
      should be zero if control_velocity is False, because imparting forces
      automatically gives momentum to the agent(s) being controlled. If
      control_velocity is True, setting this greater than zero gives the
      controlled agent(s) momentum. However, the velocity is clipped at
      scaling_factor, so the agent only retains momentum when stopping or
      changing direction and does not accelerate.
  """
  self._scaling_factor = scaling_factor
  if not isinstance(action_layers, (list, tuple)):
    action_layers = (action_layers,)
  self._action_layers = action_layers
  self._control_velocity = control_velocity
  self._momentum = momentum
  self._action_spec = specs.DiscreteArray(len(self._ACTIONS))
def tensor_spec_to_dm_env_spec(
    tensor_spec: dm_env_rpc_pb2.TensorSpec) -> specs.Array:
  """Returns a dm_env spec given a dm_env_rpc TensorSpec.

  Args:
    tensor_spec: A dm_env_rpc TensorSpec protobuf.

  Returns:
    Either a DiscreteArray, BoundedArray, StringArray or Array, depending on
    the content of the TensorSpec.
  """
  np_type = tensor_utils.data_type_to_np_type(tensor_spec.dtype)
  if tensor_spec.HasField('min') or tensor_spec.HasField('max'):
    bounds = tensor_spec_utils.bounds(tensor_spec)
    if (not tensor_spec.shape and np.issubdtype(np_type, np.integer) and
        bounds.min == 0 and tensor_spec.HasField('max')):
      return specs.DiscreteArray(
          num_values=bounds.max + 1, dtype=np_type, name=tensor_spec.name)
    else:
      return specs.BoundedArray(
          shape=tensor_spec.shape,
          dtype=np_type,
          name=tensor_spec.name,
          minimum=bounds.min,
          maximum=bounds.max)
  else:
    if tensor_spec.dtype == dm_env_rpc_pb2.DataType.STRING:
      return specs.StringArray(shape=tensor_spec.shape, name=tensor_spec.name)
    else:
      return specs.Array(
          shape=tensor_spec.shape, dtype=np_type, name=tensor_spec.name)
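# --- Usage sketch (illustrative addition, not part of the function above). ---
# Mirrors the scalar-bounds test further below: a scalar UINT32 TensorSpec with
# min 0 and max 9 converts to a 10-value DiscreteArray. The dm_env_rpc.v1
# import paths are an assumption; adjust them to your checkout if they differ.
import numpy as np
from dm_env import specs
from dm_env_rpc.v1 import dm_env_rpc_pb2
from dm_env_rpc.v1 import dm_env_utils

tensor_spec = dm_env_rpc_pb2.TensorSpec()
tensor_spec.dtype = dm_env_rpc_pb2.DataType.UINT32
tensor_spec.name = 'foo'
tensor_spec.min.uint32s.array[:] = [0]
tensor_spec.max.uint32s.array[:] = [9]

spec = dm_env_utils.tensor_spec_to_dm_env_spec(tensor_spec)
assert spec == specs.DiscreteArray(num_values=10, dtype=np.uint32, name='foo')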
def testProperties(self):
  num_values = 5
  spec = specs.DiscreteArray(num_values=5)
  self.assertEqual(spec.minimum, 0)
  self.assertEqual(spec.maximum, num_values - 1)
  self.assertEqual(spec.dtype, np.int32)
  self.assertEqual(spec.num_values, num_values)
def testReplace(self, arg_name, new_value):
  old_spec = specs.DiscreteArray(2, np.int32, "test")
  new_spec = old_spec.replace(**{arg_name: new_value})
  self.assertIsNot(old_spec, new_spec)
  self.assertEqual(getattr(new_spec, arg_name), new_value)
  for attr_name in set(["num_values", "dtype", "name"]).difference([arg_name]):
    self.assertEqual(
        getattr(new_spec, attr_name), getattr(old_spec, attr_name))
def __init__(self, step_size=0.05, motion_cost=0.):
  """Constructor.

  Args:
    step_size: Fraction of the arena width the sprite moves for each step.
    motion_cost: Each step incurs cost motion_cost * step_size.
  """
  self._step_size = step_size
  self._motion_cost = motion_cost
  self._action_spec = [
      specs.DiscreteArray(num_values=2, dtype=np.int64),
      specs.DiscreteArray(num_values=4, dtype=np.int64),
  ]
  self.action_to_motion = {
      0: np.array([0, -self._step_size]),
      1: np.array([-self._step_size, 0]),
      2: np.array([0, self._step_size]),
      3: np.array([self._step_size, 0]),
  }
def __init__(self,
             episode_length,
             canvas_width,
             grid_width,
             brush_sizes,
             rewards=None,
             discount=1.,
             shaders_basedir=""):
  self._name = "fluid_paint"

  if brush_sizes is None:
    self._brush_sizes = [10.0, 30.0, 50.0]
  else:
    self._brush_sizes = brush_sizes

  self._canvas_width = canvas_width
  self._grid_width = grid_width
  self._grid_size = grid_width * grid_width
  self._rewards = rewards

  # Build action specification and action masks.
  self._action_spec = collections.OrderedDict([
      ("control", specs.DiscreteArray(self._grid_size)),
      ("end", specs.DiscreteArray(self._grid_size)),
      ("flag", specs.DiscreteArray(2)),
      ("speed", specs.DiscreteArray(len(self.STROKES_PER_STEP))),
      ("size", specs.DiscreteArray(len(self._brush_sizes))),
      ("red", specs.DiscreteArray(len(self.R_VALUES))),
      ("green", specs.DiscreteArray(len(self.G_VALUES))),
      ("blue", specs.DiscreteArray(len(self.B_VALUES))),
      ("alpha", specs.DiscreteArray(len(self.A_VALUES))),
  ])
  self._action_masks = copy.deepcopy(self.ACTION_MASKS)

  self._brush_params = None
  self._prev_reward = 0

  config = config_pb2.Config()
  self._wrapper = pyfluid.Wrapper(config.SerializeToString())
  self._wrapper.Setup(self._canvas_width, self._canvas_width, shaders_basedir)

  self._episode_step = 0
  self._episode_length = episode_length
  self._prev_step_type = None
  self._discount = discount
def space2spec(space: gym.Space, name: str = None):
  """Converts an OpenAI Gym space to a dm_env spec or nested structure of specs.

  Box, MultiBinary and MultiDiscrete Gym spaces are converted to BoundedArray
  specs. Discrete OpenAI spaces are converted to DiscreteArray specs. Tuple and
  Dict spaces are recursively converted to tuples and dictionaries of specs.

  Args:
    space: The Gym space to convert.
    name: Optional name to apply to all return spec(s).

  Returns:
    A dm_env spec or nested structure of specs, corresponding to the input
    space.
  """
  if isinstance(space, spaces.Discrete):
    return specs.DiscreteArray(num_values=space.n, dtype=space.dtype, name=name)

  elif isinstance(space, spaces.Box):
    return specs.BoundedArray(shape=space.shape, dtype=space.dtype,
                              minimum=space.low, maximum=space.high, name=name)

  elif isinstance(space, spaces.MultiBinary):
    return specs.BoundedArray(shape=space.shape, dtype=space.dtype,
                              minimum=0.0, maximum=1.0, name=name)

  elif isinstance(space, spaces.MultiDiscrete):
    return specs.BoundedArray(shape=space.shape, dtype=space.dtype,
                              minimum=np.zeros(space.shape),
                              maximum=space.nvec, name=name)

  elif isinstance(space, spaces.Tuple):
    return tuple(space2spec(s, name) for s in space.spaces)

  elif isinstance(space, spaces.Dict):
    return {
        key: space2spec(value, name) for key, value in space.spaces.items()
    }

  else:
    raise ValueError('Unexpected gym space: {}'.format(space))
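# --- Usage sketch (illustrative addition, not part of space2spec itself). ---
# Assumes gym and dm_env are installed and that space2spec is imported from
# the module above; shows the Discrete -> DiscreteArray and Box -> BoundedArray
# conversions.
import numpy as np
from gym import spaces

action_spec = space2spec(spaces.Discrete(4), name='action')
# action_spec is a DiscreteArray with num_values=4.

obs_spec = space2spec(
    spaces.Box(low=0.0, high=1.0, shape=(3,), dtype=np.float32),
    name='observation')
# obs_spec is a BoundedArray with shape (3,), minimum 0.0 and maximum 1.0.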
def observation_spec(
    self
) -> t.Tuple[specs.Array, specs.DiscreteArray, specs.DiscreteArray,
             t.Optional[specs.DiscreteArray]]:
  N = len(self._sim.zone_dict)
  N_lives = self._sim.max_n_lives
  return (
      specs.Array(shape=(N, 2), dtype=np.int16, name='Zones'),
      specs.DiscreteArray(num_values=N, dtype=np.int16, name='P-zone'),
      specs.DiscreteArray(num_values=N, dtype=np.int16, name='D-zone'),
      specs.DiscreteArray(num_values=N_lives + 1, dtype=np.int16, name='Lives')
      if N_lives else None,
  )
def action_spec(self, *args, **kwargs):
  action_spec = {}
  # The action spec of the unity_environment is documented in
  # unity/environment.py.
  unity_action_spec = self._unity_environment.action_spec()
  action_spec["Horizontal"] = unity_action_spec["SetPosX"]
  action_spec["Vertical"] = unity_action_spec["SetPosY"]
  action_spec["Sticky"] = specs.DiscreteArray(num_values=2)
  action_spec["Selector"] = specs.BoundedArray(
      [],
      dtype=np.float32,
      minimum=-self._display_limit,
      maximum=self._display_limit)
  return action_spec
def test_scalar_with_0_n_bounds_gives_discrete_array(self):
  tensor_spec = dm_env_rpc_pb2.TensorSpec()
  tensor_spec.dtype = dm_env_rpc_pb2.DataType.UINT32
  tensor_spec.name = 'foo'
  max_value = 9
  tensor_spec.min.uint32s.array[:] = [0]
  tensor_spec.max.uint32s.array[:] = [max_value]
  actual = dm_env_utils.tensor_spec_to_dm_env_spec(tensor_spec)
  expected = specs.DiscreteArray(
      num_values=max_value + 1, dtype=np.uint32, name='foo')
  self.assertEqual(expected, actual)
  self.assertEqual(0, actual.minimum)
  self.assertEqual(max_value, actual.maximum)
  self.assertEqual('foo', actual.name)
def action_spec(self) -> specs.DiscreteArray:
  return specs.DiscreteArray(9, name="action")
def action_spec(self) -> specs.DiscreteArray:
  """Returns the action spec."""
  return specs.DiscreteArray(
      dtype=int, num_values=len(_ACTIONS), name="action")
def action_spec(self):
  return specs.DiscreteArray(2, name='action')
def testDtypeNotIntegral(self, dtype):
  with self.assertRaisesWithLiteralMatch(
      ValueError, specs._DTYPE_NOT_INTEGRAL.format(dtype)):
    specs.DiscreteArray(num_values=5, dtype=dtype)
def action_spec(self) -> dm_specs.DiscreteArray:
  """Returns the action spec."""
  return dm_specs.DiscreteArray(self.max_moves, dtype=int, name="action")
def action_spec(self):
  return specs.DiscreteArray(dtype=int, num_values=3, name='action')
def testSerialization(self):
  desc = specs.DiscreteArray(2, np.int32, "test")
  self.assertEqual(pickle.loads(pickle.dumps(desc)), desc)
def observation_spec(self):
  """Returns the observation spec."""
  return specs.DiscreteArray(dtype=int, num_values=2, name="board")
def action_spec(self):
  return specs.DiscreteArray(num_values=10, name='action')
def testRepr(self):
  as_string = repr(specs.DiscreteArray(num_values=5))
  self.assertIn("num_values=5", as_string)
def testDtypeOverflow(self, num_values, dtype):
  with self.assertRaisesWithLiteralMatch(
      ValueError, specs._DTYPE_OVERFLOW.format(np.dtype(dtype), num_values)):
    specs.DiscreteArray(num_values=num_values, dtype=dtype)
def observation_spec(self) -> specs.DiscreteArray:
  return specs.DiscreteArray(sum(self.shape), name="observation")
def action_spec(self):
  return specs.DiscreteArray(self._n_actions, name='action')
def action_spec(self):
  return specs.DiscreteArray(self._env.action_space.n)
def action_spec(self):
  return specs.DiscreteArray(dtype=int, num_values=0, name="action")
def __init__(self,
             episode_length,
             canvas_width,
             grid_width,
             brush_type,
             brush_sizes,
             use_color,
             use_pressure=True,
             use_alpha=False,
             background="white",
             rewards=None,
             discount=1.,
             brushes_basedir=""):
  self._name = "libmypaint"

  if brush_sizes is None:
    brush_sizes = [1, 2, 3]

  self._canvas_width = canvas_width
  self._grid_width = grid_width
  self._grid_size = grid_width * grid_width
  self._use_color = use_color
  self._use_alpha = use_alpha
  if not self._use_color:
    self._output_channels = 1
  elif not self._use_alpha:
    self._output_channels = 3
  else:
    self._output_channels = 4
  self._use_pressure = use_pressure
  assert np.all(np.array(brush_sizes) > 0.)
  self._log_brush_sizes = [np.log(float(i)) for i in brush_sizes]
  self._rewards = rewards

  # Build action specification and action masks.
  self._action_spec = collections.OrderedDict([
      ("control", specs.DiscreteArray(self._grid_size)),
      ("end", specs.DiscreteArray(self._grid_size)),
      ("flag", specs.DiscreteArray(2)),
      ("pressure", specs.DiscreteArray(len(self.P_VALUES))),
      ("size", specs.DiscreteArray(len(self._log_brush_sizes))),
      ("red", specs.DiscreteArray(len(self.R_VALUES))),
      ("green", specs.DiscreteArray(len(self.G_VALUES))),
      ("blue", specs.DiscreteArray(len(self.B_VALUES))),
  ])
  self._action_masks = copy.deepcopy(self.ACTION_MASKS)

  def remove_action_mask(name):
    for k in self._action_masks.keys():
      del self._action_masks[k][name]

  if not self._use_pressure:
    del self._action_spec["pressure"]
    remove_action_mask("pressure")

  if len(self._log_brush_sizes) > 1:
    self._use_size = True
  else:
    del self._action_spec["size"]
    remove_action_mask("size")
    self._use_size = False

  if not self._use_color:
    for k in self.COLOR_ACTIONS:
      del self._action_spec[k]
      remove_action_mask(k)

  # Setup the painting surface.
  if background == "white":
    background = pylibmypaint.SurfaceWrapper.Background.kWhite
  elif background == "transparent":
    background = pylibmypaint.SurfaceWrapper.Background.kBlack
  else:
    raise ValueError("Invalid background type: {}".format(background))
  self._surface = pylibmypaint.SurfaceWrapper(
      self._canvas_width, self._canvas_width, background)

  # Setup the brush.
  self._brush = pylibmypaint.BrushWrapper()
  self._brush.SetSurface(self._surface)
  self._brush.LoadFromFile(
      os.path.join(brushes_basedir, "brushes/{}.myb".format(brush_type)))

  self._episode_step = 0
  self._episode_length = episode_length
  self._prev_step_type = None
  self._discount = discount
def action_spec(self) -> specs.DiscreteArray:
  action_spec = self._environment.action_spec()[0]
  return specs.DiscreteArray(
      num_values=action_spec.maximum.item() + 1,
      dtype=action_spec.dtype,
      name='action_spec')
def action_spec(self) -> specs.DiscreteArray:
  raw_spec = self._environment.action_spec()[0]
  return specs.DiscreteArray(
      num_values=raw_spec.maximum.item() - raw_spec.minimum.item() + 1)
def testInvalidNumActions(self, num_values):
  with self.assertRaisesWithLiteralMatch(
      ValueError, specs._NUM_VALUES_NOT_POSITIVE.format(num_values)):
    specs.DiscreteArray(num_values=num_values)