Python OLT示例，mava.types.OLT Python示例

示例#1

0

显示文件

def get_expected_parallel_timesteps_1() -> TimeStep:
    """Build the expected first parallel timestep for three agents.

    Returns:
      A `TimeStep` with `StepType.FIRST` whose reward, discount and
      observation fields are dicts keyed by "agent_0", "agent_1" and
      "agent_2". Every agent has reward 0.0, discount 1.0 and an `OLT`
      holding the same observation, legal actions and terminal values.
    """
    agent_ids = ("agent_0", "agent_1", "agent_2")

    rewards = {agent_id: 0.0 for agent_id in agent_ids}
    discounts = {agent_id: 1.0 for agent_id in agent_ids}
    # The list literals sit inside the comprehension so each agent gets its
    # own list objects rather than sharing one instance.
    observations = {
        agent_id: OLT(
            observation=[0.1, 0.3, 0.7],
            legal_actions=[1],
            terminal=[0.0],
        )
        for agent_id in agent_ids
    }

    return TimeStep(
        step_type=StepType.FIRST,
        reward=rewards,
        discount=discounts,
        observation=observations,
    )

示例#2

0

显示文件

文件： debugging_envs.py 项目： NetColby/DNRL

    def _convert_observations(
        self, observes: Dict[str, np.ndarray], dones: Dict[str, bool]
    ) -> Dict[str, OLT]:
        """Wrap each agent's raw observation in an `OLT`.

        Args:
          observes: per-agent raw observations. An entry may also be a dict
            containing an "action_mask" key, in which case its "action_mask"
            and "observation" items are used.
          dones: per-agent done flags, looked up with the same agent keys.

        Returns:
          A dict mapping each agent key to an `OLT` with a float32 observation
          array, the legal actions, and a one-element float32 terminal array.
        """
        converted: Dict[str, OLT] = {}
        for agent_id, raw in observes.items():
            has_mask = isinstance(raw, dict) and "action_mask" in raw
            if has_mask:
                legal_actions = raw["action_mask"]
                raw = raw["observation"]
            else:
                # TODO Handle legal actions better for continuous envs,
                #  maybe have min and max for each action and clip the agents
                #  actions accordingly
                # No mask supplied: mark everything legal with an all-ones
                # array shaped by the agent's action spec.
                action_space = self._environment.action_spaces[agent_id]
                legal_actions = np.ones(
                    _convert_to_spec(action_space).shape,
                    dtype=action_space.dtype,
                )

            converted[agent_id] = OLT(
                observation=np.array(raw, dtype=np.float32),
                legal_actions=legal_actions,
                terminal=np.asarray([dones[agent_id]], dtype=np.float32),
            )

        return converted

示例#3

0

显示文件

def get_seq_timesteps_1() -> TimeStep:
    """Build a single first-step `TimeStep` with scalar reward and discount.

    Returns:
      A `TimeStep` with `StepType.FIRST`, reward 0.0, discount 1.0 and one
      `OLT` observation.
    """
    first_observation = OLT(
        observation=[0.1, 0.3, 0.7],
        legal_actions=[1],
        terminal=[0.0],
    )
    return TimeStep(
        step_type=StepType.FIRST,
        reward=0.0,
        discount=1.0,
        observation=first_observation,
    )

示例#4

0

显示文件

def get_seq_timesteps_dict_2() -> Dict[str, SeqTimestepDict]:
    """Build per-agent first-step timesteps paired with an action.

    Returns:
      A dict keyed by "agent_0", "agent_1" and "agent_2". Each value is a dict
      with a "timestep" entry (a `TimeStep` with `StepType.FIRST`) and an
      "action" entry (an int).
    """
    # Columns: agent id, reward, discount, observation values, action.
    # Reward literals keep their original int/float types.
    rows = (
        ("agent_0", -1, 0.8, [0.1, 0.5, 0.7], 0),
        ("agent_1", 0.0, 0.8, [0.8, 0.3, 0.7], 2),
        ("agent_2", 1, 1.0, [0.9, 0.9, 0.8], 1),
    )

    return {
        agent_id: {
            "timestep": TimeStep(
                step_type=StepType.FIRST,
                reward=reward,
                discount=discount,
                observation=OLT(
                    observation=values,
                    legal_actions=[1],
                    terminal=[0.0],
                ),
            ),
            "action": action,
        }
        for agent_id, reward, discount, values, action in rows
    }

示例#5

0

显示文件

文件： debugging_envs.py 项目： NetColby/DNRL

 def observation_spec(self) -> Dict[str, OLT]:
     """Build an `OLT` spec for every id in `self._environment.agent_ids`.

     Returns:
       A dict mapping each agent id to an `OLT` whose observation and
       legal-action fields are the converted observation and action spaces
       of that agent, and whose terminal field is a float32 array spec of
       shape (1,).
     """
     return {
         agent_id: OLT(
             observation=_convert_to_spec(
                 self._environment.observation_spaces[agent_id]
             ),
             legal_actions=_convert_to_spec(
                 self._environment.action_spaces[agent_id]
             ),
             terminal=specs.Array((1,), np.float32),
         )
         for agent_id in self._environment.agent_ids
     }

示例#6

0

显示文件

文件： flatland.py 项目： NetColby/DNRL

 def observation_spec(self) -> Dict[str, OLT]:
     """Build an `OLT` spec for every agent in `self.agents`.

     Returns:
       A dict mapping each agent to an `OLT`. When `self._include_agent_info`
       is truthy the observation field is a 2-tuple of the converted
       observation space and `agent_info_spec()`; otherwise it is the
       converted observation space alone. The terminal field is a float32
       array spec of shape (1,).
     """
     specs_by_agent = {}
     for agent_id in self.agents:
         if self._include_agent_info:
             observation = (
                 _convert_to_spec(self.observation_spaces[agent_id]),
                 agent_info_spec(),
             )
         else:
             observation = _convert_to_spec(self.observation_spaces[agent_id])

         specs_by_agent[agent_id] = OLT(
             observation=observation,
             legal_actions=_convert_to_spec(self.action_spaces[agent_id]),
             terminal=specs.Array((1,), np.float32),
         )
     return specs_by_agent

示例#7

0

显示文件

文件： mocks.py 项目： NetColby/DNRL

    def observation_spec(self) -> OLT:
        """Return the `OLT` spec for the currently active agent.

        The active agent is `self.agent_selection` when that attribute
        exists, otherwise the first entry of `self.agents`.

        Returns:
          An `OLT` holding the parent class's observation spec, the active
          agent's entry of `self.action_spec()`, and a float32 terminal array
          spec of shape (1,).
        """
        # Conditional expression keeps the fallback lazy: `self.agents[0]` is
        # only evaluated when `agent_selection` is missing.
        current_agent = (
            self.agent_selection
            if hasattr(self, "agent_selection")
            else self.agents[0]
        )

        parent_spec = super().observation_spec()
        legal_spec = self.action_spec()[current_agent]
        terminal_spec = specs.Array((1,), np.float32)
        return OLT(
            observation=parent_spec,
            legal_actions=legal_spec,
            terminal=terminal_spec,
        )

示例#8

0

显示文件

文件： tf_utils.py 项目： NetColby/DNRL

def create_variables(
    network: snt.Module,
    input_spec: List[OLT],
) -> Optional[tf.TensorSpec]:
    """Builds the network with dummy inputs to create the necessary variables.

    As a side effect, the input signature derived from the dummy inputs is
    stored on the network as `_input_signature`.

    Args:
      network: Sonnet Module whose variables are to be created.
      input_spec: list of input specs to the network. The length of this list
        should match the number of arguments expected by `network`.

    Returns:
      output_spec: only returns an output spec if the output is a tf.Tensor, else
          it doesn't return anything (None); e.g. if the output is a
          tfp.distributions.Distribution.
    """
    # One placeholder OLT per input spec, without a batch dimension.
    placeholders = []
    for olt_spec in input_spec:
        placeholders.append(
            OLT(
                observation=zeros_like(olt_spec.observation),
                legal_actions=ones_like(olt_spec.legal_actions),
                terminal=zeros_like(olt_spec.terminal),
            )
        )

    # A recurrent core takes its hidden state as one more input.
    if isinstance(network, snt.RNNCore):
        placeholders.append(squeeze_batch_dim(network.initial_state(1)))

    # Running the network once creates its variables as a side effect.
    outputs = network(*add_batch_dim(placeholders))

    def batched_spec(tensor):
        # Prepend an unknown batch dimension to the placeholder's shape.
        return tf.TensorSpec((None, ) + tensor.shape, tensor.dtype)

    # Save the signature on the network so it can be used later when creating
    # snapshots; the snapshot code may not have access to the precise form of
    # the inputs.
    network._input_signature = tree.map_structure(  # pylint: disable=protected-access
        batched_spec, placeholders)

    def spec(output: tf.Tensor) -> tf.TensorSpec:
        # A spec is ill-defined for anything that is not a Tensor.
        if not isinstance(output, tf.Tensor):
            return None
        # Non-scalar Tensors carry a batch dim that must be squeezed out.
        unbatched = squeeze_batch_dim(output) if tf.rank(output) > 0 else output
        return tf.TensorSpec(unbatched.shape, unbatched.dtype)

    return tree.map_structure(spec, outputs)

示例#9

0

显示文件

文件： mocks.py 项目： NetColby/DNRL

    def observation_spec(self) -> Observation:
        """Build an `OLT` spec for every agent in `self.agents`.

        Returns:
          A dict mapping each agent to an `OLT` holding the parent class's
          observation spec, that agent's entry of `self.action_spec()`, and a
          float32 terminal array spec of shape (1,).
        """
        per_agent = {}
        # Kept as an explicit loop: zero-argument `super()` does not work
        # inside a comprehension before Python 3.12.
        for agent_id in self.agents:
            agent_legals = self.action_spec()[agent_id]
            per_agent[agent_id] = OLT(
                observation=super().observation_spec(),
                legal_actions=agent_legals,
                terminal=specs.Array((1, ), np.float32),
            )
        return per_agent