Example #1
    def act(
        self, states, parallel=0, deterministic=False, independent=False, evaluation=False,
        query=None, **kwargs
    ):
        """
        Returns action(s) for the given state(s); needs to be followed by `observe(...)` unless
        `independent` is true.

        Args:
            states (dict[state]): Dictionary containing state(s) to be acted on
                (<span style="color:#C00000"><b>required</b></span>).
            parallel (int): Parallel execution index
                (<span style="color:#00C000"><b>default</b></span>: 0).
            deterministic (bool): Whether to act deterministically, i.e. without exploration and
                sampling
                (<span style="color:#00C000"><b>default</b></span>: false).
            independent (bool): Whether action is not remembered, and this call is thus not
                followed by observe
                (<span style="color:#00C000"><b>default</b></span>: false).
            evaluation (bool): Whether the agent is currently evaluated, implies deterministic and
                independent, which must not be set explicitly in this case
                (<span style="color:#00C000"><b>default</b></span>: false).
            query (list[str]): Names of tensors to retrieve
                (<span style="color:#00C000"><b>default</b></span>: none).
            kwargs: Additional input values, for instance, for dynamic hyperparameters.

        Returns:
            (dict[action], plus optional list[str]): Dictionary containing action(s), plus queried
            tensor values if requested.
        """
        assert util.reduce_all(predicate=util.not_nan_inf, xs=states)

        # self.current_internals = self.next_internals
        if evaluation:
            if deterministic or independent:
                raise TensorforceError.unexpected()
            deterministic = independent = True

        # Auxiliaries
        auxiliaries = OrderedDict()
        if isinstance(states, dict):
            states = dict(states)
            for name, spec in self.actions_spec.items():
                if spec['type'] == 'int' and name + '_mask' in states:
                    auxiliaries[name + '_mask'] = states.pop(name + '_mask')

        # Normalize states dictionary
        states = util.normalize_values(
            value_type='state', values=states, values_spec=self.states_spec
        )

        # Batch states
        states = util.fmap(function=(lambda x: np.asarray([x])), xs=states, depth=1)
        auxiliaries = util.fmap(function=(lambda x: np.asarray([x])), xs=auxiliaries, depth=1)

        # Model.act()
        if query is None:
            actions, self.timesteps = self.model.act(
                states=states, auxiliaries=auxiliaries, parallel=parallel,
                deterministic=deterministic, independent=independent, **kwargs
            )

        else:
            actions, self.timesteps, queried = self.model.act(
                states=states, auxiliaries=auxiliaries, parallel=parallel,
                deterministic=deterministic, independent=independent, query=query, **kwargs
            )

        if self.recorder_spec is not None and not independent and \
                self.episodes >= self.recorder_spec.get('start', 0):
            index = self.buffer_indices[parallel]
            for name in self.states_spec:
                self.states_buffers[name][parallel, index] = states[name][0]
            for name, spec in self.actions_spec.items():
                self.actions_buffers[name][parallel, index] = actions[name][0]
                if spec['type'] == 'int':
                    name = name + '_mask'
                    if name in auxiliaries:
                        self.states_buffers[name][parallel, index] = auxiliaries[name][0]
                    else:
                        shape = (1,) + spec['shape'] + (spec['num_values'],)
                        self.states_buffers[name][parallel, index] = np.full(
                            shape=shape, fill_value=True, dtype=util.np_dtype(dtype='bool')
                        )

        # Unbatch actions
        actions = util.fmap(function=(lambda x: x[0]), xs=actions, depth=1)

        # Reverse normalized actions dictionary
        actions = util.unpack_values(
            value_type='action', values=actions, values_spec=self.actions_spec
        )

        # if independent, return processed state as well?

        if query is None:
            return actions
        else:
            return actions, queried
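
A minimal usage sketch for this `act(...)` variant, assuming an already-constructed Tensorforce `agent` exposing the signature above and an `environment` wrapper whose `execute(...)` returns `(states, terminal, reward)` (both objects are hypothetical here, not part of the snippet):

    # Hypothetical objects: `agent` follows the act/observe interface shown above,
    # `environment` is an environment wrapper returning a states dict and a reward.
    states = environment.reset()
    terminal = False
    while not terminal:
        # Sampled (non-deterministic) action; must be followed by observe()
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)
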
Example #2
    def act(self,
            states,
            internals=None,
            parallel=0,
            independent=False,
            deterministic=False,
            evaluation=False,
            query=None,
            **kwargs):
        """
        Returns action(s) for the given state(s); needs to be followed by `observe(...)` unless
        independent mode is set via `independent`/`evaluation`.

        Args:
            states (dict[state] | iter[dict[state]]): Dictionary containing state(s) to be acted on
                (<span style="color:#C00000"><b>required</b></span>).
            internals (dict[internal] | iter[dict[internal]]): Dictionary containing current
                internal agent state(s)
                (<span style="color:#C00000"><b>required</b></span> if independent mode).
            parallel (int | iter[int]): Parallel execution index
                (<span style="color:#00C000"><b>default</b></span>: 0).
            independent (bool): Whether act is not part of the main agent-environment interaction,
                and this call is thus not followed by observe
                (<span style="color:#00C000"><b>default</b></span>: false).
            deterministic (bool): If in independent mode, whether to act deterministically, so no
                exploration and sampling
                (<span style="color:#00C000"><b>default</b></span>: false).
            evaluation (bool): Whether the agent is currently evaluated, implies independent and
                deterministic
                (<span style="color:#00C000"><b>default</b></span>: false).
            query (list[str]): Names of tensors to retrieve
                (<span style="color:#00C000"><b>default</b></span>: none).
            kwargs: Additional input values, for instance, for dynamic hyperparameters.

        Returns:
            dict[action] | iter[dict[action]], plus dict[internal] | iter[dict[internal]] if in
            independent mode, plus optional list[str]: Dictionary containing action(s), dictionary
            containing next internal agent state(s) if in independent mode, plus queried tensor
            values if requested.
        """
        assert util.reduce_all(predicate=util.not_nan_inf, xs=states)

        if evaluation:
            if deterministic:
                raise TensorforceError.invalid(name='agent.act',
                                               argument='deterministic',
                                               condition='evaluation = true')
            if independent:
                raise TensorforceError.invalid(name='agent.act',
                                               argument='independent',
                                               condition='evaluation = true')
            deterministic = independent = True

        if not independent:
            if internals is not None:
                raise TensorforceError.invalid(name='agent.act',
                                               argument='internals',
                                               condition='independent = false')
            if deterministic:
                raise TensorforceError.invalid(name='agent.act',
                                               argument='deterministic',
                                               condition='independent = false')

        if independent:
            internals_is_none = (internals is None)
            if internals_is_none:
                internals = OrderedDict()

        # Batch states
        batched = (not isinstance(parallel, int))
        if batched:
            if len(parallel) == 0:
                raise TensorforceError.value(name='agent.act',
                                             argument='parallel',
                                             value=parallel,
                                             hint='zero-length')
            parallel = np.asarray(list(parallel))
            if isinstance(states[0], dict):
                states = OrderedDict(
                    ((name,
                      np.asarray(
                          [states[n][name] for n in range(len(parallel))]))
                     for name in states[0]))
            else:
                states = np.asarray(states)
            if independent:
                internals = OrderedDict(
                    ((name,
                      np.asarray(
                          [internals[n][name] for n in range(len(parallel))]))
                     for name in internals[0]))
        else:
            parallel = np.asarray([parallel])
            states = util.fmap(function=(lambda x: np.asarray([x])),
                               xs=states,
                               depth=int(isinstance(states, dict)))
            if independent:
                internals = util.fmap(function=(lambda x: np.asarray([x])),
                                      xs=internals,
                                      depth=1)

        if not independent and not all(self.timestep_completed[n]
                                       for n in parallel):
            raise TensorforceError(
                message="Calling agent.act must be preceded by agent.observe.")

        # Auxiliaries
        auxiliaries = OrderedDict()
        if isinstance(states, dict):
            states = dict(states)
            for name, spec in self.actions_spec.items():
                if spec['type'] == 'int' and name + '_mask' in states:
                    auxiliaries[name + '_mask'] = states.pop(name + '_mask')

        # Normalize states dictionary
        states = util.normalize_values(value_type='state',
                                       values=states,
                                       values_spec=self.states_spec)

        # Model.act()
        if independent:
            if query is None:
                actions, internals = self.model.independent_act(
                    states=states,
                    internals=internals,
                    auxiliaries=auxiliaries,
                    parallel=parallel,
                    deterministic=deterministic,
                    **kwargs)

            else:
                actions, internals, queried = self.model.independent_act(
                    states=states,
                    internals=internals,
                    auxiliaries=auxiliaries,
                    parallel=parallel,
                    deterministic=deterministic,
                    query=query,
                    **kwargs)

        else:
            if query is None:
                actions, self.timesteps = self.model.act(
                    states=states,
                    auxiliaries=auxiliaries,
                    parallel=parallel,
                    **kwargs)

            else:
                actions, self.timesteps, queried = self.model.act(
                    states=states,
                    auxiliaries=auxiliaries,
                    parallel=parallel,
                    query=query,
                    **kwargs)

        if not independent:
            for n in parallel:
                self.timestep_completed[n] = False

        if self.recorder_spec is not None and not independent and \
                self.episodes >= self.recorder_spec.get('start', 0):
            for n in range(len(parallel)):
                index = self.buffer_indices[parallel[n]]
                for name in self.states_spec:
                    self.states_buffers[name][parallel[n],
                                              index] = states[name][n]
                for name, spec in self.actions_spec.items():
                    self.actions_buffers[name][parallel[n],
                                               index] = actions[name][n]
                    if spec['type'] == 'int':
                        name = name + '_mask'
                        if name in auxiliaries:
                            self.states_buffers[name][
                                parallel[n], index] = auxiliaries[name][n]
                        else:
                            shape = (1, ) + spec['shape'] + (
                                spec['num_values'], )
                            self.states_buffers[name][parallel[n],
                                                      index] = np.full(
                                                          shape=shape,
                                                          fill_value=True,
                                                          dtype=util.np_dtype(
                                                              dtype='bool'))

        # Reverse normalized actions dictionary
        actions = util.unpack_values(value_type='action',
                                     values=actions,
                                     values_spec=self.actions_spec)

        # Unbatch actions
        if batched:
            if isinstance(actions, dict):
                actions = [
                    OrderedDict(((name, actions[name][n]) for name in actions))
                    for n in range(len(parallel))
                ]
        else:
            actions = util.fmap(function=(lambda x: x[0]),
                                xs=actions,
                                depth=int(isinstance(actions, dict)))
            if independent:
                internals = util.fmap(function=(lambda x: x[0]),
                                      xs=internals,
                                      depth=1)

        if independent and not internals_is_none:
            if query is None:
                return actions, internals
            else:
                return actions, internals, queried

        else:
            if query is None:
                return actions
            else:
                return actions, queried
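
A hedged sketch of independent-mode (evaluation) use of this newer signature, assuming `agent` and `environment` objects as before, and assuming the agent exposes an `initial_internals()` helper for the starting internal state (present in recent Tensorforce versions; adapt if yours differs):

    # Deterministic, independent acting: internals are threaded through explicitly
    # and no observe() call follows.
    internals = agent.initial_internals()
    states = environment.reset()
    terminal = False
    while not terminal:
        actions, internals = agent.act(
            states=states, internals=internals, independent=True, deterministic=True
        )
        states, terminal, reward = environment.execute(actions=actions)
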
Example #3
    def experience(
        self, states, actions, terminal, reward, internals=None, query=None, **kwargs
    ):
        """
        Feed experience traces.

        Args:
            states (dict[array[state]]): Dictionary containing arrays of states
                (<span style="color:#C00000"><b>required</b></span>).
            actions (dict[array[action]]): Dictionary containing arrays of actions
                (<span style="color:#C00000"><b>required</b></span>).
            terminal (array[bool]): Array of terminals
                (<span style="color:#C00000"><b>required</b></span>).
            reward (array[float]): Array of rewards
                (<span style="color:#C00000"><b>required</b></span>).
            internals (dict[state]): Dictionary containing arrays of internal agent states
                (<span style="color:#00C000"><b>default</b></span>: no internal states).
            query (list[str]): Names of tensors to retrieve
                (<span style="color:#00C000"><b>default</b></span>: none).
            kwargs: Additional input values, for instance, for dynamic hyperparameters.
        """
        assert (self.buffer_indices == 0).all()
        assert util.reduce_all(predicate=util.not_nan_inf, xs=states)
        assert internals is None or util.reduce_all(predicate=util.not_nan_inf, xs=internals)
        assert util.reduce_all(predicate=util.not_nan_inf, xs=actions)
        assert util.reduce_all(predicate=util.not_nan_inf, xs=reward)

        # Auxiliaries
        auxiliaries = OrderedDict()
        if isinstance(states, dict):
            for name, spec in self.actions_spec.items():
                if spec['type'] == 'int' and name + '_mask' in states:
                    auxiliaries[name + '_mask'] = np.asarray(states.pop(name + '_mask'))
        auxiliaries = util.fmap(function=np.asarray, xs=auxiliaries, depth=1)

        # Normalize states/actions dictionaries
        states = util.normalize_values(
            value_type='state', values=states, values_spec=self.states_spec
        )
        actions = util.normalize_values(
            value_type='action', values=actions, values_spec=self.actions_spec
        )
        if internals is None:
            internals = OrderedDict()

        if isinstance(terminal, (bool, int)):
            states = util.fmap(function=(lambda x: [x]), xs=states, depth=1)
            internals = util.fmap(function=(lambda x: [x]), xs=internals, depth=1)
            auxiliaries = util.fmap(function=(lambda x: [x]), xs=auxiliaries, depth=1)
            actions = util.fmap(function=(lambda x: [x]), xs=actions, depth=1)
            terminal = [terminal]
            reward = [reward]

        states = util.fmap(function=np.asarray, xs=states, depth=1)
        internals = util.fmap(function=np.asarray, xs=internals, depth=1)
        auxiliaries = util.fmap(function=np.asarray, xs=auxiliaries, depth=1)
        actions = util.fmap(function=np.asarray, xs=actions, depth=1)

        if isinstance(terminal, np.ndarray):
            if terminal.dtype is util.np_dtype(dtype='bool'):
                zeros = np.zeros_like(terminal, dtype=util.np_dtype(dtype='long'))
                ones = np.ones_like(terminal, dtype=util.np_dtype(dtype='long'))
                terminal = np.where(terminal, ones, zeros)
        else:
            terminal = np.asarray([int(x) if isinstance(x, bool) else x for x in terminal])
        reward = np.asarray(reward)

        # Batch experiences split into episodes and at most size buffer_observe
        last = 0
        for index in range(1, len(terminal) + 1):
            if terminal[index - 1] == 0 and index - last < self.experience_size:
                continue

            # Include terminal in batch if possible
            if index < len(terminal) and terminal[index - 1] == 0 and terminal[index] > 0 and \
                    index - last < self.experience_size:
                index += 1

            function = (lambda x: x[last: index])
            states_batch = util.fmap(function=function, xs=states, depth=1)
            internals_batch = util.fmap(function=function, xs=internals, depth=1)
            auxiliaries_batch = util.fmap(function=function, xs=auxiliaries, depth=1)
            actions_batch = util.fmap(function=function, xs=actions, depth=1)
            terminal_batch = terminal[last: index]
            reward_batch = reward[last: index]
            last = index

            # Model.experience()
            if query is None:
                self.timesteps, self.episodes, self.updates = self.model.experience(
                    states=states_batch, internals=internals_batch,
                    auxiliaries=auxiliaries_batch, actions=actions_batch, terminal=terminal_batch,
                    reward=reward_batch, **kwargs
                )

            else:
                self.timesteps, self.episodes, self.updates, queried = self.model.experience(
                    states=states_batch, internals=internals_batch,
                    auxiliaries=auxiliaries_batch, actions=actions_batch, terminal=terminal_batch,
                    reward=reward_batch, query=query, **kwargs
                )

        if query is not None:
            return queried
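
A hedged sketch of feeding recorded traces through `experience(...)` for pretraining; the state/action names and shapes below are illustrative placeholders, and the follow-up `agent.update()` call is an assumption about the surrounding API, not shown in this snippet:

    import numpy as np

    # Illustrative single-episode trace: 100 timesteps, an 8-dimensional float state
    # named 'state' and an int action named 'action' with 4 possible values.
    trace = dict(
        states=dict(state=np.random.random(size=(100, 8))),
        actions=dict(action=np.random.randint(4, size=100)),
        terminal=np.zeros(100, dtype=bool),
        reward=np.random.random(size=100),
    )
    trace['terminal'][-1] = True  # mark the end of the episode

    agent.experience(
        states=trace['states'], actions=trace['actions'],
        terminal=trace['terminal'], reward=trace['reward'],
    )
    agent.update()  # trigger a learning update from the fed experience, if desired
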
    def act(self,
            states,
            parallel=0,
            deterministic=False,
            independent=False,
            query=None,
            **kwargs):
        """
        Return action(s) for given state(s). States preprocessing and exploration are applied if
        configured accordingly.

        Args:
            states (any): One state (usually a value tuple) or dict of states if multiple states are expected.
            deterministic (bool): If true, no exploration and sampling is applied.
            independent (bool): If true, action is not followed by observe (and hence not included
                in updates).
            query (list[str]): Optional list of names of tensors to fetch
        Returns:
            Scalar value of the action, or dict of multiple actions, that the agent wants to
            execute, plus, if `query` was given, a dict of the fetched named tensors.
        """
        # self.current_internals = self.next_internals

        # Normalize states dictionary
        states = util.normalize_values(value_type='state',
                                       values=states,
                                       values_spec=self.states_spec)

        # Batch states
        states = util.fmap(function=(lambda x: [x]), xs=states)

        # Model.act()
        if query is None:
            actions, self.timestep = self.model.act(
                states=states,
                parallel=parallel,
                deterministic=deterministic,
                independent=independent,
                **kwargs)

        else:
            actions, self.timestep, query = self.model.act(
                states=states,
                parallel=parallel,
                deterministic=deterministic,
                independent=independent,
                query=query,
                **kwargs)

        # Unbatch actions
        actions = util.fmap(function=(lambda x: x[0]), xs=actions)

        # Reverse normalized actions dictionary
        actions = util.unpack_values(value_type='action',
                                     values=actions,
                                     values_spec=self.actions_spec)

        # if independent, return processed state as well?

        if query is None:
            return actions
        else:
            return actions, query
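
For this older signature, a minimal independent and deterministic call might look as follows (sketch; `agent` and `states` are assumed to exist as in the earlier examples):

    # Deterministic, independent action: not buffered for a later observe()
    actions = agent.act(states=states, deterministic=True, independent=True)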