Python ArrayDict.items примеры использования

Язык программирования: Python

Пространство имен/Пакет: tensorforce.core

Класс/Тип: ArrayDict

Метод/Функция: items

Примеров на hotexamples.com: 3

Python ArrayDict.items - 3 примера найдено. Это лучшие примеры Python кода для tensorforce.core.ArrayDict.items, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

ArrayDict(9)

fmap(4)

items(3)

singleton(1)

values(1)

Пример #1

Показать файл

    def experience(self, states, actions, terminal, reward, internals=None):
        """
        Feed experience traces.

        See the [act-experience-update script](https://github.com/tensorforce/tensorforce/blob/master/examples/act_experience_update_interface.py)
        for an example application as part of the act-experience-update interface, which is an
        alternative to the act-observe interaction pattern.

        Args:
            states (dict[array[state]]): Dictionary containing arrays of states
                (<span style="color:#C00000"><b>required</b></span>).
            actions (dict[array[action]]): Dictionary containing arrays of actions
                (<span style="color:#C00000"><b>required</b></span>).
            terminal (array[bool]): Array of terminals
                (<span style="color:#C00000"><b>required</b></span>).
            reward (array[float]): Array of rewards
                (<span style="color:#C00000"><b>required</b></span>).
            internals (dict[state]): Dictionary containing arrays of internal agent states
                (<span style="color:#C00000"><b>required</b></span> if agent has internal states).

        Raises:
            TensorforceError: If called while any terminal buffer is non-empty (mid-episode), if
                `internals` or `actions` have an unexpected container type, if the leading
                dimension of any input differs from the number of states, or if the input does
                not end with a non-zero terminal value.
        """
        # Experience traces may only be fed between episodes: all terminal buffers must be empty
        if any(len(buffer) > 0 for buffer in self.terminal_buffer):
            raise TensorforceError(
                message="Calling agent.experience is not possible mid-episode."
            )

        # Process states input and infer batching structure
        states, batched, num_instances, is_iter_of_dicts = self._process_states_input(
            states=states, function_name='Agent.experience')

        if is_iter_of_dicts:
            # Input structure iter[dict[input]]

            # Internals
            if internals is None:
                internals = ArrayDict(self.initial_internals())
                internals = internals.fmap(function=(lambda x: np.repeat(
                    np.expand_dims(x, axis=0), repeats=num_instances, axis=0)))
            elif not isinstance(internals, (tuple, list)):
                raise TensorforceError.type(name='Agent.experience',
                                            argument='internals',
                                            dtype=type(internals),
                                            hint='is not tuple/list')
            else:
                internals = [ArrayDict(internal) for internal in internals]
                internals = internals[0].fmap(
                    function=(lambda *xs: np.stack(xs, axis=0)),
                    zip_values=internals[1:])

            # Actions
            if isinstance(actions, np.ndarray):
                actions = ArrayDict(singleton=actions)
            elif not isinstance(actions, (tuple, list)):
                raise TensorforceError.type(name='Agent.experience',
                                            argument='actions',
                                            dtype=type(actions),
                                            hint='is not tuple/list')
            elif not isinstance(actions[0], dict):
                actions = ArrayDict(singleton=np.asarray(actions))
            else:
                actions = [ArrayDict(action) for action in actions]
                actions = actions[0].fmap(
                    function=(lambda *xs: np.stack(xs, axis=0)),
                    zip_values=actions[1:])

        else:
            # Input structure dict[iter[input]]

            # Internals
            if internals is None:
                internals = ArrayDict(self.initial_internals())
                # Repeat along the new leading axis so every instance gets its own copy;
                # np.tile with a one-element reps would tile the *last* axis of non-scalar
                # internals and leave the leading dimension at 1
                internals = internals.fmap(function=(lambda x: np.repeat(
                    np.expand_dims(x, axis=0), repeats=num_instances, axis=0)))
            elif not isinstance(internals, dict):
                raise TensorforceError.type(name='Agent.experience',
                                            argument='internals',
                                            dtype=type(internals),
                                            hint='is not dict')
            else:
                internals = ArrayDict(internals)

            # Actions: a bare array is a singleton action, otherwise a dict is required
            if isinstance(actions, np.ndarray):
                actions = ArrayDict(singleton=actions)
            elif not isinstance(actions, dict):
                raise TensorforceError.type(name='Agent.experience',
                                            argument='actions',
                                            dtype=type(actions),
                                            hint='is not dict')
            else:
                actions = ArrayDict(actions)

        # Expand inputs if not batched
        if not batched:
            internals = internals.fmap(
                function=(lambda x: np.expand_dims(x, axis=0)))
            actions = actions.fmap(
                function=(lambda x: np.expand_dims(x, axis=0)))
            terminal = np.asarray([terminal])
            reward = np.asarray([reward])
        else:
            terminal = np.asarray(terminal)
            reward = np.asarray(reward)

        # Check number of inputs
        for name, internal in internals.items():
            if internal.shape[0] != num_instances:
                raise TensorforceError.value(
                    name='Agent.experience',
                    argument='len(internals[{}])'.format(name),
                    value=internal.shape[0],
                    hint='!= len(states)')
        for name, action in actions.items():
            if action.shape[0] != num_instances:
                raise TensorforceError.value(
                    name='Agent.experience',
                    argument='len(actions[{}])'.format(name),
                    value=action.shape[0],
                    hint='!= len(states)')
        if terminal.shape[0] != num_instances:
            raise TensorforceError.value(name='Agent.experience',
                                         argument='len(terminal)',
                                         value=terminal.shape[0],
                                         hint='!= len(states)')
        if reward.shape[0] != num_instances:
            raise TensorforceError.value(name='Agent.experience',
                                         argument='len(reward)',
                                         value=reward.shape[0],
                                         hint='!= len(states)')

        def function(name, spec):
            # Builds the auxiliary inputs (currently only the action mask) for one action
            auxiliary = ArrayDict()
            if self.config.enable_int_action_masking and spec.type == 'int' and \
                    spec.num_values is not None:
                if name is None:
                    name = 'action'
                # Mask, either part of states or default all true
                auxiliary['mask'] = states.pop(
                    name + '_mask',
                    np.ones(shape=(num_instances, ) + spec.shape +
                            (spec.num_values, ),
                            dtype=spec.np_type()))
            return auxiliary

        auxiliaries = self.actions_spec.fmap(function=function,
                                             cls=ArrayDict,
                                             with_names=True)
        # Once masks have been popped, a singleton state is re-keyed from 'state' to None
        if self.states_spec.is_singleton() and not states.is_singleton():
            states[None] = states.pop('state')

        # Convert terminal to int if necessary (dtype equality, not object identity)
        if terminal.dtype == util.np_dtype(dtype='bool'):
            terminal = terminal.astype(util.np_dtype(dtype='int'))

        # The trace has to end on a terminal step; empty input is rejected the same way
        if terminal.shape[0] == 0 or terminal[-1] == 0:
            raise TensorforceError(
                message="Agent.experience() requires full episodes as input.")

        # Split the experiences into episodes (each ends at a non-zero terminal) and feed
        # them to the model one episode at a time
        last = 0
        for index in range(1, len(terminal) + 1):
            if terminal[index - 1] == 0:
                continue

            episode = slice(last, index)
            select = (lambda x: x[episode])
            states_batch = states.fmap(function=select)
            internals_batch = internals.fmap(function=select)
            auxiliaries_batch = auxiliaries.fmap(function=select)
            actions_batch = actions.fmap(function=select)
            terminal_batch = terminal[episode]
            reward_batch = reward[episode]
            last = index

            # Inputs to tensors
            states_batch = self.states_spec.to_tensor(
                value=states_batch,
                batched=True,
                name='Agent.experience states')
            internals_batch = self.internals_spec.to_tensor(
                value=internals_batch,
                batched=True,
                recover_empty=True,
                name='Agent.experience internals')
            auxiliaries_batch = self.auxiliaries_spec.to_tensor(
                value=auxiliaries_batch,
                batched=True,
                name='Agent.experience auxiliaries')
            actions_batch = self.actions_spec.to_tensor(
                value=actions_batch,
                batched=True,
                name='Agent.experience actions')
            terminal_batch = self.terminal_spec.to_tensor(
                value=terminal_batch,
                batched=True,
                name='Agent.experience terminal')
            reward_batch = self.reward_spec.to_tensor(
                value=reward_batch,
                batched=True,
                name='Agent.experience reward')

            # Model.experience()
            timesteps, episodes = self.model.experience(
                states=states_batch,
                internals=internals_batch,
                auxiliaries=auxiliaries_batch,
                actions=actions_batch,
                terminal=terminal_batch,
                reward=reward_batch)
            self.timesteps = timesteps.numpy().item()
            self.episodes = episodes.numpy().item()

        if self.model.saver is not None:
            self.model.save()

Пример #2

Показать файл

Файл: recorder.py Проект: wide725/tensorforce

    def act(self,
            states,
            internals=None,
            parallel=0,
            independent=False,
            deterministic=True,
            **kwargs):
        """
        Return action(s) for the given state(s), buffering inputs and outputs for recording.

        Args:
            states: State(s) to act on; normalised via `self._process_states_input`, which also
                determines whether the input is batched and how it is structured.
            internals: Internal agent state(s). Required in independent mode if
                `self.internals_spec` is non-empty, invalid otherwise. A tuple/list of dicts or
                a dict of arrays.
            parallel: Parallel execution index or indices; must be 0 in independent mode.
            independent: Whether this call is outside the act-observe cycle. If false, the
                previous timestep of each addressed parallel index must be completed.
            deterministic: Forwarded to `self.fn_act` when `self._is_agent` is true.
            **kwargs: Accepted but not used by this method.

        Returns:
            The action(s), unbatched to mirror the input structure. In independent mode with
            `internals` given, a tuple `(actions, internals)`.

        Raises:
            TensorforceError: On an invalid argument combination, a wrongly typed `internals`,
                a length mismatch with the states, or if a previous timestep was not completed.
        """
        # Independent and internals
        is_internals_none = (internals is None)
        if independent:
            if parallel != 0:
                raise TensorforceError.invalid(name='Agent.act',
                                               argument='parallel',
                                               condition='independent is true')
            if is_internals_none and len(self.internals_spec) > 0:
                raise TensorforceError.required(
                    name='Agent.act',
                    argument='internals',
                    condition='independent is true')
        else:
            if not is_internals_none:
                raise TensorforceError.invalid(
                    name='Agent.act',
                    argument='internals',
                    condition='independent is false')

        # Process states input and infer batching structure
        states, batched, num_parallel, is_iter_of_dicts = self._process_states_input(
            states=states, function_name='Agent.act')

        if independent:
            # Independent mode: handle internals argument (nothing to do for internals=None)
            if not is_internals_none:
                if is_iter_of_dicts or isinstance(internals, (tuple, list)):
                    # Input structure iter[dict[internal]]
                    if not isinstance(internals, (tuple, list)):
                        raise TensorforceError.type(name='Agent.act',
                                                    argument='internals',
                                                    dtype=type(internals),
                                                    hint='is not tuple/list')
                    internals = [ArrayDict(internal) for internal in internals]
                    internals = internals[0].fmap(
                        function=(lambda *xs: np.stack(xs, axis=0)),
                        zip_values=internals[1:])

                else:
                    # Input structure dict[iter[internal]]
                    if not isinstance(internals, dict):
                        raise TensorforceError.type(name='Agent.act',
                                                    argument='internals',
                                                    dtype=type(internals),
                                                    hint='is not dict')
                    internals = ArrayDict(internals)

                # Expand inputs if not batched
                if not batched:
                    internals = internals.fmap(
                        function=(lambda x: np.expand_dims(x, axis=0)))

                # Check number of inputs
                for name, internal in internals.items():
                    if internal.shape[0] != num_parallel:
                        raise TensorforceError.value(
                            name='Agent.act',
                            argument='len(internals[{}])'.format(name),
                            value=internal.shape[0],
                            hint='!= len(states)')

        else:
            # Non-independent mode: handle parallel input
            # The scalar default has to be recognised before the batched case, otherwise a
            # batched call with parallel=0 yields a 0-d array that fails the length check;
            # the ndim guard keeps array-valued input out of the scalar comparison
            if np.ndim(parallel) == 0 and parallel == 0:
                # Default input parallel=0
                if batched:
                    assert num_parallel == self.parallel_interactions
                    parallel = np.arange(num_parallel)
                else:
                    parallel = np.asarray([parallel])

            elif batched:
                # Batched input
                parallel = np.asarray(parallel)

            else:
                # Expand input if not batched
                parallel = np.asarray([parallel])

            # Check number of inputs
            if parallel.shape[0] != num_parallel:
                raise TensorforceError.value(name='Agent.act',
                                             argument='len(parallel)',
                                             value=len(parallel),
                                             hint='!= len(states)')

        # If not independent, check whether previous timesteps were completed
        if not independent:
            if not self.timestep_completed[parallel].all():
                raise TensorforceError(
                    message=
                    "Calling agent.act must be preceded by agent.observe for training, or "
                    "agent.act argument 'independent' must be passed as True.")
            self.timestep_completed[parallel] = False

        # Recording applies to regular interaction only, once the start episode is reached
        is_recording = (
            self.recorder is not None and not independent and
            self.num_episodes >= self.recorder.get('start', 0))

        # Buffer inputs for recording
        if is_recording:
            for n in range(num_parallel):
                for name in self.states_spec:
                    self.buffers['states'][name][parallel[n]].append(
                        states[name][n])

        # fn_act()
        if self._is_agent:
            actions, internals = self.fn_act(
                states=states,
                internals=internals,
                parallel=parallel,
                independent=independent,
                deterministic=deterministic,
                is_internals_none=is_internals_none,
                num_parallel=num_parallel)
        else:
            # A plain callable only supports unbatched input
            assert not batched
            states = states.fmap(function=(
                lambda x: x[0].item() if x.shape == (1, ) else x[0]))
            actions = self.fn_act(states.to_kwargs())
            if self.actions_spec.is_singleton():
                actions = ArrayDict(singleton=np.asarray([actions]))
            else:
                actions = ArrayDict(actions)
                actions = actions.fmap(
                    function=(lambda x: np.asarray([x])))

        # Buffer outputs for recording
        if is_recording:
            for n in range(num_parallel):
                for name in self.actions_spec:
                    self.buffers['actions'][name][parallel[n]].append(
                        actions[name][n])

        # Unbatch actions
        if batched:
            # If inputs were batched, turn dict of lists into list of dicts
            function = (lambda x: x.item() if x.shape == () else x)
            # TODO: recursive
            if self.actions_spec.is_singleton():
                actions = actions.singleton()
                if is_iter_of_dicts:
                    actions = [
                        function(actions[n]) for n in range(num_parallel)
                    ]
            else:
                if is_iter_of_dicts:
                    actions = [
                        OrderedDict(((name, function(x[n]))
                                     for name, x in actions.items()))
                        for n in range(num_parallel)
                    ]
                else:
                    actions = OrderedDict(actions.items())

            if independent and not is_internals_none:
                if is_iter_of_dicts:
                    # TODO: recursive
                    internals = [
                        OrderedDict(((name, function(x[n]))
                                     for name, x in internals.items()))
                        for n in range(num_parallel)
                    ]
                else:
                    internals = OrderedDict(internals.items())

        else:
            # If inputs were not batched, unbatch outputs
            function = (lambda x: x.item() if x.shape == (1, ) else x[0])
            if self.actions_spec.is_singleton():
                actions = function(actions.singleton())
            else:
                actions = actions.fmap(function=function, cls=OrderedDict)
            if independent and not is_internals_none:
                internals = internals.fmap(function=function, cls=OrderedDict)

        if independent and not is_internals_none:
            return actions, internals
        else:
            return actions

Пример #3

Показать файл

    def act(
        self, states, internals=None, parallel=0, independent=False,
        # Deprecated
        deterministic=None, evaluation=None
    ):
        """
        Returns action(s) for the given state(s), needs to be followed by `observe()` unless
        independent mode.

        Args:
            states (dict[state] | iter[dict[state]]): Dictionary containing state(s) to be acted on
                (<span style="color:#C00000"><b>required</b></span>).
            internals (dict[internal] | iter[dict[internal]]): Dictionary containing current
                internal agent state(s), either given by `initial_internals()` at the beginning of
                an episode or as return value of the preceding `act()` call
                (<span style="color:#C00000"><b>required</b></span> if independent mode and agent
                has internal states).
            parallel (int | iter[int]): Parallel execution index
                (<span style="color:#00C000"><b>default</b></span>: 0).
            independent (bool): Whether act is not part of the main agent-environment interaction,
                and this call is thus not followed by observe
                (<span style="color:#00C000"><b>default</b></span>: false).
            deterministic: Deprecated, any value other than None raises an error pointing to
                `independent`.
            evaluation: Deprecated, any value other than None raises an error pointing to
                `independent`.

        Returns:
            dict[action] | iter[dict[action]], dict[internal] | iter[dict[internal]] if `internals`
            argument given: Dictionary containing action(s), dictionary containing next internal
            agent state(s) if independent mode.

        Raises:
            TensorforceError: If a deprecated argument is passed, on an invalid combination of
                `independent`, `parallel` and `internals`, on a wrongly typed `internals`, on a
                length mismatch with the states, or if `act()` is called again before
                `observe()` in non-independent mode.
        """
        if deterministic is not None:
            raise TensorforceError.deprecated(
                name='Agent.act', argument='deterministic', replacement='independent'
            )
        if evaluation is not None:
            raise TensorforceError.deprecated(
                name='Agent.act', argument='evaluation', replacement='independent'
            )

        # Independent and internals
        # Defined unconditionally since it is read on both the independent and regular paths
        is_internals_none = (internals is None)
        if independent:
            if parallel != 0:
                raise TensorforceError.invalid(
                    name='Agent.act', argument='parallel', condition='independent is true'
                )
            if is_internals_none and len(self.internals_spec) > 0:
                raise TensorforceError.required(
                    name='Agent.act', argument='internals', condition='independent is true'
                )
        else:
            if not is_internals_none:
                raise TensorforceError.invalid(
                    name='Agent.act', argument='internals', condition='independent is false'
                )

        # Process states input and infer batching structure
        states, batched, num_parallel, is_iter_of_dicts, input_type = self._process_states_input(
            states=states, function_name='Agent.act'
        )

        if independent:
            # Independent mode: handle internals argument (nothing to do for internals=None)

            if not is_internals_none:
                if is_iter_of_dicts:
                    # Input structure iter[dict[internal]]
                    if not isinstance(internals, (tuple, list)):
                        raise TensorforceError.type(
                            name='Agent.act', argument='internals', dtype=type(internals),
                            hint='is not tuple/list'
                        )
                    internals = [ArrayDict(internal) for internal in internals]
                    internals = internals[0].fmap(
                        function=(lambda *xs: np.stack(xs, axis=0)), zip_values=internals[1:]
                    )

                else:
                    # Input structure dict[iter[internal]]
                    if not isinstance(internals, dict):
                        raise TensorforceError.type(
                            name='Agent.act', argument='internals', dtype=type(internals),
                            hint='is not dict'
                        )
                    internals = ArrayDict(internals)

                # Expand inputs if not batched
                if not batched:
                    internals = internals.fmap(function=(lambda x: np.expand_dims(x, axis=0)))

                # Check number of inputs
                for name, internal in internals.items():
                    if internal.shape[0] != num_parallel:
                        raise TensorforceError.value(
                            name='Agent.act', argument='len(internals[{}])'.format(name),
                            value=internal.shape[0], hint='!= len(states)'
                        )

        else:
            # Non-independent mode: handle parallel input

            # The ndim guard keeps array-valued input out of the scalar comparison, whose
            # truth value would be ambiguous for arrays with more than one element
            if np.ndim(parallel) == 0 and parallel == 0:
                # Default input parallel=0
                if batched:
                    assert num_parallel == self.parallel_interactions
                    parallel = np.asarray(list(range(num_parallel)))
                else:
                    parallel = np.asarray([parallel])

            elif batched:
                # Batched input
                parallel = np.asarray(parallel)

            else:
                # Expand input if not batched
                parallel = np.asarray([parallel])

            # Check number of inputs
            if parallel.shape[0] != num_parallel:
                raise TensorforceError.value(
                    name='Agent.act', argument='len(parallel)', value=len(parallel),
                    hint='!= len(states)'
                )

        def function(name, spec):
            # Builds the auxiliary inputs (currently only the action mask) for one action
            auxiliary = ArrayDict()
            if self.config.enable_int_action_masking and spec.type == 'int' and \
                    spec.num_values is not None:
                # Mask, either part of states or default all true
                auxiliary['mask'] = states.pop(name + '_mask', np.ones(
                    shape=(num_parallel,) + spec.shape + (spec.num_values,), dtype=spec.np_type()
                ))
            return auxiliary

        auxiliaries = self.actions_spec.fmap(function=function, cls=ArrayDict, with_names=True)

        # If not independent, check whether previous timesteps were completed
        if not independent:
            if not self.timestep_completed[parallel].all():
                raise TensorforceError(
                    message="Calling agent.act must be preceded by agent.observe."
                )
            self.timestep_completed[parallel] = False

        # Buffer inputs for recording
        if self.recorder_spec is not None and not independent and \
                self.episodes >= self.recorder_spec.get('start', 0):
            for n in range(num_parallel):
                for name in self.states_spec:
                    self.buffers['states'][name][parallel[n]].append(states[name][n])
                for name in self.auxiliaries_spec:
                    self.buffers['auxiliaries'][name][parallel[n]].append(auxiliaries[name][n])

        # Inputs to tensors
        states = self.states_spec.to_tensor(value=states, batched=True)
        if independent and not is_internals_none:
            internals = self.internals_spec.to_tensor(value=internals, batched=True)
        auxiliaries = self.auxiliaries_spec.to_tensor(value=auxiliaries, batched=True)
        parallel_tensor = self.parallel_spec.to_tensor(value=parallel, batched=True)

        # Model.act()
        if not independent:
            actions, timesteps = self.model.act(
                states=states, auxiliaries=auxiliaries, parallel=parallel_tensor
            )
            self.timesteps = timesteps.numpy().item()

        elif len(self.internals_spec) > 0:
            if len(self.auxiliaries_spec) > 0:
                actions_internals = self.model.independent_act(
                    states=states, internals=internals, auxiliaries=auxiliaries
                )
            else:
                assert len(auxiliaries) == 0
                actions_internals = self.model.independent_act(states=states, internals=internals)
            actions_internals = TensorDict(actions_internals)
            actions = actions_internals['actions']
            internals = actions_internals['internals']

        else:
            if len(self.auxiliaries_spec) > 0:
                actions = self.model.independent_act(states=states, auxiliaries=auxiliaries)
            else:
                assert len(auxiliaries) == 0
                actions = self.model.independent_act(states=states)
            actions = TensorDict(actions)

        # Outputs from tensors
        actions = self.actions_spec.from_tensor(tensor=actions, batched=True)

        # Buffer outputs for recording
        if self.recorder_spec is not None and not independent and \
                self.episodes >= self.recorder_spec.get('start', 0):
            for n in range(num_parallel):
                for name in self.actions_spec:
                    self.buffers['actions'][name][parallel[n]].append(actions[name][n])

        # Unbatch actions
        if batched:
            # If inputs were batched, turn dict of lists into list of dicts
            function = (lambda x: x.item() if x.shape == () else x)
            if self.single_action:
                actions = input_type(function(actions['action'][n]) for n in range(num_parallel))
            else:
                # TODO: recursive
                actions = input_type(
                    OrderedDict(((name, function(x[n])) for name, x in actions.items()))
                    for n in range(num_parallel)
                )

            if independent and not is_internals_none and is_iter_of_dicts:
                # TODO: recursive
                internals = input_type(
                    OrderedDict(((name, function(x[n])) for name, x in internals.items()))
                    for n in range(num_parallel)
                )

        else:
            # If inputs were not batched, unbatch outputs
            function = (lambda x: x.item() if x.shape == (1,) else x[0])
            if self.single_action:
                actions = function(actions['action'])
            else:
                actions = actions.fmap(function=function, cls=OrderedDict)
            if independent and not is_internals_none:
                internals = internals.fmap(function=function, cls=OrderedDict)

        if self.model.saver is not None:
            self.model.save()

        if independent and not is_internals_none:
            return actions, internals
        else:
            return actions