def __init__(self, specifiable_class, spec, output_spaces, shutdown_method=None):
    """
    Args:
        specifiable_class (type): The class to use for constructing the Specifiable from `spec`. This class
            needs to be a child class of Specifiable (with a `__lookup_classes__` property).
        spec (dict): The specification dict that will be used to construct the Specifiable.
        output_spaces (Union[callable,Dict[str,Space]]): A callable that takes a method name as argument
            and returns the Space(s) that this method (on the Specifiable object) would return.
            Alternatively: A dict with key=method name and value=Space(s).
        shutdown_method (Optional[str]): An optional name of a shutdown method that will be called on the
            Specifiable object before "server" shutdown to give the Specifiable a chance to clean up.
            The Specifiable must implement this method.
    """
    super(SpecifiableServer, self).__init__()
    self.specifiable_class = specifiable_class
    self.spec = spec

    # If dict: Process possible specs right away so we don't have to do this during calls.
    if isinstance(output_spaces, dict):
        self.output_spaces = {}
        for method_name, space_spec in output_spaces.items():
            if isinstance(space_spec, (tuple, list)):
                self.output_spaces[method_name] = [
                    Space.from_spec(s) if s is not None else None for s in space_spec
                ]
            else:
                self.output_spaces[method_name] = \
                    Space.from_spec(space_spec) if space_spec is not None else None
    else:
        self.output_spaces = output_spaces

    self.shutdown_method = shutdown_method

    # The process in which the Specifiable will run.
    self.process = None
    # The out-pipe to send commands (method calls) to the server process.
    self.out_pipe = None
    # The in-pipe to receive the "ready" signal from the server process.
    self.in_pipe = None

    # Register this object with the class.
    self.INSTANCES.append(self)
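
# Construction sketch for SpecifiableServer. Assumptions (illustrative, not confirmed by this
# file): `Environment` is a Specifiable child class registered under the "openai" spec-type,
# its `step` method returns values in a 4-dim FloatBox, and it implements `terminate`.
from rlgraph.environments import Environment
from rlgraph.spaces import FloatBox

server = SpecifiableServer(
    specifiable_class=Environment,                    # must be a child class of Specifiable
    spec=dict(type="openai", gym_env="CartPole-v0"),  # spec used to construct the Specifiable
    output_spaces=dict(step=FloatBox(shape=(4,))),    # dict form: method name -> return Space(s)
    shutdown_method="terminate"                       # called on the object before server shutdown
)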
def __new__(cls, *components, **kwargs):
    # A single list/tuple argument is unpacked into the components.
    if len(components) > 0 and isinstance(components[0], (list, tuple)):
        assert len(components) == 1
        components = components[0]

    add_batch_rank = kwargs.get("add_batch_rank", False)
    add_time_rank = kwargs.get("add_time_rank", False)
    time_major = kwargs.get("time_major", False)

    # Allow for any spec or already constructed Space to be passed in as values in the python-list/tuple.
    list_ = list()
    for value in components:
        # Value is already a Space: Copy it (to not affect the original Space) and maybe add batch/time-ranks.
        if isinstance(value, Space):
            list_.append(value.with_extra_ranks(add_batch_rank, add_time_rank, time_major))
        # Value is a list/tuple -> treat as nested Tuple space.
        elif isinstance(value, (list, tuple)):
            list_.append(Tuple(
                *value, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank, time_major=time_major
            ))
        # Value is a spec (or a spec-dict with "type" field) -> produce via `from_spec`.
        elif (isinstance(value, dict) and "type" in value) or not isinstance(value, dict):
            list_.append(Space.from_spec(
                value, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank, time_major=time_major
            ))
        # Value is a simple dict -> recursively construct a Dict space as a sub-space of this one.
        else:
            list_.append(Dict(
                value, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank, time_major=time_major
            ))

    return tuple.__new__(cls, list_)
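
# Example sketch for the branches above: ready-made Spaces are copied, primitive specs go
# through `Space.from_spec`, lists become nested Tuple spaces and plain dicts become nested
# Dict spaces (the `rlgraph.spaces` import path is an assumption about this package layout).
from rlgraph.spaces import FloatBox, IntBox, Tuple

tuple_space = Tuple(
    FloatBox(shape=(2,)),          # already a Space -> copied (extra ranks added as requested)
    "int",                         # primitive spec -> Space.from_spec
    [FloatBox(), "bool"],          # list -> nested Tuple space
    dict(a="float", b=IntBox(3)),  # plain dict (no "type" key) -> nested Dict space
    add_batch_rank=True
)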
def __init__(self, spec=None, **kwargs):
    add_batch_rank = kwargs.pop("add_batch_rank", False)
    add_time_rank = kwargs.pop("add_time_rank", False)
    time_major = kwargs.pop("time_major", False)
    ContainerSpace.__init__(
        self, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank, time_major=time_major
    )

    # Allow for any spec or already constructed Space to be passed in as values in the python-dict.
    # The spec may also be given directly via kwargs.
    if spec is None:
        spec = kwargs

    space_dict = {}
    for key in sorted(spec.keys()):
        # Keys must be strings.
        if not isinstance(key, str):
            raise RLGraphError("ERROR: No non-str keys allowed in a Dict-Space!")
        # Prohibit reserved characters (used by the flattened-key syntax).
        if re.search(r'/|{}\d+{}'.format(FLAT_TUPLE_OPEN, FLAT_TUPLE_CLOSE), key):
            raise RLGraphError(
                "ERROR: Key to Dict must not contain '/' or '{}\\d+{}'! Is {}.".format(
                    FLAT_TUPLE_OPEN, FLAT_TUPLE_CLOSE, key
                )
            )
        value = spec[key]
        # Value is already a Space: Copy it (to not affect the original Space) and maybe add batch/time-ranks.
        if isinstance(value, Space):
            space_dict[key] = value.with_extra_ranks(add_batch_rank, add_time_rank, time_major)
        # Value is a list/tuple -> treat as Tuple space.
        elif isinstance(value, (list, tuple)):
            space_dict[key] = Tuple(
                *value, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank, time_major=time_major
            )
        # Value is a spec (or a spec-dict with "type" field) -> produce via `from_spec`.
        elif (isinstance(value, dict) and "type" in value) or not isinstance(value, dict):
            space_dict[key] = Space.from_spec(
                value, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank, time_major=time_major
            )
        # Value is a simple dict -> recursively construct another Dict space as a sub-space of this one.
        else:
            space_dict[key] = Dict(
                value, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank, time_major=time_major
            )

    dict.__init__(self, space_dict)
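
# Example sketch: because the spec may be given via kwargs, a Dict space can be built
# directly from keyword arguments (same import-path assumption as above):
from rlgraph.spaces import Dict, FloatBox

state_space = Dict(
    position=FloatBox(shape=(3,)),                                      # ready-made Space
    velocity="float",                                                   # primitive spec -> from_spec
    sensors=dict(camera=FloatBox(shape=(84, 84, 3)), collided="bool"),  # nested Dict space
    add_batch_rank=True
)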
def __init__(self, network_spec, action_space=None, action_adapter_spec=None, deterministic=True,
             scope="policy", **kwargs):
    """
    Args:
        network_spec (Union[NeuralNetwork,dict]): The NeuralNetwork Component or a specification dict used
            to build one.
        action_space (Optional[Space]): The action Space within which this Component will create actions.
            If None, it is inferred from the ActionAdapter built from `action_adapter_spec`.
        action_adapter_spec (Optional[dict]): A spec-dict to create an ActionAdapter. Use None for the
            default ActionAdapter object.
        deterministic (bool): Whether to pick actions according to the max-likelihood value or via sampling.
            Default: True.
        scope (str): The scope of this Component. Default: "policy".
    """
    super(Policy, self).__init__(scope=scope, **kwargs)

    self.neural_network = NeuralNetwork.from_spec(network_spec)  # type: NeuralNetwork

    # Create the necessary action adapters for this Policy: one per action-space component.
    self.action_adapters = dict()
    if action_space is None:
        self.action_adapters[""] = ActionAdapter.from_spec(action_adapter_spec)
        self.action_space = self.action_adapters[""].action_space
        # Assert a single-component action space.
        assert len(self.action_space.flatten()) == 1, \
            "ERROR: Action space must not be a ContainerSpace if no `action_space` is given in the " \
            "Policy constructor!"
    else:
        self.action_space = Space.from_spec(action_space)
        for i, (flat_key, action_component) in enumerate(self.action_space.flatten().items()):
            if action_adapter_spec is not None:
                aa_spec = action_adapter_spec.get(flat_key, action_adapter_spec)
                aa_spec["action_space"] = action_component
            else:
                aa_spec = dict(action_space=action_component)
            self.action_adapters[flat_key] = ActionAdapter.from_spec(
                aa_spec, scope="action-adapter-{}".format(i)
            )

    self.deterministic = deterministic

    # Figure out our Distributions.
    self.distributions = dict()
    for i, (flat_key, action_component) in enumerate(self.action_space.flatten().items()):
        # Discrete action space -> Categorical distribution.
        if isinstance(action_component, IntBox):
            self.distributions[flat_key] = Categorical(scope="categorical-{}".format(i))
        # Continuous action space -> Normal distribution (each action needs mean and variance from the network).
        elif isinstance(action_component, FloatBox):
            self.distributions[flat_key] = Normal(scope="normal-{}".format(i))
        else:
            raise RLGraphError(
                "ERROR: `action_component` is of type {} and not allowed in {} Component!".format(
                    type(action_component).__name__, self.name
                )
            )

    self.add_components(
        *[self.neural_network] + list(self.action_adapters.values()) + list(self.distributions.values())
    )
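
# Construction sketch: a Policy over a single discrete action space. With an IntBox action
# component, the constructor above pairs the ActionAdapter with a Categorical distribution.
# The layer-spec format is assumed from the rest of this codebase, not defined here.
from rlgraph.spaces import IntBox

policy = Policy(
    network_spec=[dict(type="dense", units=64)],  # spec list -> NeuralNetwork.from_spec
    action_space=IntBox(4),                       # -> one ActionAdapter plus Categorical("categorical-0")
    deterministic=False                           # sample actions instead of picking the max-likelihood one
)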
def __init__(
    self,
    state_space,
    action_space,
    discount=0.98,
    preprocessing_spec=None,
    network_spec=None,
    internal_states_space=None,
    action_adapter_spec=None,
    exploration_spec=None,
    execution_spec=None,
    optimizer_spec=None,
    observe_spec=None,
    update_spec=None,
    summary_spec=None,
    saver_spec=None,
    auto_build=True,
    name="agent"
):
    """
    Args:
        state_space (Union[dict,Space]): Spec dict for the state Space or a direct Space object.
        action_space (Union[dict,Space]): Spec dict for the action Space or a direct Space object.
        discount (float): The discount factor (gamma).
        preprocessing_spec (Optional[list,PreprocessorStack]): The spec list for the different necessary
            state-preprocessing steps or a PreprocessorStack object itself.
        network_spec (Optional[list,NeuralNetwork]): Spec list for a NeuralNetwork Component or the
            NeuralNetwork object itself.
        internal_states_space (Optional[Union[dict,Space]]): Spec dict or direct Space object for the
            Space(s) of the internal (RNN) states.
        action_adapter_spec (Optional[dict,ActionAdapter]): The spec-dict for the ActionAdapter Component
            or the ActionAdapter object itself.
        exploration_spec (Optional[dict]): The spec-dict to create the Exploration Component.
        execution_spec (Optional[dict,Execution]): The spec-dict specifying execution settings.
        optimizer_spec (Optional[dict,Optimizer]): The spec-dict to create the Optimizer for this Agent.
        observe_spec (Optional[dict]): Spec-dict to specify `Agent.observe()` settings.
        update_spec (Optional[dict]): Spec-dict to specify `Agent.update()` settings.
        summary_spec (Optional[dict]): Spec-dict to specify summary settings.
        saver_spec (Optional[dict]): Spec-dict to specify saver settings.
        auto_build (Optional[bool]): If True (default), immediately builds the graph using the Agent's
            graph builder. If False, users must separately call `agent.build()`. Useful for debugging or
            for analyzing Components before building.
        name (str): Some name for this Agent object.
    """
    super(Agent, self).__init__()

    self.name = name
    self.auto_build = auto_build
    self.graph_built = False
    self.logger = logging.getLogger(__name__)

    self.state_space = Space.from_spec(state_space).with_batch_rank(False)
    self.logger.info("Parsed state space definition: {}".format(self.state_space))
    self.action_space = Space.from_spec(action_space).with_batch_rank(False)
    self.logger.info("Parsed action space definition: {}".format(self.action_space))

    self.discount = discount

    # The Agent's root-Component.
    self.root_component = Component(name=self.name)

    # Define the input-Spaces:
    # Tag the input-Space to `self.set_policy_weights` as equal to whatever the variables-Space will be for
    # the Agent's policy Component.
    self.input_spaces = dict(
        states=self.state_space.with_batch_rank(),
    )

    # Construct the Preprocessor.
    self.preprocessor = PreprocessorStack.from_spec(preprocessing_spec)
    self.preprocessed_state_space = self.preprocessor.get_preprocessed_space(self.state_space)
    self.preprocessing_required = preprocessing_spec is not None and len(preprocessing_spec) > 0
    if self.preprocessing_required:
        self.logger.info("Preprocessing required.")
        self.logger.info("Parsed preprocessed-state space definition: {}".format(self.preprocessed_state_space))
    else:
        self.logger.info("No preprocessing required.")

    # Construct the Policy network.
    self.neural_network = None
    if network_spec is not None:
        self.neural_network = NeuralNetwork.from_spec(network_spec)
    self.action_adapter_spec = action_adapter_spec
    self.internal_states_space = internal_states_space

    # An object implementing the loss-function interface is only strictly needed if automatic device
    # strategies like multi-gpu are enabled. This is because the device strategy needs to know the name
    # of the loss function to infer the appropriate operations.
    self.loss_function = None

    # The action adapter mapping raw NN output to (shaped) actions.
    action_adapter_dict = dict(action_space=self.action_space)
    if self.action_adapter_spec is None:
        self.action_adapter_spec = action_adapter_dict
    else:
        self.action_adapter_spec.update(action_adapter_dict)

    # The behavioral policy of the algorithm. Also the one that gets updated.
    self.policy = Policy(
        network_spec=self.neural_network,
        action_adapter_spec=self.action_adapter_spec
    )

    self.exploration = Exploration.from_spec(exploration_spec)
    self.execution_spec = parse_execution_spec(execution_spec)

    # Python-side experience buffers for better performance (may be disabled).
    self.default_env = "env_0"
    self.states_buffer = defaultdict(list)
    self.actions_buffer = defaultdict(list)
    self.internals_buffer = defaultdict(list)
    self.rewards_buffer = defaultdict(list)
    self.next_states_buffer = defaultdict(list)
    self.terminals_buffer = defaultdict(list)

    self.observe_spec = parse_observe_spec(observe_spec)
    if self.observe_spec["buffer_enabled"]:
        self.reset_env_buffers()

    # Global time step counter.
    self.timesteps = 0

    # Create the Agent's optimizer based on optimizer_spec and execution strategy.
    self.optimizer = None
    if optimizer_spec is not None:
        self.optimizer = Optimizer.from_spec(optimizer_spec)

    # The update-spec dict tells the Agent how to update (e.g. memory batch size).
    self.update_spec = parse_update_spec(update_spec)

    # Create our GraphBuilder and GraphExecutor.
    self.graph_builder = GraphBuilder(action_space=self.action_space, summary_spec=summary_spec)
    self.graph_executor = GraphExecutor.from_spec(
        get_backend(),
        graph_builder=self.graph_builder,
        execution_spec=self.execution_spec,
        saver_spec=saver_spec
    )  # type: GraphExecutor
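
# Construction sketch: Agent is normally subclassed by algorithm-specific implementations,
# but the constructor above can be exercised directly; spec contents here are illustrative.
from rlgraph.spaces import FloatBox, IntBox

agent = Agent(
    state_space=FloatBox(shape=(4,)),
    action_space=IntBox(2),
    network_spec=[dict(type="dense", units=32)],  # layer-spec list (format assumed)
    discount=0.99,
    auto_build=False  # build the graph later via `agent.build()` (see docstring above)
)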