def setup_preprocessor(preprocessing_spec, in_space):
    if preprocessing_spec is not None:
        # TODO move ingraph for python component assembly.
        preprocessing_spec = deepcopy(preprocessing_spec)
        in_space = deepcopy(in_space)
        # Store scopes (set if not given).
        scopes = []
        for i, preprocessor in enumerate(preprocessing_spec):
            if "scope" not in preprocessor:
                preprocessor["scope"] = "preprocessor-{}".format(i)
            scopes.append(preprocessor["scope"])
            # Set backend to python.
            preprocessor["backend"] = "python"

        processor_stack = PreprocessorStack(*preprocessing_spec, backend="python")
        build_space = in_space
        for sub_comp_scope in scopes:
            processor_stack.sub_components[sub_comp_scope].create_variables(
                input_spaces=dict(preprocessing_inputs=build_space),
                action_space=None
            )
            build_space = processor_stack.sub_components[sub_comp_scope].get_preprocessed_space(build_space)
        processor_stack.reset()
        return processor_stack
    else:
        return None
def setup_preprocessor(self, preprocessing_spec, in_space):
    if preprocessing_spec is not None:
        preprocessing_spec = deepcopy(preprocessing_spec)
        in_space = deepcopy(in_space)
        # Set scopes.
        scopes = [preprocessor["scope"] for preprocessor in preprocessing_spec]
        # Set backend to python.
        for spec in preprocessing_spec:
            spec["backend"] = "python"

        processor_stack = PreprocessorStack(*preprocessing_spec, backend="python")
        build_space = in_space
        for sub_comp_scope in scopes:
            processor_stack.sub_components[sub_comp_scope].create_variables(
                input_spaces=dict(preprocessing_inputs=build_space),
                action_space=None
            )
            build_space = processor_stack.sub_components[sub_comp_scope].get_preprocessed_space(build_space)
        processor_stack.reset()
        return processor_stack
    else:
        return None
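# Illustrative sketch (hypothetical stand-in classes, NOT the RLgraph API) of the
# space-threading idea in the two helpers above: each preprocessor is built against
# the output space of its predecessor, so shape changes propagate through the stack
# exactly like the loops over `scopes` do with `get_preprocessed_space`.
class _GrayscaleSketch(object):
    """Drops the channel rank, e.g. (H, W, 3) -> (H, W)."""
    def get_preprocessed_space(self, shape):
        return shape[:-1]

class _RescaleSketch(object):
    """Replaces the spatial dims, e.g. (210, 160, 3) -> (84, 84, 3)."""
    def __init__(self, new_hw):
        self.new_hw = new_hw

    def get_preprocessed_space(self, shape):
        return self.new_hw + shape[2:]

def _thread_build_space_sketch(preprocessors, in_shape):
    build_shape = in_shape
    for p in preprocessors:  # same pattern as the loops over `scopes` above
        build_shape = p.get_preprocessed_space(build_shape)
    return build_shape

assert _thread_build_space_sketch(
    [_RescaleSketch((84, 84)), _GrayscaleSketch()], (210, 160, 3)
) == (84, 84)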
def test_sac_agent_component_on_fake_env(self):
    config = config_from_path("configs/sac_component_for_fake_env_test.json")

    # Arbitrary state space, state should not be used in this example.
    state_space = FloatBox(shape=(2,))
    continuous_action_space = FloatBox(low=-1.0, high=1.0)
    terminal_space = BoolBox(add_batch_rank=True)

    policy = Policy.from_spec(config["policy"], action_space=continuous_action_space)
    policy.add_components(Synchronizable(), expose_apis="sync")
    q_function = ValueFunction.from_spec(config["value_function"])

    agent_component = SACAgentComponent(
        agent=None,
        policy=policy,
        q_function=q_function,
        preprocessor=PreprocessorStack.from_spec([]),
        memory=ReplayMemory.from_spec(config["memory"]),
        discount=config["discount"],
        initial_alpha=config["initial_alpha"],
        target_entropy=None,
        optimizer=AdamOptimizer.from_spec(config["optimizer"]),
        vf_optimizer=AdamOptimizer.from_spec(config["value_function_optimizer"], scope="vf-optimizer"),
        alpha_optimizer=None,
        q_sync_spec=SyncSpecification(sync_interval=10, sync_tau=1.0),
        num_q_functions=2
    )

    test = ComponentTest(
        component=agent_component,
        input_spaces=dict(
            states=state_space.with_batch_rank(),
            preprocessed_states=state_space.with_batch_rank(),
            actions=continuous_action_space.with_batch_rank(),
            rewards=FloatBox(add_batch_rank=True),
            next_states=state_space.with_batch_rank(),
            terminals=terminal_space,
            batch_size=int,
            preprocessed_s_prime=state_space.with_batch_rank(),
            importance_weights=FloatBox(add_batch_rank=True),
            preprocessed_next_states=state_space.with_batch_rank(),
            deterministic=bool,
            weights="variables:{}".format(policy.scope),
            # TODO: how to provide the space for multiple component variables?
            # q_weights=Dict(
            #     q_0="variables:{}".format(q_function.scope),
            #     q_1="variables:{}".format(agent_component._q_functions[1].scope),
            # )
        ),
        action_space=continuous_action_space,
        build_kwargs=dict(
            optimizer=agent_component._optimizer,
            build_options=dict(
                vf_optimizer=agent_component.vf_optimizer,
            ),
        )
    )

    policy_loss = []
    vf_loss = []
    # This test simulates an env that always requires actions to be close to the max-pdf
    # value of a loc=0.5, scale=0.2 normal, regardless of any state inputs.
    # The component should learn to produce actions like that (close to 0.5).
    true_mean = 0.5
    target_dist = stats.norm(loc=true_mean, scale=0.2)
    batch_size = 100
    for _ in range(5000):
        action_sample = continuous_action_space.sample(batch_size)
        rewards = target_dist.pdf(action_sample)
        result = test.test(("update_from_external_batch", [
            state_space.sample(batch_size),
            action_sample,
            rewards,
            [True] * batch_size,
            state_space.sample(batch_size),
            [1.0] * batch_size  # importance
        ]))
        policy_loss.append(result["actor_loss"])
        vf_loss.append(result["critic_loss"])

    # Losses over the first 100 updates should exceed those over the last 100.
    self.assertTrue(np.mean(policy_loss[:100]) > np.mean(policy_loss[-100:]))
    self.assertTrue(np.mean(vf_loss[:100]) > np.mean(vf_loss[-100:]))

    action_sample = np.linspace(-1, 1, batch_size)
    q_values = test.test(("get_q_values", [state_space.sample(batch_size), action_sample]))
    for q_val in q_values:
        q_val = q_val.flatten()
        np.testing.assert_allclose(q_val, target_dist.pdf(action_sample), atol=0.2)

    action_sample, _ = test.test(("action_from_preprocessed_state", [state_space.sample(batch_size), False]))
    action_sample = action_sample.flatten()
    np.testing.assert_allclose(np.mean(action_sample), true_mean, atol=0.1)
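# Hedged side note on SyncSpecification(sync_interval=10, sync_tau=1.0) used above:
# it describes when and how target Q-networks track the online ones. Below is a
# minimal numpy sketch of a soft ("Polyak") update under the standard convention --
# tau=1.0 degenerates to a hard copy; RLgraph's internal sync op may differ.
import numpy as np

def _soft_sync_sketch(target_weights, online_weights, tau):
    # target <- tau * online + (1 - tau) * target, applied per weight tensor.
    return [tau * w + (1.0 - tau) * t for t, w in zip(target_weights, online_weights)]

_target = [np.zeros((2, 2))]
_online = [np.ones((2, 2))]
assert np.allclose(_soft_sync_sketch(_target, _online, tau=1.0)[0], 1.0)    # hard copy
assert np.allclose(_soft_sync_sketch(_target, _online, tau=0.05)[0], 0.05)  # slow tracking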
def test_with_final_eval(self):
    """
    Tests whether Ape-X can learn a simple environment using a single worker
    (thus replicating DQN), then runs a final evaluation of the learned policy.
    """
    env_spec = dict(type="openai", gym_env="CartPole-v0")
    agent_config = config_from_path("configs/apex_agent_cartpole.json")

    # Use n-step adjustments.
    agent_config["execution_spec"]["ray_spec"]["worker_spec"]["n_step_adjustment"] = 3
    agent_config["execution_spec"]["ray_spec"]["apex_replay_spec"]["n_step_adjustment"] = 3
    agent_config["n_step"] = 3

    # Define executor, test assembly.
    executor = ApexExecutor(
        environment_spec=env_spec,
        agent_config=agent_config,
    )
    print("Successfully created executor.")

    # Execute the actual workload.
    result = executor.execute_workload(
        workload=dict(num_timesteps=20000, report_interval=1000, report_interval_min_seconds=1)
    )
    print("Finished executing workload:")
    print(result)

    # Get agent.
    agent = executor.local_agent
    preprocessing_spec = agent_config["preprocessing_spec"]

    # Create env.
    env = OpenAIGymEnv.from_spec(env_spec)

    if preprocessing_spec is not None:
        preprocessing_spec = deepcopy(preprocessing_spec)
        in_space = env.state_space.with_batch_rank()
        in_space = deepcopy(in_space)
        # Set scopes.
        scopes = [preprocessor["scope"] for preprocessor in preprocessing_spec]
        # Set backend to python.
        for spec in preprocessing_spec:
            spec["backend"] = "python"

        processor_stack = PreprocessorStack(*preprocessing_spec, backend="python")
        build_space = in_space
        for sub_comp_scope in scopes:
            processor_stack.sub_components[sub_comp_scope].create_variables(
                input_spaces=dict(preprocessing_inputs=build_space),
                action_space=None
            )
            build_space = processor_stack.sub_components[sub_comp_scope].get_preprocessed_space(build_space)
        processor_stack.reset()
    else:
        processor_stack = None

    ep_rewards = []
    print("Finished learning, starting eval.")
    for _ in range(10):
        state = env.reset()
        terminal = False
        ep_reward = 0
        while not terminal:
            state = agent.state_space.force_batch(state)
            if processor_stack is not None:
                state = processor_stack.preprocess(state)
            actions = agent.get_action(states=state, use_exploration=False, apply_preprocessing=False)
            next_state, step_reward, terminal, info = env.step(actions=actions[0])
            ep_reward += step_reward
            state = next_state
            if terminal:
                ep_rewards.append(ep_reward)
                break

    print("Eval episode rewards:")
    print(ep_rewards)
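# Hedged sketch of the n_step_adjustment=3 setting used above: with n-step returns,
# each transition's reward becomes the discounted sum of the next n rewards
# (R_t = r_t + gamma * r_{t+1} + gamma^2 * r_{t+2}) and next_state skips n steps
# ahead. This shows the standard textbook computation; RLgraph's worker-side
# implementation may differ in how it handles episode boundaries.
def _n_step_rewards_sketch(rewards, gamma, n):
    return [
        sum(gamma ** k * rewards[t + k] for k in range(n))
        for t in range(len(rewards) - n + 1)
    ]

assert _n_step_rewards_sketch([1.0, 1.0, 1.0, 1.0], gamma=0.5, n=3) == [1.75, 1.75]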
def __init__(self, state_space, action_space, discount=0.98, preprocessing_spec=None, network_spec=None,
             internal_states_space=None, policy_spec=None, value_function_spec=None, exploration_spec=None,
             execution_spec=None, optimizer_spec=None, value_function_optimizer_spec=None, observe_spec=None,
             update_spec=None, summary_spec=None, saver_spec=None, auto_build=True, name="agent"):
    """
    Args:
        state_space (Union[dict,Space]): Spec dict for the state Space or a direct Space object.
        action_space (Union[dict,Space]): Spec dict for the action Space or a direct Space object.
        discount (float): The discount factor (gamma).
        preprocessing_spec (Optional[list,PreprocessorStack]): The spec list for the different necessary states
            preprocessing steps or a PreprocessorStack object itself.
        network_spec (Optional[list,NeuralNetwork]): Spec list for a NeuralNetwork Component or the NeuralNetwork
            object itself.
        internal_states_space (Optional[Union[dict,Space]]): Spec dict for the internal-states Space or a direct
            Space object for the Space(s) of the internal (RNN) states.
        policy_spec (Optional[dict]): An optional dict for further kwargs passing into the Policy c'tor.
        value_function_spec (list): Neural network specification for the baseline (value function).
        exploration_spec (Optional[dict]): The spec-dict to create the Exploration Component.
        execution_spec (Optional[dict,Execution]): The spec-dict specifying execution settings.
        optimizer_spec (Optional[dict,Optimizer]): The spec-dict to create the Optimizer for this Agent.
        value_function_optimizer_spec (dict): Optimizer config for the value function optimizer. If None, the
            optimizer spec for the policy is used (same learning rate and optimizer type).
        observe_spec (Optional[dict]): Spec-dict to specify `Agent.observe()` settings.
        update_spec (Optional[dict]): Spec-dict to specify `Agent.update()` settings.
        summary_spec (Optional[dict]): Spec-dict to specify summary settings.
        saver_spec (Optional[dict]): Spec-dict to specify saver settings.
        auto_build (Optional[bool]): If True (default), immediately builds the graph using the agent's
            graph builder. If False, users must separately call agent.build(). Useful for debugging or
            analyzing components before building.
        name (str): Some name for this Agent object.
    """
    super(Agent, self).__init__()
    self.name = name
    self.auto_build = auto_build
    self.graph_built = False
    self.logger = logging.getLogger(__name__)

    self.state_space = Space.from_spec(state_space).with_batch_rank(False)
    self.flat_state_space = self.state_space.flatten() if isinstance(self.state_space, ContainerSpace) else None
    self.logger.info("Parsed state space definition: {}".format(self.state_space))
    self.action_space = Space.from_spec(action_space).with_batch_rank(False)
    self.flat_action_space = self.action_space.flatten() if isinstance(self.action_space, ContainerSpace) else None
    self.logger.info("Parsed action space definition: {}".format(self.action_space))

    self.discount = discount
    self.build_options = {}

    # The agent's root-Component.
    self.root_component = Component(name=self.name, nesting_level=0)

    # Define the input-Spaces:
    # Tag the input-Space to `self.set_weights` as equal to whatever the variables-Space will be for
    # the Agent's policy Component.
    self.input_spaces = dict(
        states=self.state_space.with_batch_rank(),
    )

    # Construct the Preprocessor.
    self.preprocessor = PreprocessorStack.from_spec(preprocessing_spec)
    self.preprocessed_state_space = self.preprocessor.get_preprocessed_space(self.state_space)
    self.preprocessing_required = preprocessing_spec is not None and len(preprocessing_spec) > 0
    if self.preprocessing_required:
        self.logger.info("Preprocessing required.")
        self.logger.info("Parsed preprocessed-state space definition: {}".format(self.preprocessed_state_space))
    else:
        self.logger.info("No preprocessing required.")

    # Construct the Policy network.
    policy_spec = policy_spec or dict()
    if "network_spec" not in policy_spec:
        policy_spec["network_spec"] = network_spec
    if "action_space" not in policy_spec:
        policy_spec["action_space"] = self.action_space
    self.policy_spec = policy_spec
    # The behavioral policy of the algorithm. Also the one that gets updated.
    self.policy = Policy.from_spec(self.policy_spec)
    # Done by default.
    self.policy.add_components(Synchronizable(), expose_apis="sync")

    # Create non-shared baseline network.
    self.value_function = None
    if value_function_spec is not None:
        self.value_function = ValueFunction(network_spec=value_function_spec)
        self.value_function.add_components(Synchronizable(), expose_apis="sync")
        self.vars_merger = ContainerMerger("policy", "vf", scope="variable-dict-merger")
        self.vars_splitter = ContainerSplitter("policy", "vf", scope="variable-container-splitter")
    else:
        self.vars_merger = ContainerMerger("policy", scope="variable-dict-merger")
        self.vars_splitter = ContainerSplitter("policy", scope="variable-container-splitter")

    self.internal_states_space = Space.from_spec(internal_states_space)

    # An object implementing the loss function interface is only strictly needed
    # if automatic device strategies like multi-gpu are enabled. This is because
    # the device strategy needs to know the name of the loss function to infer the appropriate
    # operations.
    self.loss_function = None

    self.exploration = Exploration.from_spec(exploration_spec)
    self.execution_spec = parse_execution_spec(execution_spec)

    # Python-side experience buffer for better performance (may be disabled).
    self.default_env = "env_0"

    def factory_(i):
        # For 0 or 1 flat keys, a plain list suffices; otherwise, one list per flat key.
        if i < 2:
            return []
        return tuple([[] for _ in range(i)])

    self.states_buffer = defaultdict(list)  # partial(factory_, len(self.flat_state_space))
    self.actions_buffer = defaultdict(partial(factory_, len(self.flat_action_space or [])))
    self.internals_buffer = defaultdict(list)
    self.rewards_buffer = defaultdict(list)
    self.next_states_buffer = defaultdict(list)  # partial(factory_, len(self.flat_state_space))
    self.terminals_buffer = defaultdict(list)

    self.observe_spec = parse_observe_spec(observe_spec)

    # Global time step counter.
    self.timesteps = 0

    # Create the Agent's optimizer based on optimizer_spec and execution strategy.
    self.optimizer = None
    if optimizer_spec is not None:
        # Save spec in case the agent needs to create more optimizers, e.g. for the baseline.
        self.optimizer_spec = optimizer_spec
        self.optimizer = Optimizer.from_spec(optimizer_spec)

    self.value_function_optimizer = None
    if self.value_function is not None:
        if value_function_optimizer_spec is None:
            vf_optimizer_spec = self.optimizer_spec
        else:
            vf_optimizer_spec = value_function_optimizer_spec
        vf_optimizer_spec["scope"] = "value-function-optimizer"
        self.value_function_optimizer = Optimizer.from_spec(vf_optimizer_spec)

    # Update-spec dict tells the Agent how to update (e.g. memory batch size).
    self.update_spec = parse_update_spec(update_spec)

    # Create our GraphBuilder and -Executor.
    self.graph_builder = GraphBuilder(action_space=self.action_space, summary_spec=summary_spec)
    self.graph_executor = GraphExecutor.from_spec(
        get_backend(),
        graph_builder=self.graph_builder,
        execution_spec=self.execution_spec,
        saver_spec=saver_spec
    )  # type: GraphExecutor
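# Illustrative sketch (hypothetical stand-in, NOT the RLgraph Space API) of the
# container-space flattening done in the constructor above via
# `self.state_space.flatten()`: a nested Dict space becomes a flat
# {flat-key: primitive-space} mapping, so per-key buffers and batches can be
# handled as parallel flat structures (cf. `factory_` and the buffers above).
def _flatten_space_sketch(space, prefix=""):
    if isinstance(space, dict):
        flat = {}
        for key, sub_space in space.items():
            flat.update(_flatten_space_sketch(sub_space, prefix + "/" + key))
        return flat
    return {prefix or "/": space}

_nested = dict(camera="FloatBox((84, 84, 3))", proprio=dict(joints="FloatBox((7,))"))
assert _flatten_space_sketch(_nested) == {
    "/camera": "FloatBox((84, 84, 3))",
    "/proprio/joints": "FloatBox((7,))",
}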
def test_sac_agent_component_functionality(self):
    config = config_from_path("configs/sac_component_for_fake_env_test.json")

    # Arbitrary state space, state should not be used in this example.
    state_space = FloatBox(shape=(8,))
    continuous_action_space = FloatBox(shape=(1,), low=-2.0, high=2.0)
    terminal_space = BoolBox(add_batch_rank=True)
    rewards_space = FloatBox(add_batch_rank=True)

    policy = Policy.from_spec(config["policy"], action_space=continuous_action_space)
    policy.add_components(Synchronizable(), expose_apis="sync")
    q_function = ValueFunction.from_spec(config["value_function"])

    agent_component = SACAgentComponent(
        agent=None,
        policy=policy,
        q_function=q_function,
        preprocessor=PreprocessorStack.from_spec([]),
        memory=ReplayMemory.from_spec(config["memory"]),
        discount=config["discount"],
        initial_alpha=config["initial_alpha"],
        target_entropy=None,
        optimizer=AdamOptimizer.from_spec(config["optimizer"]),
        vf_optimizer=AdamOptimizer.from_spec(config["value_function_optimizer"], scope="vf-optimizer"),
        alpha_optimizer=None,
        q_sync_spec=SyncSpecification(sync_interval=10, sync_tau=1.0),
        num_q_functions=2
    )

    test = ComponentTest(
        component=agent_component,
        input_spaces=dict(
            states=state_space.with_batch_rank(),
            preprocessed_states=state_space.with_batch_rank(),
            env_actions=continuous_action_space.with_batch_rank(),
            actions=continuous_action_space.with_batch_rank(),
            rewards=rewards_space,
            next_states=state_space.with_batch_rank(),
            terminals=terminal_space,
            batch_size=int,
            preprocessed_s_prime=state_space.with_batch_rank(),
            importance_weights=FloatBox(add_batch_rank=True),
            preprocessed_next_states=state_space.with_batch_rank(),
            deterministic=bool,
            weights="variables:{}".format(policy.scope),
            # TODO: how to provide the space for multiple component variables?
            # q_weights=Dict(
            #     q_0="variables:{}".format(q_function.scope),
            #     q_1="variables:{}".format(agent_component._q_functions[1].scope),
            # )
        ),
        action_space=continuous_action_space,
        build_kwargs=dict(
            optimizer=agent_component._optimizer,
            build_options=dict(
                vf_optimizer=agent_component.vf_optimizer,
            ),
        )
    )

    batch_size = 10
    action_sample = continuous_action_space.with_batch_rank().sample(batch_size)
    rewards = rewards_space.sample(batch_size)

    # Check whether an update runs ok.
    result = test.test(("update_from_external_batch", [
        state_space.sample(batch_size),
        action_sample,
        rewards,
        [True] * batch_size,
        state_space.sample(batch_size),
        [1.0] * batch_size  # importance
    ]))
    self.assertTrue(result["actor_loss"].dtype == np.float32)
    self.assertTrue(result["critic_loss"].dtype == np.float32)

    action_sample = np.linspace(-1, 1, batch_size).reshape((batch_size, 1))
    q_values = test.test(("get_q_values", [state_space.sample(batch_size), action_sample]))
    for q_val in q_values:
        self.assertTrue(q_val.dtype == np.float32)
        self.assertTrue(q_val.shape == (batch_size, 1))

    action_sample, _ = test.test(("action_from_preprocessed_state", [state_space.sample(batch_size), False]))
    self.assertTrue(action_sample.dtype == np.float32)
    self.assertTrue(action_sample.shape == (batch_size, 1))
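# Hedged side note on the bounded action space above (FloatBox(low=-2.0, high=2.0)):
# SAC policies for bounded continuous spaces conventionally sample from a Gaussian
# and squash through tanh, rescaling into the action bounds. A minimal numpy sketch
# of that convention follows -- not necessarily RLgraph's exact distribution code.
import numpy as np

def _squash_sketch(raw_sample, low=-2.0, high=2.0):
    unit = np.tanh(raw_sample)                       # squash into (-1, 1)
    return low + (unit + 1.0) * 0.5 * (high - low)   # rescale into (low, high)

_squashed = _squash_sketch(np.random.randn(10, 1))
assert _squashed.min() > -2.0 and _squashed.max() < 2.0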
def __init__(self, state_space, action_space, discount=0.98, preprocessing_spec=None, network_spec=None,
             internal_states_space=None, action_adapter_spec=None, exploration_spec=None, execution_spec=None,
             optimizer_spec=None, observe_spec=None, update_spec=None, summary_spec=None, saver_spec=None,
             auto_build=True, name="agent"):
    """
    Args:
        state_space (Union[dict,Space]): Spec dict for the state Space or a direct Space object.
        action_space (Union[dict,Space]): Spec dict for the action Space or a direct Space object.
        discount (float): The discount factor (gamma).
        preprocessing_spec (Optional[list,PreprocessorStack]): The spec list for the different necessary states
            preprocessing steps or a PreprocessorStack object itself.
        network_spec (Optional[list,NeuralNetwork]): Spec list for a NeuralNetwork Component or the NeuralNetwork
            object itself.
        internal_states_space (Optional[Union[dict,Space]]): Spec dict for the internal-states Space or a direct
            Space object for the Space(s) of the internal (RNN) states.
        action_adapter_spec (Optional[dict,ActionAdapter]): The spec-dict for the ActionAdapter Component or the
            ActionAdapter object itself.
        exploration_spec (Optional[dict]): The spec-dict to create the Exploration Component.
        execution_spec (Optional[dict,Execution]): The spec-dict specifying execution settings.
        optimizer_spec (Optional[dict,Optimizer]): The spec-dict to create the Optimizer for this Agent.
        observe_spec (Optional[dict]): Spec-dict to specify `Agent.observe()` settings.
        update_spec (Optional[dict]): Spec-dict to specify `Agent.update()` settings.
        summary_spec (Optional[dict]): Spec-dict to specify summary settings.
        saver_spec (Optional[dict]): Spec-dict to specify saver settings.
        auto_build (Optional[bool]): If True (default), immediately builds the graph using the agent's
            graph builder. If False, users must separately call agent.build(). Useful for debugging or
            analyzing components before building.
        name (str): Some name for this Agent object.
    """
    super(Agent, self).__init__()
    self.name = name
    self.auto_build = auto_build
    self.graph_built = False
    self.logger = logging.getLogger(__name__)

    self.state_space = Space.from_spec(state_space).with_batch_rank(False)
    self.logger.info("Parsed state space definition: {}".format(self.state_space))
    self.action_space = Space.from_spec(action_space).with_batch_rank(False)
    self.logger.info("Parsed action space definition: {}".format(self.action_space))

    self.discount = discount

    # The agent's root-Component.
    self.root_component = Component(name=self.name)

    # Define the input-Spaces:
    # Tag the input-Space to `self.set_policy_weights` as equal to whatever the variables-Space will be for
    # the Agent's policy Component.
    self.input_spaces = dict(
        states=self.state_space.with_batch_rank(),
    )

    # Construct the Preprocessor.
    self.preprocessor = PreprocessorStack.from_spec(preprocessing_spec)
    self.preprocessed_state_space = self.preprocessor.get_preprocessed_space(self.state_space)
    # Note: `> 0` (not `> 1`) -- a single preprocessor still requires preprocessing.
    self.preprocessing_required = preprocessing_spec is not None and len(preprocessing_spec) > 0
    if self.preprocessing_required:
        self.logger.info("Preprocessing required.")
        self.logger.info("Parsed preprocessed-state space definition: {}".format(self.preprocessed_state_space))
    else:
        self.logger.info("No preprocessing required.")

    # Construct the Policy network.
    self.neural_network = None
    if network_spec is not None:
        self.neural_network = NeuralNetwork.from_spec(network_spec)
    self.action_adapter_spec = action_adapter_spec
    self.internal_states_space = internal_states_space

    # An object implementing the loss function interface is only strictly needed
    # if automatic device strategies like multi-gpu are enabled. This is because
    # the device strategy needs to know the name of the loss function to infer the appropriate
    # operations.
    self.loss_function = None

    # The action adapter mapping raw NN output to (shaped) actions.
    action_adapter_dict = dict(action_space=self.action_space)
    if self.action_adapter_spec is None:
        self.action_adapter_spec = action_adapter_dict
    else:
        self.action_adapter_spec.update(action_adapter_dict)

    # The behavioral policy of the algorithm. Also the one that gets updated.
    self.policy = Policy(
        network_spec=self.neural_network,
        action_adapter_spec=self.action_adapter_spec
    )

    self.exploration = Exploration.from_spec(exploration_spec)
    self.execution_spec = parse_execution_spec(execution_spec)

    # Python-side experience buffer for better performance (may be disabled).
    self.default_env = "env_0"
    self.states_buffer = defaultdict(list)
    self.actions_buffer = defaultdict(list)
    self.internals_buffer = defaultdict(list)
    self.rewards_buffer = defaultdict(list)
    self.next_states_buffer = defaultdict(list)
    self.terminals_buffer = defaultdict(list)

    self.observe_spec = parse_observe_spec(observe_spec)
    if self.observe_spec["buffer_enabled"]:
        self.reset_env_buffers()

    # Global time step counter.
    self.timesteps = 0

    # Create the Agent's optimizer based on optimizer_spec and execution strategy.
    self.optimizer = None
    if optimizer_spec is not None:
        self.optimizer = Optimizer.from_spec(optimizer_spec)
        # get_optimizer_from_device_strategy(
        #     optimizer_spec, self.execution_spec.get("device_strategy", 'default')
        # )

    # Update-spec dict tells the Agent how to update (e.g. memory batch size).
    self.update_spec = parse_update_spec(update_spec)

    # Create our GraphBuilder and -Executor.
    self.graph_builder = GraphBuilder(action_space=self.action_space, summary_spec=summary_spec)
    self.graph_executor = GraphExecutor.from_spec(
        get_backend(),
        graph_builder=self.graph_builder,
        execution_spec=self.execution_spec,
        saver_spec=saver_spec
    )  # type: GraphExecutor