def test_latest_batch(self):
    """
    Tests if we can fetch the latest steps.
    """
    for backend in (None, "python"):
        ring_buffer = RingBuffer(capacity=self.capacity, backend=backend)
        test = ComponentTest(component=ring_buffer, input_spaces=self.input_spaces)

        # Insert 5 random elements.
        observation = non_terminal_records(self.record_space, 5)
        test.test(("insert_records", observation), expected_outputs=None)

        # First, test if the basic computation works.
        batch = test.test(("get_records", 5), expected_outputs=None)
        recursive_assert_almost_equal(batch, observation)

        # Next, insert `capacity` more elements:
        observation = non_terminal_records(self.record_space, self.capacity)
        test.test(("insert_records", observation), expected_outputs=None)

        # If we now fetch `capacity` elements, we expect to see exactly the last `capacity` ones.
        batch = test.test(("get_records", self.capacity), expected_outputs=None)
        recursive_assert_almost_equal(batch, observation)

        # If we fetch n elements, we expect to see exactly the last n.
        for last_n in range(1, 6):
            batch = test.test(("get_records", last_n), expected_outputs=None)
            recursive_assert_almost_equal(batch["actions"]["action1"], observation["actions"]["action1"][-last_n:])
            recursive_assert_almost_equal(batch["states"]["state2"], observation["states"]["state2"][-last_n:])
            recursive_assert_almost_equal(batch["terminals"], observation["terminals"][-last_n:])
def assert_equal(outs, expected_outputs, decimals=7):
    """
    Convenience wrapper: See the implementation of `recursive_assert_almost_equal` for details.
    """
    recursive_assert_almost_equal(outs, expected_outputs, decimals=decimals)
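# For reference, a minimal sketch (not RLgraph's actual implementation) of what
# `recursive_assert_almost_equal` does: recurse through nested dicts/tuples/lists
# and compare the leaves up to `decimals` digits via numpy's testing utilities.
# The helper name below is hypothetical and only illustrates the semantics.
import numpy as np

def _recursive_assert_almost_equal_sketch(actual, expected, decimals=7):
    if isinstance(expected, dict):
        assert set(actual.keys()) == set(expected.keys())
        for key, value in expected.items():
            _recursive_assert_almost_equal_sketch(actual[key], value, decimals)
    elif isinstance(expected, (list, tuple)):
        assert len(actual) == len(expected)
        for a, e in zip(actual, expected):
            _recursive_assert_almost_equal_sketch(a, e, decimals)
    else:
        np.testing.assert_almost_equal(actual, expected, decimal=decimals)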
def test_value_function_weights(self):
    """
    Tests changing of value-function weights.
    """
    env = OpenAIGymEnv("Pong-v0")
    agent_config = config_from_path("configs/ppo_agent_for_pong.json")
    agent = PPOAgent.from_spec(agent_config, state_space=env.state_space, action_space=env.action_space)
    weights = agent.get_weights()
    assert "value_function_weights" in weights
    assert "policy_weights" in weights

    policy_weights = weights["policy_weights"]
    value_function_weights = weights["value_function_weights"]

    # Just change the value-function weights.
    for key, weight in value_function_weights.items():
        value_function_weights[key] = weight + 0.01
    agent.set_weights(policy_weights, value_function_weights)
    new_actual_weights = agent.get_weights()

    recursive_assert_almost_equal(
        new_actual_weights["value_function_weights"],
        value_function_weights
    )
def test_demos_with_container_actions(self):
    """
    Tests if DQFD can fit a set of states to a set of container actions.
    """
    vocab_size = 100
    embed_dim = 128
    # ID/state space.
    state_space = IntBox(vocab_size, shape=(10,))

    # Container action space.
    actions_space = {}
    num_outputs = 3
    for i in range(3):
        actions_space["action_{}".format(i)] = IntBox(low=0, high=num_outputs)
    actions_space = Dict(actions_space)

    agent_config = config_from_path("configs/dqfd_container.json")
    agent_config["network_spec"] = [
        dict(type="embedding", embed_dim=embed_dim, vocab_size=vocab_size),
        dict(type="reshape", flatten=True),
        dict(type="dense", units=embed_dim, activation="relu", scope="dense_1")
    ]
    agent = DQFDAgent.from_spec(agent_config, state_space=state_space, action_space=actions_space)
    terminals = BoolBox(add_batch_rank=True)
    rewards = FloatBox(add_batch_rank=True)

    # Create a set of demos.
    demo_states = agent.preprocessed_state_space.with_batch_rank().sample(20)
    demo_actions = actions_space.with_batch_rank().sample(20)
    demo_rewards = rewards.sample(20, fill_value=1.0)
    demo_next_states = agent.preprocessed_state_space.with_batch_rank().sample(20)
    demo_terminals = terminals.sample(20, fill_value=False)

    # Insert.
    agent.observe_demos(
        preprocessed_states=demo_states,
        actions=demo_actions,
        rewards=demo_rewards,
        next_states=demo_next_states,
        terminals=demo_terminals,
    )

    # Fit demos.
    agent.update_from_demos(num_updates=5000, batch_size=20)

    # Evaluate demos:
    agent_actions = agent.get_action(demo_states, apply_preprocessing=False, use_exploration=False)
    recursive_assert_almost_equal(agent_actions, demo_actions)
def test_actor_component_with_lstm_network(self):
    # State space and internal-state space.
    state_space = FloatBox(shape=(2,), add_batch_rank=True, add_time_rank=True, time_major=False)
    internal_states_space = Tuple(FloatBox(shape=(3,)), FloatBox(shape=(3,)), add_batch_rank=True)
    time_percentages_space = FloatBox()
    # Action space.
    action_space = IntBox(2, add_batch_rank=True, add_time_rank=True)

    preprocessor = PreprocessorStack.from_spec(
        [dict(type="convert_type", to_dtype="float"), dict(type="divide", divisor=10)]
    )
    policy = Policy(network_spec=config_from_path("configs/test_lstm_nn.json"), action_space=action_space)
    exploration = Exploration(epsilon_spec=dict(decay_spec=dict(
        type="linear_decay", from_=1.0, to_=0.1
    )))
    actor_component = ActorComponent(preprocessor, policy, exploration)
    test = ComponentTest(
        component=actor_component,
        input_spaces=dict(
            states=state_space,
            other_nn_inputs=Tuple(internal_states_space, add_batch_rank=True),
            time_percentage=time_percentages_space
        ),
        action_space=action_space
    )

    # Some state inputs (batch-size=2, seq-len=1000; batch-major).
    np.random.seed(10)
    states = state_space.sample(size=(1000, 2))
    initial_internal_states = internal_states_space.zeros(size=2)  # only batch
    time_percentages = time_percentages_space.sample(1000)

    # Run a single time-step n times to simulate acting and env interaction with an LSTM.
    preprocessed_states = np.ndarray(shape=(1000, 2, 2), dtype=np.float64)
    actions = np.ndarray(shape=(1000, 2, 1), dtype=np.int64)
    for i, time_percentage in enumerate(time_percentages):
        ret = test.test((
            "get_preprocessed_state_and_action",
            # Expand the time dim at the 1st slot as we are time-major == False.
            [np.expand_dims(states[i], 1), tuple([initial_internal_states]), time_percentage]
        ))
        preprocessed_states[i] = ret["preprocessed_state"][:, 0, :]  # take out the time-rank again
        actions[i] = ret["action"]
        # Check c/h-state shapes.
        self.assertEqual(ret["nn_outputs"][1][0].shape, (2, 3))  # batch-size=2, LSTM units=3
        self.assertEqual(ret["nn_outputs"][1][1].shape, (2, 3))

    # Check all preprocessed states (easy: just divided by 10).
    expected_preprocessed_state = states / 10
    recursive_assert_almost_equal(preprocessed_states, expected_preprocessed_state)

    # Check the exploration functionality over the actions.
    # Not checking the mean as we are mostly in the non-exploratory region; that's why the stddev should be small.
    stddev_actions = actions.std()
    self.assertGreater(stddev_actions, 0.4)
    self.assertLess(stddev_actions, 0.6)
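# A standalone sketch of the linear epsilon decay configured above (hypothetical
# helper, not RLgraph's actual decay component): epsilon interpolates linearly
# from `from_` to `to_` as `time_percentage` goes from 0.0 to 1.0.
def _linear_decay_sketch(time_percentage, from_=1.0, to_=0.1):
    return from_ + time_percentage * (to_ - from_)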
def check_env(self, prop, expected_value, decimals=7):
    """
    Checks a property of our environment for (almost) equality.

    Args:
        prop (str): The name of the Environment's property to check.
        expected_value (any): The expected value of the given property.
        decimals (Optional[int]): The number of digits after the floating point up to which
            to compare actual and expected values.
    """
    is_value = getattr(self.env, prop, None)
    recursive_assert_almost_equal(is_value, expected_value, decimals=decimals)
def test_sequential_vector_env(self):
    num_envs = 4
    env = SequentialVectorEnv(num_environments=num_envs, env_spec={"type": "gridworld", "world": "2x2"})

    # Simple test runs with fixed actions.
    # X=player's position.
    s = env.reset(index=0)  # ["XH", " G"]
    self.assertTrue(s == 0)
    s = env.reset_all()
    for s_ in s:
        self.assertTrue(s_ == 0)

    s, r, t, _ = env.step([2 for _ in range(num_envs)])  # down: [" H", "XG"]
    for s_, r_, t_ in zip(s, r, t):
        self.assertTrue(s_ == 1)
        recursive_assert_almost_equal(r_, -0.1)
        self.assertTrue(not t_)

    s, r, t, _ = env.step([1 for _ in range(num_envs)])  # right: [" H", " X"]
    for s_, r_, t_ in zip(s, r, t):
        self.assertTrue(s_ == 3)
        recursive_assert_almost_equal(r_, 1.0)
        self.assertTrue(t_)

    for i in range(num_envs):
        env.reset(index=i)  # ["XH", " G"]
    s, r, t, _ = env.step([1 for _ in range(num_envs)])  # right: [" X", " G"] -> in the hole
    for s_, r_, t_ in zip(s, r, t):
        self.assertTrue(s_ == 2)
        self.assertTrue(r_ == -5.0)
        self.assertTrue(t_)

    # Run against a wall.
    env.reset_all()  # ["XH", " G"]
    s, r, t, _ = env.step([3 for _ in range(num_envs)])  # left: ["XH", " G"]
    for s_, r_, t_ in zip(s, r, t):
        self.assertTrue(s_ == 0)
        recursive_assert_almost_equal(r_, -0.1)
        self.assertTrue(not t_)

    s, r, t, _ = env.step([2 for _ in range(num_envs)])  # down: [" H", "XG"]
    for s_, r_, t_ in zip(s, r, t):
        self.assertTrue(s_ == 1)
        recursive_assert_almost_equal(r_, -0.1)
        self.assertTrue(not t_)

    s, r, t, _ = env.step([0 for _ in range(num_envs)])  # up: ["XH", " G"]
    for s_, r_, t_ in zip(s, r, t):
        self.assertTrue(s_ == 0)
        recursive_assert_almost_equal(r_, -0.1)
        self.assertTrue(not t_)
def test_double_dqn_on_2x2_grid_world(self):
    """
    Creates a double-DQN agent and runs it via a Runner on a simple 2x2 GridWorld.
    """
    env_spec = dict(world="2x2")
    dummy_env = GridWorld.from_spec(env_spec)
    agent_config = config_from_path("configs/dqn_agent_for_2x2_gridworld.json")
    preprocessing_spec = agent_config.pop("preprocessing_spec")
    agent = DQNAgent.from_spec(
        agent_config,
        dueling_q=False,
        state_space=self.grid_world_2x2_flattened_state_space,
        action_space=dummy_env.action_space,
        execution_spec=dict(seed=10),
        update_spec=dict(update_interval=4, batch_size=24, sync_interval=32),
        optimizer_spec=dict(type="adam", learning_rate=0.05),
        store_last_q_table=True
    )

    time_steps = 1000
    worker = SingleThreadedWorker(
        env_spec=lambda: GridWorld.from_spec(env_spec),
        agent=agent,
        preprocessing_spec=preprocessing_spec,
        worker_executes_preprocessing=True
    )
    results = worker.execute_timesteps(time_steps, use_exploration=True)

    print("STATES:\n{}".format(agent.last_q_table["states"]))
    print("\n\nQ(s,a)-VALUES:\n{}".format(np.round_(agent.last_q_table["q_values"], decimals=2)))

    self.assertEqual(results["timesteps_executed"], time_steps)
    self.assertEqual(results["env_frames"], time_steps)
    self.assertGreaterEqual(results["mean_episode_reward"], -4.5)
    self.assertGreaterEqual(results["max_episode_reward"], 0.0)
    self.assertLessEqual(results["episodes_executed"], 350)

    # Check the q-table for correct values.
    expected_q_values_per_state = {
        (1.0, 0, 0, 0): (-1, -5, 0, -1),
        (0, 1.0, 0, 0): (-1, 1, 0, 0)
    }
    for state, q_values in zip(agent.last_q_table["states"], agent.last_q_table["q_values"]):
        state, q_values = tuple(state), tuple(q_values)
        assert state in expected_q_values_per_state, \
            "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state)
        recursive_assert_almost_equal(q_values, expected_q_values_per_state[state], decimals=0)
def test_2x2_grid_world_using_flow_methods(self):
    """
    Tests a minimalistic 2x2 GridWorld using the flow methods (auto-reset on terminal).
    """
    env = GridWorld(world="2x2")

    # Simple test runs with fixed actions.
    # X=player's position.
    s, r, t = env.step_flow(2)  # down: [" H", "XG"]
    self.assertTrue(s == 1)
    recursive_assert_almost_equal(r, -0.1)
    self.assertTrue(not t)
    s, r, t = env.step_flow(1)  # right: [" H", " X"] -> goal; auto-reset back to the start
    self.assertTrue(s == 0)
    self.assertTrue(r == 1.0)
    self.assertTrue(t)
    s, r, t = env.step_flow(1)  # right: [" X", " G"] -> in the hole; auto-reset back to the start
    self.assertTrue(s == 0)
    self.assertTrue(r == -5.0)
    self.assertTrue(t)

    # Run against a wall.
    s, r, t = env.step_flow(3)  # left: ["XH", " G"]
    self.assertTrue(s == 0)
    recursive_assert_almost_equal(r, -0.1)
    self.assertTrue(not t)
    s, r, t = env.step_flow(2)  # down: [" H", "XG"]
    self.assertTrue(s == 1)
    recursive_assert_almost_equal(r, -0.1)
    self.assertTrue(not t)
    s, r, t = env.step_flow(0)  # up: ["XH", " G"]
    self.assertTrue(s == 0)
    recursive_assert_almost_equal(r, -0.1)
    self.assertTrue(not t)
def test_multi_gpu_dqn_agent_learning_test_gridworld_2x2(self):
    """
    Tests if the multi-GPU strategy can learn successfully on a multi-GPU system,
    but also runs on a CPU-only system using fake-GPU logic for testing purposes.
    """
    env_spec = dict(type="grid-world", world="2x2")
    dummy_env = GridWorld.from_spec(env_spec)
    agent_config = config_from_path("configs/multi_gpu_dqn_for_2x2_gridworld.json")
    preprocessing_spec = agent_config.pop("preprocessing_spec")
    agent = DQNAgent.from_spec(
        agent_config,
        state_space=self.grid_world_2x2_flattened_state_space,
        action_space=dummy_env.action_space,
    )

    time_steps = 1000
    worker = SingleThreadedWorker(
        env_spec=env_spec,
        agent=agent,
        worker_executes_preprocessing=True,
        preprocessing_spec=preprocessing_spec
    )
    results = worker.execute_timesteps(time_steps, use_exploration=True)

    # Merge the q-tables of all four GPUs:
    agent.last_q_table["q_values"] = agent.last_q_table["q_values"].reshape((48, 4))
    print("STATES:\n{}".format(agent.last_q_table["states"]))
    print("\n\nQ(s,a)-VALUES:\n{}".format(np.round_(agent.last_q_table["q_values"], decimals=2)))

    self.assertEqual(results["timesteps_executed"], time_steps)
    self.assertEqual(results["env_frames"], time_steps)
    self.assertGreaterEqual(results["mean_episode_reward"], -4.5)
    self.assertGreaterEqual(results["max_episode_reward"], 0.0)
    self.assertLessEqual(results["episodes_executed"], time_steps / 2)

    # Check the q-table for correct values.
    expected_q_values_per_state = {
        (1.0, 0, 0, 0): (-1, -5, 0, -1),
        (0, 1.0, 0, 0): (-1, 1, 0, 0)
    }
    for state, q_values in zip(agent.last_q_table["states"], agent.last_q_table["q_values"]):
        state, q_values = tuple(state), tuple(q_values)
        assert state in expected_q_values_per_state, \
            "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state)
        recursive_assert_almost_equal(q_values, expected_q_values_per_state[state], decimals=0)
def check_agent(self, prop, expected_value, decimals=7, key_or_index=None):
    """
    Checks a property of our Agent for (almost) equality.

    Args:
        prop (str): The name of the Agent's property to check.
        expected_value (any): The expected value of the given property.
        decimals (Optional[int]): The number of digits after the floating point up to which
            to compare actual and expected values.
        key_or_index (Optional[int, str]): Optional key or index into the property in case of
            a nested data structure.
    """
    is_value = getattr(self.agent, prop, None)
    if key_or_index is not None:
        is_value = is_value[key_or_index]
    recursive_assert_almost_equal(is_value, expected_value, decimals=decimals)
def check_var(self, variable, expected_value, decimals=7):
    """
    Checks the value of one of the Agent's variables for (almost) equality against an expected one.

    Args:
        variable (str): The global scope (within the Agent's root component) of the variable to check.
        expected_value (any): The expected value of the given variable.
        decimals (Optional[int]): The number of digits after the floating point up to which
            to compare actual and expected values.
    """
    variables_dict = self.agent.root_component.variables
    assert variable in variables_dict, "ERROR: Variable '{}' not found in Agent '{}'!".\
        format(variable, self.agent.name)
    var = variables_dict[variable]
    value = self.graph_executor.read_variable_values(var)
    recursive_assert_almost_equal(value, expected_value, decimals=decimals)
def test_learning_2x2_grid_world(self):
    """
    Tests if Ape-X can learn a simple environment using a single worker, thus replicating DQN.
    """
    env_spec = dict(type="grid-world", world="2x2", save_mode=False)
    agent_config = config_from_path("configs/apex_agent_for_2x2_gridworld.json")
    # TODO: remove after unified backends.
    if get_backend() == "pytorch":
        agent_config["memory_spec"]["type"] = "mem_prioritized_replay"

    # Define the executor and test the assembly.
    executor = ApexExecutor(
        environment_spec=env_spec,
        agent_config=agent_config,
    )
    print("Successfully created executor.")

    # Execute the actual workload.
    result = executor.execute_workload(
        workload=dict(num_timesteps=5000, report_interval=100, report_interval_min_seconds=1)
    )
    full_worker_stats = executor.result_by_worker()
    print("All finished episode rewards")
    print(full_worker_stats["episode_rewards"])

    print("STATES:\n{}".format(executor.local_agent.last_q_table["states"]))
    print("\n\nQ(s,a)-VALUES:\n{}".format(
        np.round_(executor.local_agent.last_q_table["q_values"], decimals=2)))

    # Check the q-table for correct values.
    expected_q_values_per_state = {
        (1.0, 0, 0, 0): (-1, -5, 0, -1),
        (0, 1.0, 0, 0): (-1, 1, 0, 0)
    }
    for state, q_values in zip(
            executor.local_agent.last_q_table["states"],
            executor.local_agent.last_q_table["q_values"]):
        state, q_values = tuple(state), tuple(q_values)
        assert state in expected_q_values_per_state, \
            "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state)
        recursive_assert_almost_equal(q_values, expected_q_values_per_state[state], decimals=0)
def test_multi_gpu_dqn_agent_learning_test_gridworld_2x2(self):
    """
    Tests if the multi-GPU strategy can learn successfully on a multi-GPU system.
    THIS TEST REQUIRES A MULTI-GPU SYSTEM.
    """
    # root_logger.setLevel(DEBUG)
    env = GridWorld("2x2")
    agent = DQNAgent.from_spec(
        config_from_path("configs/multi_gpu_dqn_for_2x2_gridworld.json"),
        dueling_q=False,
        state_space=env.state_space,
        action_space=env.action_space,
        observe_spec=dict(buffer_size=100),
        # Rule of thumb for multi-GPU (with n GPUs): n-fold batch-size and learning rate w.r.t. 1 GPU.
        update_spec=dict(update_interval=4, batch_size=48, sync_interval=32),
        optimizer_spec=dict(type="adam", learning_rate=0.15),
        store_last_q_table=True
    )

    time_steps = 400
    worker = SingleThreadedWorker(env_spec=lambda: env, agent=agent, worker_executes_preprocessing=False)
    results = worker.execute_timesteps(time_steps, use_exploration=True)

    print("STATES:\n{}".format(agent.last_q_table["states"]))
    print("\n\nQ(s,a)-VALUES:\n{}".format(np.round_(agent.last_q_table["q_values"], decimals=2)))

    self.assertEqual(results["timesteps_executed"], time_steps)
    self.assertEqual(results["env_frames"], time_steps)
    self.assertGreaterEqual(results["mean_episode_reward"], -4.5)
    self.assertGreaterEqual(results["max_episode_reward"], 0.0)
    self.assertLessEqual(results["episodes_executed"], 250)

    # Check the q-table for correct values.
    expected_q_values_per_state = {
        (1.0, 0, 0, 0): (-1, -5, 0, -1),
        (0, 1.0, 0, 0): (-1, 1, 0, 0)
    }
    for state, q_values in zip(agent.last_q_table["states"], agent.last_q_table["q_values"]):
        state, q_values = tuple(state), tuple(q_values)
        assert state in expected_q_values_per_state, \
            "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state)
        recursive_assert_almost_equal(q_values, expected_q_values_per_state[state], decimals=0)
def test_impala_on_2x2_grid_world(self):
    """
    Creates a single IMPALAAgent and runs it via the IMPALAWorker on a simple 2x2 GridWorld.
    """
    env = GridWorld("2x2")
    agent = IMPALAAgent.from_spec(
        config_from_path("configs/impala_agent_for_2x2_gridworld.json"),
        state_space=env.state_space,
        action_space=env.action_space,
        execution_spec=dict(seed=12),
        update_spec=dict(update_interval=4, batch_size=16),
        optimizer_spec=dict(type="adam", learning_rate=0.05),
    )

    learn_updates = 1000
    # Set up the queue runner.
    agent.call_api_method("setup_queue_runner")
    for _ in range(learn_updates):
        agent.update()

    # print("STATES:\n{}".format(agent.last_q_table["states"]))
    # print("\n\nQ(s,a)-VALUES:\n{}".format(np.round_(agent.last_q_table["q_values"], decimals=2)))
    # self.assertEqual(results["timesteps_executed"], time_steps)
    # self.assertEqual(results["env_frames"], time_steps)
    # self.assertGreaterEqual(results["mean_episode_reward"], -3.5)
    # self.assertGreaterEqual(results["max_episode_reward"], 0.0)
    # self.assertLessEqual(results["episodes_executed"], 350)

    # Check the q-table for correct values.
    expected_q_values_per_state = {
        (1.0, 0, 0, 0): (-1, -5, 0, -1),
        (0, 1.0, 0, 0): (-1, 1, 0, 0)
    }
    for state, q_values in zip(agent.last_q_table["states"], agent.last_q_table["q_values"]):
        state, q_values = tuple(state), tuple(q_values)
        assert state in expected_q_values_per_state, \
            "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state)
        recursive_assert_almost_equal(q_values, expected_q_values_per_state[state], decimals=0)
def test_weights_getting_setting(self):
    """
    Tests getting and setting of the Agent's weights.
    """
    env = GridWorld(world="2x2")
    agent = Agent.from_spec(
        config_from_path("configs/dqn_agent_for_functionality_test.json"),
        state_space=env.state_space,
        action_space=env.action_space
    )

    weights = agent.get_weights()
    new_weights = {}
    for key, weight in weights["policy_weights"].items():
        new_weights[key] = weight + 0.01

    agent.set_weights(new_weights)
    new_actual_weights = agent.get_weights()

    recursive_assert_almost_equal(new_actual_weights["policy_weights"], new_weights)
def test_policy_sync(self):
    """
    Tests weight syncing of the policy (and only the policy, not the Q-functions).
    """
    env = OpenAIGymEnv("CartPole-v0")
    agent = SACAgent.from_spec(
        config_from_path("configs/sac_agent_for_cartpole.json"),
        state_space=env.state_space,
        action_space=env.action_space
    )

    weights = agent.get_weights()
    print("weights =", weights.keys())
    new_weights = {}
    for key, value in weights["policy_weights"].items():
        new_weights[key] = value + 0.01

    agent.set_weights(policy_weights=new_weights, value_function_weights=None)
    updated_weights = agent.get_weights()["policy_weights"]

    recursive_assert_almost_equal(updated_weights, new_weights)
def test_simple_variational_auto_encoder(self):
    # Space must contain a batch dimension (otherwise, NNLayer will complain).
    input_spaces = dict(
        input_=FloatBox(shape=(3,), add_batch_rank=True),
        z_vector=FloatBox(shape=(1,), add_batch_rank=True)
    )
    variational_auto_encoder = VariationalAutoEncoder(
        z_units=1,
        encoder_network_spec=config_from_path("configs/test_vae_encoder_network.json"),
        decoder_network_spec=config_from_path("configs/test_vae_decoder_network.json")
    )

    # Do not seed; we calculate expectations manually.
    test = ComponentTest(component=variational_auto_encoder, input_spaces=input_spaces)

    # Batch of size=3.
    input_ = np.array([[0.1, 0.2, 0.3], [1.0, 2.0, 3.0], [10.0, 20.0, 30.0]])

    global_scope = "variational-auto-encoder/"
    # Calculate the output manually.
    var_dict = test.read_variable_values(variational_auto_encoder.variable_registry)

    encoder_network_out = dense_layer(
        input_,
        var_dict[global_scope + "encoder-network/encoder-layer/dense/kernel"],
        var_dict[global_scope + "encoder-network/encoder-layer/dense/bias"]
    )
    expected_mean = dense_layer(
        encoder_network_out,
        var_dict[global_scope + "mean-layer/dense/kernel"],
        var_dict[global_scope + "mean-layer/dense/bias"]
    )
    # The stddev layer outputs log-stddev, hence the exp() in the check below.
    expected_stddev = dense_layer(
        encoder_network_out,
        var_dict[global_scope + "stddev-layer/dense/kernel"],
        var_dict[global_scope + "stddev-layer/dense/bias"]
    )
    out = test.test(("encode", input_), expected_outputs=None)
    recursive_assert_almost_equal(out["mean"], expected_mean, decimals=5)
    recursive_assert_almost_equal(out["stddev"], np.exp(expected_stddev), decimals=5)
    self.assertTrue(out["z_sample"].shape == (3, 1))

    test.terminate()
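# For reference, `dense_layer` above is a plain numpy forward pass. A minimal
# equivalent sketch (hypothetical helper, assuming `numpy as np` is imported at
# module level; the mean/stddev layers checked here are linear, no activation):
def _dense_layer_sketch(x, kernel, bias):
    # y = x @ W + b
    return np.matmul(x, kernel) + bias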
def test_capacity_with_episodes(self):
    """
    Tests if inserts of non-terminals work. Note that this does not test
    the episode semantics themselves, which are tested below.
    """
    ring_buffer = RingBuffer(capacity=self.capacity)
    test = ComponentTest(component=ring_buffer, input_spaces=self.input_spaces)

    # Internal memory variables.
    ring_buffer_variables = test.get_variable_values(ring_buffer, self.ring_buffer_variables)
    size_value = ring_buffer_variables["size"]
    index_value = ring_buffer_variables["index"]
    num_episodes_value = ring_buffer_variables["num-episodes"]
    episode_index_values = ring_buffer_variables["episode-indices"]

    # Assert that all indices are 0 before the insert.
    self.assertEqual(size_value, 0)
    self.assertEqual(index_value, 0)
    self.assertEqual(num_episodes_value, 0)
    self.assertEqual(np.sum(episode_index_values), 0)

    # Insert one more element than capacity. Note: this is different from the replay
    # test because due to episode semantics, it matters whether these are terminal
    # or not. This tests if episode-index updating causes problems when none of the
    # inserted elements are terminal.
    observation = non_terminal_records(self.record_space, self.capacity + 1)
    test.test(("insert_records", observation), expected_outputs=None)

    ring_buffer_variables = test.get_variable_values(ring_buffer, self.ring_buffer_variables)
    size_value = ring_buffer_variables["size"]
    index_value = ring_buffer_variables["index"]
    num_episodes_value = ring_buffer_variables["num-episodes"]
    episode_index_values = ring_buffer_variables["episode-indices"]

    # Size should be equivalent to capacity when full.
    self.assertEqual(size_value, self.capacity)
    # Index should have wrapped around to 1 (capacity + 1, modulo capacity).
    self.assertEqual(index_value, 1)
    self.assertEqual(num_episodes_value, 0)
    self.assertEqual(np.sum(episode_index_values), 0)

    # If we fetch n elements, we expect to see exactly the last n.
    for last_n in range(1, 6):
        batch = test.test(("get_records", last_n), expected_outputs=None)
        recursive_assert_almost_equal(batch["actions"]["action1"], observation["actions"]["action1"][-last_n:])
        recursive_assert_almost_equal(batch["states"]["state2"], observation["states"]["state2"][-last_n:])
        recursive_assert_almost_equal(batch["terminals"], observation["terminals"][-last_n:])
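# Illustration (simplified and hypothetical, not RLgraph's implementation) of the
# index/size bookkeeping the assertions above rely on: after `capacity + 1`
# inserts, `size` saturates at `capacity` while `index` wraps via modulo to 1.
def _ring_buffer_bookkeeping_sketch(capacity=10):
    index, size = 0, 0
    for _ in range(capacity + 1):
        index = (index + 1) % capacity
        size = min(size + 1, capacity)
    assert size == capacity and index == 1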
def test_multi_gpu_dqn_agent_learning_test_gridworld_2x2(self):
    """
    Tests if the multi-GPU strategy can learn successfully on a multi-GPU system,
    but also runs on a CPU-only system using fake-GPU logic for testing purposes.
    """
    env_spec = dict(type="grid-world", world="2x2")
    dummy_env = GridWorld.from_spec(env_spec)
    agent_config = config_from_path("configs/multi_gpu_dqn_for_2x2_gridworld.json")
    preprocessing_spec = agent_config.pop("preprocessing_spec")
    agent = DQNAgent.from_spec(
        agent_config,
        state_space=self.grid_world_2x2_flattened_state_space,
        action_space=dummy_env.action_space,
    )

    time_steps = 2000
    worker = SingleThreadedWorker(
        env_spec=env_spec,
        agent=agent,
        worker_executes_preprocessing=True,
        preprocessing_spec=preprocessing_spec
    )
    results = worker.execute_timesteps(time_steps, use_exploration=True)

    self.assertEqual(results["timesteps_executed"], time_steps)
    self.assertEqual(results["env_frames"], time_steps)
    self.assertGreaterEqual(results["mean_episode_reward"], -4.5)
    self.assertGreaterEqual(results["max_episode_reward"], 0.0)
    self.assertLessEqual(results["episodes_executed"], time_steps / 2)

    # Check all learnt Q-values (one-hot encode the two non-terminal states).
    q_values = agent.graph_executor.execute(("get_q_values", one_hot(np.array([0, 1]), depth=4)))[:]
    recursive_assert_almost_equal(q_values[0], (0.8, -5, 0.9, 0.8), decimals=1)
    recursive_assert_almost_equal(q_values[1], (0.8, 1.0, 0.9, 0.9), decimals=1)
def test_random_env(self):
    """
    Tests the deterministic functionality of RandomEnv.
    """
    env = RandomEnv(state_space=FloatBox(shape=(2, 2)), action_space=IntBox(2), deterministic=True)

    # Simple test runs with fixed actions.
    s = env.reset()
    recursive_assert_almost_equal(s, np.array([[0.77132064, 0.02075195], [0.63364823, 0.74880388]]))
    s, r, t, _ = env.step(env.action_space.sample())
    recursive_assert_almost_equal(s, np.array([[0.1980629, 0.7605307], [0.1691108, 0.0883398]]))
    s, r, t, _ = env.step(env.action_space.sample())
    recursive_assert_almost_equal(r, np.array(0.7217553))
    s, r, t, _ = env.step(env.action_space.sample())
    self.assertEqual(t, False)
    s, r, t, _ = env.step(env.action_space.sample())
    recursive_assert_almost_equal(s, np.array([[0.4418332, 0.434014], [0.617767, 0.5131382]]))
    s, r, t, _ = env.step(env.action_space.sample())
def test_impala_on_2x2_grid_world(self):
    """
    Creates a single IMPALAAgent and runs it via a simple loop on a 2x2 GridWorld.
    """
    env = GridWorld("2x2")
    agent = IMPALAAgent.from_spec(
        config_from_path("configs/impala_agent_for_2x2_gridworld.json"),
        state_space=env.state_space,
        action_space=env.action_space,
        execution_spec=dict(seed=12),
        update_spec=dict(batch_size=16),
        optimizer_spec=dict(type="adam", learning_rate=0.05)
    )

    learn_updates = 50
    for i in range(learn_updates):
        ret = agent.update()
        mean_return = self._calc_mean_return(ret)
        print("i={} Loss={:.4} Avg-reward={:.2}".format(i, float(ret[1]), mean_return))

    # Assume we have learned something.
    self.assertGreater(mean_return, -0.1)

    # Check the last action probs for the 2 valid next_states (start (after a reset) and one below start).
    action_probs = ret[3]["action_probs"].reshape((80, 4))
    next_states = ret[3]["states"][:, 1:].reshape((80,))
    for s_, probs in zip(next_states, action_probs):
        # Start state:
        # - Assume we picked "right" in state=1 (in order to step into the goal state).
        # - OR we picked "up" or "left" in state=0 (unlikely, but possible).
        if s_ == 0:
            recursive_assert_almost_equal(probs[0], 0.0, decimals=2)
            self.assertTrue(probs[1] > 0.99 or probs[2] > 0.99)
            recursive_assert_almost_equal(probs[3], 0.0, decimals=2)
        # One below start:
        # - Assume we picked "down" in the start state with very large probability.
        # - OR we picked "left" or "down" in state=1 (unlikely, but possible).
        elif s_ == 1:
            recursive_assert_almost_equal(probs[0], 0.0, decimals=2)
            self.assertTrue(probs[1] > 0.99 or probs[2] > 0.99)
            recursive_assert_almost_equal(probs[3], 0.0, decimals=2)

    agent.terminate()
def test_policy_for_bounded_continuous_action_space(self):
    """
    https://github.com/rlgraph/rlgraph/issues/43
    """
    nn_input_space = FloatBox(shape=(4,), add_batch_rank=True)
    action_space = FloatBox(low=-1.0, high=1.0, shape=(1,), add_batch_rank=True)
    # Double the shape for alpha/beta params.
    # action_space_parameters = Tuple(FloatBox(shape=(1,)), FloatBox(shape=(1,)), add_batch_rank=True)

    policy = Policy(network_spec=config_from_path("configs/test_simple_nn.json"), action_space=action_space)
    test = ComponentTest(
        component=policy,
        input_spaces=dict(
            nn_inputs=nn_input_space,
            actions=action_space,
        ),
        action_space=action_space
    )
    policy_params = test.read_variable_values(policy.variable_registry)

    # Some NN inputs.
    nn_input = nn_input_space.sample(size=3)

    # Raw NN-output.
    expected_nn_output = np.matmul(
        nn_input, ComponentTest.read_params("policy/test-network/hidden-layer", policy_params)
    )
    test.test(("get_nn_outputs", nn_input), expected_outputs=expected_nn_output)

    # Raw action-layer output.
    expected_raw_logits = np.matmul(
        expected_nn_output,
        ComponentTest.read_params("policy/action-adapter-0/action-network/action-layer", policy_params)
    )
    test.test(
        ("get_adapter_outputs", nn_input),
        expected_outputs=dict(adapter_outputs=expected_raw_logits, nn_outputs=expected_nn_output),
        decimals=5
    )

    # Parameters (alphas/betas).
    expected_alpha_parameters = np.log(np.exp(expected_raw_logits[:, 0:1]) + 1.0) + 1.0
    expected_beta_parameters = np.log(np.exp(expected_raw_logits[:, 1:]) + 1.0) + 1.0
    expected_parameters = tuple([expected_alpha_parameters, expected_beta_parameters])
    test.test(
        ("get_adapter_outputs_and_parameters", nn_input, ["adapter_outputs", "parameters"]),
        expected_outputs=dict(adapter_outputs=expected_raw_logits, parameters=expected_parameters),
        decimals=5
    )
    print("Params: {}".format(expected_parameters))

    action = test.test(("get_action", nn_input))["action"]
    self.assertTrue(action.dtype == np.float32)
    self.assertGreaterEqual(action.min(), -1.0)
    self.assertLessEqual(action.max(), 1.0)
    self.assertTrue(action.shape == (3, 1))

    out = test.test(("get_action_and_log_likelihood", nn_input))
    action = out["action"]
    llh = out["log_likelihood"]

    # Action log-likelihoods (scale the actions back from [-1, 1] to the Beta support [0, 1]).
    actions_scaled_back = (action + 1.0) / 2.0
    expected_action_log_llh_output = np.log(
        beta.pdf(actions_scaled_back, expected_alpha_parameters, expected_beta_parameters)
    )
    # expected_action_log_prob_output = np.array([[expected_action_log_prob_output[0][0]],
    #     [expected_action_log_prob_output[1][1]], [expected_action_log_prob_output[2][2]]])
    test.test(
        ("get_log_likelihood", [nn_input, action], "log_likelihood"),
        expected_outputs=dict(log_likelihood=expected_action_log_llh_output),
        decimals=5
    )
    recursive_assert_almost_equal(expected_action_log_llh_output, llh, decimals=5)

    # Stochastic sample.
    actions = test.test(("get_stochastic_action", nn_input))["action"]
    self.assertTrue(actions.dtype == np.float32)
    self.assertGreaterEqual(actions.min(), -1.0)
    self.assertLessEqual(actions.max(), 1.0)
    self.assertTrue(actions.shape == (3, 1))

    # Deterministic sample.
    actions = test.test(("get_deterministic_action", nn_input))["action"]
    self.assertTrue(actions.dtype == np.float32)
    self.assertGreaterEqual(actions.min(), -1.0)
    self.assertLessEqual(actions.max(), 1.0)
    self.assertTrue(actions.shape == (3, 1))

    # Distribution's entropy.
    entropy = test.test(("get_entropy", nn_input))["entropy"]
    self.assertTrue(entropy.dtype == np.float32)
    self.assertTrue(entropy.shape == (3, 1))
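# Hypothetical standalone sketch (not part of the test) of the bounded-action
# log-likelihood math mirrored above: raw adapter outputs are mapped to Beta
# parameters via softplus(x) + 1.0, and actions in [-1, 1] are rescaled to the
# Beta support [0, 1]. Uses the same `scipy.stats.beta` as the test above; the
# input values here are made up for illustration.
def _beta_log_likelihood_sketch():
    raw_logits = np.array([[0.2, -0.3]])                          # made-up adapter outputs
    alpha_params = np.log(np.exp(raw_logits[:, 0:1]) + 1.0) + 1.0  # softplus + 1
    beta_params = np.log(np.exp(raw_logits[:, 1:]) + 1.0) + 1.0    # softplus + 1
    action = np.array([[0.4]])                                    # an action sampled from [-1, 1]
    return np.log(beta.pdf((action + 1.0) / 2.0, alpha_params, beta_params))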
def test_policy_for_discrete_action_space_with_dueling_layer(self):
    # np.random.seed(10)
    # State space (NN is a simple single fc-layer relu network (2 units), random biases, random weights).
    nn_input_space = FloatBox(shape=(3,), add_batch_rank=True)
    # Action space (2 possible actions).
    action_space = IntBox(2, add_batch_rank=True)
    # flat_float_action_space = FloatBox(shape=(2,), add_batch_rank=True)

    # Policy with dueling logic.
    policy = DuelingPolicy(
        network_spec=config_from_path("configs/test_lrelu_nn.json"),
        action_adapter_spec=dict(pre_network_spec=[
            dict(type="dense", units=10, activation="lrelu", activation_params=[0.1])
        ]),
        units_state_value_stream=10,
        action_space=action_space
    )
    test = ComponentTest(
        component=policy,
        input_spaces=dict(
            nn_inputs=nn_input_space,
            actions=action_space,
        ),
        action_space=action_space
    )
    policy_params = test.read_variable_values(policy.variable_registry)

    # Some NN inputs.
    nn_input = nn_input_space.sample(size=3)

    # Raw NN-output.
    expected_nn_output = relu(
        np.matmul(
            nn_input,
            ComponentTest.read_params("dueling-policy/test-network/hidden-layer", policy_params)
        ),
        0.1
    )
    test.test(("get_nn_outputs", nn_input), expected_outputs=expected_nn_output)

    # Single state-values.
    expected_state_values = np.matmul(
        relu(
            np.matmul(
                expected_nn_output,
                ComponentTest.read_params("dueling-policy/dense-layer-state-value-stream", policy_params)
            )
        ),
        ComponentTest.read_params("dueling-policy/state-value-node", policy_params)
    )
    test.test(
        ("get_state_values", nn_input, ["state_values", "nn_outputs"]),
        expected_outputs=dict(state_values=expected_state_values, nn_outputs=expected_nn_output),
        decimals=5
    )

    # Raw action-layer output (advantages).
    expected_raw_advantages = np.matmul(
        relu(
            np.matmul(
                expected_nn_output,
                ComponentTest.read_params(
                    "dueling-policy/action-adapter-0/action-network/dense-layer", policy_params
                )
            ),
            0.1
        ),
        ComponentTest.read_params(
            "dueling-policy/action-adapter-0/action-network/action-layer", policy_params
        )
    )

    # Q-values: One for each item in the batch.
    expected_q_values_output = expected_state_values + expected_raw_advantages - \
        np.mean(expected_raw_advantages, axis=-1, keepdims=True)
    test.test(
        ("get_adapter_outputs", nn_input, ["adapter_outputs", "advantages"]),
        expected_outputs=dict(adapter_outputs=expected_q_values_output, advantages=expected_raw_advantages),
        decimals=5
    )

    # Parameters (probabilities): softmaxed q-values.
    expected_parameters_output = np.maximum(softmax(expected_q_values_output, axis=-1), SMALL_NUMBER)
    test.test(
        ("get_adapter_outputs_and_parameters", nn_input, ["adapter_outputs", "parameters"]),
        expected_outputs=dict(
            adapter_outputs=expected_q_values_output,
            parameters=expected_parameters_output
        ),
        decimals=5
    )
    print("Probs: {}".format(expected_parameters_output))

    expected_actions = np.argmax(expected_q_values_output, axis=-1)
    test.test(("get_action", nn_input, ["action"]), expected_outputs=dict(action=expected_actions))

    out = test.test(("get_action_and_log_likelihood", nn_input))
    action = out["action"]
    llh = out["log_likelihood"]

    # Action log-probs.
    expected_action_log_llh_output = np.log(np.array([
        expected_parameters_output[0][action[0]],
        expected_parameters_output[1][action[1]],
        expected_parameters_output[2][action[2]],
    ]))
    test.test(
        ("get_log_likelihood", [nn_input, action]),
        expected_outputs=dict(
            log_likelihood=expected_action_log_llh_output,
            adapter_outputs=expected_q_values_output
        ),
        decimals=5
    )
    recursive_assert_almost_equal(expected_action_log_llh_output, llh, decimals=5)

    # Stochastic sample.
    out = test.test(("get_stochastic_action", nn_input), expected_outputs=None)
    self.assertTrue(out["action"].dtype == np.int32 or (out["action"].dtype == np.int64))
    self.assertTrue(out["action"].shape == (3,))

    # Deterministic sample.
    out = test.test(("get_deterministic_action", nn_input), expected_outputs=None)
    self.assertTrue(out["action"].dtype == np.int32 or (out["action"].dtype == np.int64))
    self.assertTrue(out["action"].shape == (3,))

    # Distribution's entropy.
    out = test.test(("get_entropy", nn_input), expected_outputs=None)
    self.assertTrue(out["entropy"].dtype == np.float32)
    self.assertTrue(out["entropy"].shape == (3,))
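# The dueling aggregation verified above, in isolation. This is the standard
# formulation the test's expected values use:
#     Q(s, a) = V(s) + A(s, a) - mean_a'[A(s, a')]
# (hypothetical helper; the input values below are made up for illustration):
def _dueling_q_sketch():
    advantages = np.array([[1.0, 3.0], [0.5, -0.5]])  # A(s, a), batch of 2
    state_values = np.array([[2.0], [1.0]])           # V(s), one per batch item
    q_values = state_values + advantages - advantages.mean(axis=-1, keepdims=True)
    return q_values  # [[1.0, 3.0], [1.5, 0.5]]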
def test_policy_for_discrete_action_space(self):
    # State space (NN is a simple single fc-layer relu network (2 units), random biases, random weights).
    state_space = FloatBox(shape=(4,), add_batch_rank=True)
    # Action space (5 possible actions).
    action_space = IntBox(5, add_batch_rank=True)

    policy = Policy(network_spec=config_from_path("configs/test_simple_nn.json"), action_space=action_space)
    test = ComponentTest(
        component=policy,
        input_spaces=dict(
            nn_inputs=state_space,
            actions=action_space,
        ),
        action_space=action_space
    )
    policy_params = test.read_variable_values(policy.variable_registry)

    # Some NN inputs (4 input nodes, batch size=2).
    states = np.array([[-0.08, 0.4, -0.05, -0.55], [13.0, -14.0, 10.0, -16.0]])

    # Raw NN-output.
    expected_nn_output = np.matmul(
        states, ComponentTest.read_params("policy/test-network/hidden-layer", policy_params)
    )
    test.test(("get_nn_outputs", states), expected_outputs=expected_nn_output, decimals=5)

    # Raw action-layer output; expected shape=(2,5): 2=batch, 5=action categories.
    expected_action_layer_output = np.matmul(
        expected_nn_output,
        ComponentTest.read_params("policy/action-adapter-0/action-network/action-layer", policy_params)
    )
    test.test(
        ("get_adapter_outputs", states),
        expected_outputs=dict(adapter_outputs=expected_action_layer_output, nn_outputs=expected_nn_output),
        decimals=5
    )

    # Logits, parameters (probs) and log-probs (clamped, as log-probs are numerically
    # unstable for very small probs).
    expected_parameters_output = np.maximum(softmax(expected_action_layer_output, axis=-1), SMALL_NUMBER)
    test.test(
        ("get_adapter_outputs_and_parameters", states, ["adapter_outputs", "parameters", "log_probs"]),
        expected_outputs=dict(
            adapter_outputs=expected_action_layer_output,
            parameters=np.array(expected_parameters_output, dtype=np.float32),
            log_probs=np.log(expected_parameters_output)
        ),
        decimals=5
    )

    expected_actions = np.argmax(expected_action_layer_output, axis=-1)
    test.test(("get_action", states, ["action"]), expected_outputs=dict(action=expected_actions))

    # Get action AND log-llh.
    out = test.test(("get_action_and_log_likelihood", states))
    action = out["action"]
    llh = out["log_likelihood"]

    # Action log-probs.
    expected_action_log_llh_output = np.log(np.array([
        expected_parameters_output[0][action[0]],
        expected_parameters_output[1][action[1]]
    ]))
    test.test(
        ("get_log_likelihood", [states, action], "log_likelihood"),
        expected_outputs=dict(log_likelihood=expected_action_log_llh_output),
        decimals=5
    )
    recursive_assert_almost_equal(expected_action_log_llh_output, llh, decimals=5)

    # Stochastic sample.
    out = test.test(("get_stochastic_action", states), expected_outputs=None)
    self.assertTrue(out["action"].dtype == np.int32 or (out["action"].dtype == np.int64))
    self.assertTrue(out["action"].shape == (2,))

    # Deterministic sample.
    out = test.test(("get_deterministic_action", states), expected_outputs=None)
    self.assertTrue(out["action"].dtype == np.int32 or (out["action"].dtype == np.int64))
    self.assertTrue(out["action"].shape == (2,))

    # Distribution's entropy.
    out = test.test(("get_entropy", states), expected_outputs=None)
    self.assertTrue(out["entropy"].dtype == np.float32)
    self.assertTrue(out["entropy"].shape == (2,))
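# The discrete log-likelihood pattern used in the expected values above, in
# isolation (hypothetical helper; `parameters` are the softmaxed probs, and
# `numpy as np` is assumed to be imported at module level):
def _discrete_log_llh_sketch(parameters, actions):
    # parameters: shape (batch, num_actions); actions: int array of shape (batch,).
    return np.log(parameters[np.arange(len(actions)), actions])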
def test_shared_value_function_policy_for_discrete_action_space_with_time_rank_folding(self):
    # State space (NN is a simple single fc-layer network, random biases, random weights).
    state_space = FloatBox(shape=(3,), add_batch_rank=True, add_time_rank=True)
    # Action space (4 possible actions).
    action_space = IntBox(4, add_batch_rank=True, add_time_rank=True)
    flat_float_action_space = FloatBox(shape=(4,), add_batch_rank=True, add_time_rank=True)

    # Policy with baseline action adapter AND batch-apply over the entire policy (NN + ActionAdapter + distr.).
    network_spec = config_from_path("configs/test_lrelu_nn.json")
    # Add folding and unfolding to the network.
    network_spec["fold_time_rank"] = True
    network_spec["unfold_time_rank"] = True
    shared_value_function_policy = SharedValueFunctionPolicy(
        network_spec=network_spec,
        action_adapter_spec=dict(fold_time_rank=True, unfold_time_rank=True),
        action_space=action_space,
        value_fold_time_rank=True,
        value_unfold_time_rank=True
    )
    test = ComponentTest(
        component=shared_value_function_policy,
        input_spaces=dict(
            nn_inputs=state_space,
            actions=action_space,
        ),
        action_space=action_space,
    )
    policy_params = test.read_variable_values(shared_value_function_policy.variable_registry)

    # Some NN inputs (batch size=2, seq-len=3).
    states = state_space.sample(size=(2, 3))
    states_folded = np.reshape(states, newshape=(6, 3))

    # Raw NN-output (leaky ReLU over the hidden-layer matmul), unfolded back to the states' shape.
    expected_nn_output = np.reshape(
        relu(
            np.matmul(
                states_folded,
                ComponentTest.read_params(
                    "shared-value-function-policy/test-network/hidden-layer", policy_params
                )
            ),
            0.1
        ),
        newshape=states.shape
    )
    test.test(("get_nn_outputs", states), expected_outputs=expected_nn_output, decimals=5)

    # Raw action-layer output; expected shape=(2,3,4): 2=batch, 3=time-steps, 4=action categories.
    expected_action_layer_output = np.matmul(
        expected_nn_output,
        ComponentTest.read_params(
            "shared-value-function-policy/action-adapter-0/action-network/action-layer/", policy_params
        )
    )
    expected_action_layer_output = np.reshape(expected_action_layer_output, newshape=(2, 3, 4))
    test.test(
        ("get_adapter_outputs", states),
        expected_outputs=dict(
            adapter_outputs=expected_action_layer_output, nn_outputs=expected_nn_output
        ),
        decimals=5
    )

    # State-values: One for each item in the batch.
    expected_state_value_output = np.matmul(
        expected_nn_output,
        ComponentTest.read_params(
            "shared-value-function-policy/value-function-node/dense-layer", policy_params
        )
    )
    expected_state_value_output_unfolded = np.reshape(expected_state_value_output, newshape=(2, 3, 1))
    test.test(
        ("get_state_values", states, ["state_values"]),
        expected_outputs=dict(state_values=expected_state_value_output_unfolded),
        decimals=5
    )

    expected_action_layer_output_unfolded = np.reshape(expected_action_layer_output, newshape=(2, 3, 4))
    test.test(
        ("get_state_values_adapter_outputs_and_parameters", states, ["state_values", "adapter_outputs"]),
        expected_outputs=dict(
            state_values=expected_state_value_output_unfolded,
            adapter_outputs=expected_action_layer_output_unfolded
        ),
        decimals=5
    )

    # Parameters (probabilities): softmaxed logits.
    expected_parameters_output = np.maximum(
        softmax(expected_action_layer_output_unfolded, axis=-1), SMALL_NUMBER
    )
    test.test(
        ("get_adapter_outputs_and_parameters", states, ["adapter_outputs", "parameters", "nn_outputs"]),
        expected_outputs=dict(
            nn_outputs=expected_nn_output,
            adapter_outputs=expected_action_layer_output_unfolded,
            parameters=expected_parameters_output
        ),
        decimals=5
    )
    print("Probs: {}".format(expected_parameters_output))

    expected_actions = np.argmax(expected_action_layer_output_unfolded, axis=-1)
    test.test(("get_action", states, ["action"]), expected_outputs=dict(action=expected_actions))

    out = test.test(("get_action_and_log_likelihood", states))
    action = out["action"]
    llh = out["log_likelihood"]

    # Action log-llh.
    expected_action_log_llh_output = np.log(np.array([
        [
            expected_parameters_output[0][0][action[0][0]],
            expected_parameters_output[0][1][action[0][1]],
            expected_parameters_output[0][2][action[0][2]],
        ],
        [
            expected_parameters_output[1][0][action[1][0]],
            expected_parameters_output[1][1][action[1][1]],
            expected_parameters_output[1][2][action[1][2]],
        ]
    ]))
    test.test(
        ("get_log_likelihood", [states, action]),
        expected_outputs=dict(
            log_likelihood=expected_action_log_llh_output,
            adapter_outputs=expected_action_layer_output_unfolded
        ),
        decimals=5
    )
    recursive_assert_almost_equal(expected_action_log_llh_output, llh, decimals=5)

    # Deterministic sample.
    out = test.test(("get_deterministic_action", states), expected_outputs=None)
    self.assertTrue(out["action"].dtype == np.int32 or (out["action"].dtype == np.int64))
    self.assertTrue(out["action"].shape == (2, 3))  # Make sure the output is unfolded.

    # Stochastic sample.
    out = test.test(("get_stochastic_action", states), expected_outputs=None)
    self.assertTrue(out["action"].dtype == np.int32 or (out["action"].dtype == np.int64))
    self.assertTrue(out["action"].shape == (2, 3))  # Make sure the output is unfolded.

    # Distribution's entropy.
    out = test.test(("get_entropy", states), expected_outputs=None)
    self.assertTrue(out["entropy"].dtype == np.float32)
    self.assertTrue(out["entropy"].shape == (2, 3))  # Make sure the output is unfolded.
def test_shared_value_function_policy_for_discrete_action_space(self):
    # State space (NN is a simple single fc-layer network, random biases, random weights).
    state_space = FloatBox(shape=(4,), add_batch_rank=True)
    # Action space (3 possible actions).
    action_space = IntBox(3, add_batch_rank=True)

    # Policy with a baseline action adapter.
    shared_value_function_policy = SharedValueFunctionPolicy(
        network_spec=config_from_path("configs/test_lrelu_nn.json"),
        action_space=action_space
    )
    test = ComponentTest(
        component=shared_value_function_policy,
        input_spaces=dict(
            nn_inputs=state_space,
            actions=action_space,
        ),
        action_space=action_space,
    )
    policy_params = test.read_variable_values(shared_value_function_policy.variable_registry)

    # Some NN inputs (4 input nodes, batch size=3).
    states = state_space.sample(size=3)

    # Raw NN-output (leaky ReLU over the hidden-layer matmul).
    expected_nn_output = relu(
        np.matmul(
            states,
            ComponentTest.read_params(
                "shared-value-function-policy/test-network/hidden-layer", policy_params
            )
        ),
        0.1
    )
    test.test(("get_nn_outputs", states), expected_outputs=expected_nn_output, decimals=5)

    # Raw action-layer output; expected shape=(3,3): 3=batch, 3=action categories.
    expected_action_layer_output = np.matmul(
        expected_nn_output,
        ComponentTest.read_params(
            "shared-value-function-policy/action-adapter-0/action-network/action-layer/", policy_params
        )
    )
    test.test(
        ("get_adapter_outputs", states),
        expected_outputs=dict(
            adapter_outputs=expected_action_layer_output, nn_outputs=expected_nn_output
        ),
        decimals=5
    )

    # State-values: One for each item in the batch.
    expected_state_value_output = np.matmul(
        expected_nn_output,
        ComponentTest.read_params(
            "shared-value-function-policy/value-function-node/dense-layer", policy_params
        )
    )
    test.test(
        ("get_state_values", states, ["state_values"]),
        expected_outputs=dict(state_values=expected_state_value_output),
        decimals=5
    )

    # Logits values.
    test.test(
        ("get_state_values_adapter_outputs_and_parameters", states, ["state_values", "adapter_outputs"]),
        expected_outputs=dict(
            state_values=expected_state_value_output,
            adapter_outputs=expected_action_layer_output
        ),
        decimals=5
    )

    # Parameters (probabilities): softmaxed logits.
    expected_parameters_output = np.maximum(softmax(expected_action_layer_output, axis=-1), SMALL_NUMBER)
    test.test(
        ("get_adapter_outputs_and_parameters", states, ["adapter_outputs", "parameters"]),
        expected_outputs=dict(
            adapter_outputs=expected_action_layer_output,
            parameters=expected_parameters_output
        ),
        decimals=5
    )
    print("Probs: {}".format(expected_parameters_output))

    expected_actions = np.argmax(expected_action_layer_output, axis=-1)
    test.test(("get_action", states, ["action"]), expected_outputs=dict(action=expected_actions))

    # Get action AND log-llh.
    out = test.test(("get_action_and_log_likelihood", states))
    action = out["action"]
    llh = out["log_likelihood"]

    # Action log-llh.
    expected_action_log_llh_output = np.log(np.array([
        expected_parameters_output[0][action[0]],
        expected_parameters_output[1][action[1]],
        expected_parameters_output[2][action[2]],
    ]))
    test.test(
        ("get_log_likelihood", [states, action], "log_likelihood"),
        expected_outputs=dict(log_likelihood=expected_action_log_llh_output),
        decimals=5
    )
    recursive_assert_almost_equal(expected_action_log_llh_output, llh)

    # Stochastic sample.
    out = test.test(("get_stochastic_action", states), expected_outputs=None)
    self.assertTrue(out["action"].dtype == np.int32 or (out["action"].dtype == np.int64))
    self.assertTrue(out["action"].shape == (3,))

    # Deterministic sample.
    out = test.test(("get_deterministic_action", states), expected_outputs=None)
    self.assertTrue(out["action"].dtype == np.int32 or (out["action"].dtype == np.int64))
    self.assertTrue(out["action"].shape == (3,))

    # Distribution's entropy.
    out = test.test(("get_entropy", states), expected_outputs=None)
    self.assertTrue(out["entropy"].dtype == np.float32)
    self.assertTrue(out["entropy"].shape == (3,))
def test_episode_fetching(self):
    """
    Tests if we can accurately fetch the most recent episodes.
    """
    for backend in (None, "python"):
        ring_buffer = RingBuffer(capacity=self.capacity, backend=backend)
        test = ComponentTest(component=ring_buffer, input_spaces=self.input_spaces)

        # Insert 2 non-terminals, 1 terminal.
        observation = non_terminal_records(self.record_space, 2)
        test.test(("insert_records", observation), expected_outputs=None)
        observation = terminal_records(self.record_space, 1)
        test.test(("insert_records", observation), expected_outputs=None)

        ring_buffer_variables = test.get_variable_values(self.ring_buffer_variables)
        num_episodes_value = ring_buffer_variables["num-episodes"]
        episode_index_values = ring_buffer_variables["episode-indices"]

        # One episode.
        self.assertEqual(num_episodes_value, 1)
        expected_indices = [0] * self.capacity
        expected_indices[0] = 2
        recursive_assert_almost_equal(episode_index_values, expected_indices)

        # We should now be able to retrieve one episode of length 3.
        episode = test.test(("get_episodes", 1), expected_outputs=None)
        expected_terminals = [0, 0, 1]
        recursive_assert_almost_equal(episode["terminals"], expected_terminals)

        # Asking for two episodes should still return just the one existing episode.
        episode = test.test(("get_episodes", 2), expected_outputs=None)
        expected_terminals = [0, 0, 1]
        recursive_assert_almost_equal(episode["terminals"], expected_terminals)

        # Insert 7 non-terminals.
        observation = non_terminal_records(self.record_space, 7)
        test.test(("insert_records", observation), expected_outputs=None)

        ring_buffer_variables = test.get_variable_values(self.ring_buffer_variables)
        index_value = ring_buffer_variables["index"]
        episode_index_values = ring_buffer_variables["episode-indices"]

        # Episode indices should not have changed.
        expected_indices[0] = 2
        recursive_assert_almost_equal(episode_index_values, expected_indices)
        # Inserted 2 non-terminals, 1 terminal, 7 non-terminals at capacity 10 -> should be at 0 again.
        self.assertEqual(index_value, 0)

        # Now insert one terminal so the terminal buffer has layout [1 0 1 0 0 0 0 0 0 0].
        observation = terminal_records(self.record_space, 1)
        test.test(("insert_records", observation), expected_outputs=None)

        # Episode indices:
        ring_buffer_variables = test.get_variable_values(self.ring_buffer_variables)
        num_episodes_value = ring_buffer_variables["num-episodes"]
        recursive_assert_almost_equal(num_episodes_value, 2)

        # Check if we can fetch 2 episodes:
        episodes = test.test(("get_episodes", 2), expected_outputs=None)
        # We now expect to have retrieved:
        # - 10 time-steps
        # - 2 terminal values of 1
        # - Terminal values spaced 1 index apart due to the insertion order
        self.assertEqual(len(episodes["terminals"]), self.capacity)
        self.assertEqual(episodes["terminals"][0], True)
        self.assertEqual(episodes["terminals"][2], True)
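# Illustration (simplified and hypothetical, not RLgraph's implementation) of the
# episode-index bookkeeping the first assertions above check: a terminal record
# stores its buffer slot in the next free position of `episode_indices`, and
# `num_episodes` counts completed episodes.
def _episode_index_sketch(capacity=10):
    episode_indices = [0] * capacity
    num_episodes, index = 0, 0
    for terminal in [False, False, True]:  # 2 non-terminals, then 1 terminal
        if terminal:
            episode_indices[num_episodes] = index  # slot of the terminal record
            num_episodes += 1
        index = (index + 1) % capacity
    assert num_episodes == 1 and episode_indices[0] == 2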
def test_4x4_grid_world_with_container_actions(self):
    """
    Tests a 4x4 GridWorld using forward+turn+jump container actions.
    """
    env = GridWorld(world="4x4", action_type="ftj", state_representation="xy+orientation")

    # Simple test runs with fixed actions.

    # Fall into the hole.
    s = env.reset()  # Start state: x=0, y=0, orientation-vector=[0, 1].
    recursive_assert_almost_equal(s, [0, 0, 0, 1])
    s, r, t, _ = env.step(dict(turn=2, forward=2))  # turn=2 (right), move=2 (forward), jump=0
    recursive_assert_almost_equal(s, [1, 0, 1, 0])
    recursive_assert_almost_equal(r, -0.1)
    self.assertTrue(not t)
    s, r, t, _ = env.step(dict(turn=2, forward=1))  # turn=2 (right), move=1 (stay), jump=0
    recursive_assert_almost_equal(s, [1, 0, 0, -1])
    recursive_assert_almost_equal(r, -0.1)
    self.assertTrue(not t)
    s, r, t, _ = env.step(dict(turn=1, forward=2))  # turn=1 (no turn), move=2 (forward), jump=0
    recursive_assert_almost_equal(s, [1, 1, 0, -1])
    self.assertTrue(r == -5.0)
    self.assertTrue(t)

    # Jump quite a lot and reach the goal.
    env.reset()  # Back to the start state: [0, 0, 0, 1].
    s, r, t, _ = env.step(dict(turn=2, forward=1))
    recursive_assert_almost_equal(s, [0, 0, 1, 0])
    recursive_assert_almost_equal(r, -0.1)
    self.assertTrue(not t)
    s, r, t, _ = env.step(dict(turn=1, forward=1, jump=1))
    recursive_assert_almost_equal(s, [2, 0, 1, 0])
    recursive_assert_almost_equal(r, -0.1)
    self.assertTrue(not t)
    s, r, t, _ = env.step(dict(turn=2, forward=2))
    recursive_assert_almost_equal(s, [2, 1, 0, -1])
    recursive_assert_almost_equal(r, -0.1)
    self.assertTrue(not t)
    s, r, t, _ = env.step(dict(turn=1, forward=2, jump=1))
    recursive_assert_almost_equal(s, [2, 3, 0, -1])
    recursive_assert_almost_equal(r, -0.1)
    self.assertTrue(not t)
    s, r, t, _ = env.step(dict(turn=2, forward=0))
    recursive_assert_almost_equal(s, [3, 3, -1, 0])
    self.assertTrue(r == 1.0)
    self.assertTrue(t)

    # Run against a wall.
    env.reset()  # Back to the start state: [0, 0, 0, 1].
    s, r, t, _ = env.step(dict(turn=1, forward=0))
    recursive_assert_almost_equal(s, [0, 1, 0, 1])
    recursive_assert_almost_equal(r, -0.1)
    self.assertTrue(not t)
    s, r, t, _ = env.step(dict(turn=0, forward=2))
    recursive_assert_almost_equal(s, [0, 1, -1, 0])
    recursive_assert_almost_equal(r, -0.1)
    self.assertTrue(not t)

    # Jump over a hole (no reset).
    s, r, t, _ = env.step(dict(turn=2, forward=1))  # turn around
    s, r, t, _ = env.step(dict(turn=2, forward=1))
    recursive_assert_almost_equal(s, [0, 1, 1, 0])
    recursive_assert_almost_equal(r, -0.1)
    self.assertTrue(not t)
    s, r, t, _ = env.step(dict(turn=1, forward=1, jump=1))
    recursive_assert_almost_equal(s, [2, 1, 1, 0])
    recursive_assert_almost_equal(r, -0.1)
    self.assertTrue(not t)
def test_long_chain_grid_world(self):
    """
    Tests a minimalistic long-chain GridWorld.
    """
    env = GridWorld(world="long-chain")

    # Simple test runs with fixed actions.
    # X=player's position.
    s = env.reset()  # ["X ... G"]
    self.assertTrue(s == 33)
    s, r, t, _ = env.step(2)  # down: ["X ... G"]
    self.assertTrue(s == 33)
    recursive_assert_almost_equal(r, -0.1)
    self.assertTrue(not t)
    s, r, t, _ = env.step(1)  # right: ["SX ... G"]
    self.assertTrue(s == 34)
    recursive_assert_almost_equal(r, -0.1)
    self.assertTrue(not t)

    env.reset()  # ["X ... G"]
    # Right, left, down, up, right -> Move one to the right each iteration.
    for x in range(20):
        s, r, t, _ = env.step(1)
        self.assertTrue(s == x + 33 + 1)
        recursive_assert_almost_equal(r, -0.1)
        self.assertTrue(not t)
        s, r, t, _ = env.step(3)
        self.assertTrue(s == x + 33)
        recursive_assert_almost_equal(r, -0.1)
        self.assertTrue(not t)
        s, r, t, _ = env.step(2)
        self.assertTrue(s == x + 33)
        recursive_assert_almost_equal(r, -0.1)
        self.assertTrue(not t)
        s, r, t, _ = env.step(0)
        self.assertTrue(s == x + 33)
        recursive_assert_almost_equal(r, -0.1)
        self.assertTrue(not t)
        s, r, t, _ = env.step(1)
        self.assertTrue(s == x + 33 + 1)
        recursive_assert_almost_equal(r, -0.1)
        self.assertTrue(not t)