Example #1
    def __init__(self, capacity=1000, alpha=1.0, beta=1.0):
        """
        Args:
            capacity (int): Max capacity.
            alpha (float): Initial weight.
            beta (float): Prioritisation factor.
        """
        super(ApexMemory, self).__init__()

        self.memory_values = []
        self.index = 0
        self.capacity = capacity
        self.size = 0
        self.max_priority = 1.0
        self.alpha = alpha
        self.beta = beta

        self.default_new_weight = np.power(self.max_priority, self.alpha)
        self.priority_capacity = 1
        while self.priority_capacity < self.capacity:
            self.priority_capacity *= 2

        # Create segment trees, initialize with neutral elements.
        sum_values = [0.0 for _ in range_(2 * self.priority_capacity)]
        sum_segment_tree = MemSegmentTree(sum_values, self.priority_capacity,
                                          operator.add)
        min_values = [float('inf') for _ in range_(2 * self.priority_capacity)]
        min_segment_tree = MemSegmentTree(min_values, self.priority_capacity,
                                          min)
        self.merged_segment_tree = MinSumSegmentTree(
            sum_tree=sum_segment_tree,
            min_tree=min_segment_tree,
            capacity=self.priority_capacity)
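The constructor rounds the memory capacity up to the next power of two because the array-backed segment trees store all nodes in a flat list of length 2 * priority_capacity, seeded with the neutral element of each operator (0.0 for operator.add, float('inf') for min) so that unused leaves never affect a query. A minimal standalone sketch of that sizing logic (the helper name below is illustrative, not part of ApexMemory):

import operator

def next_power_of_two(capacity):
    # Same rounding loop as in ApexMemory.__init__.
    priority_capacity = 1
    while priority_capacity < capacity:
        priority_capacity *= 2
    return priority_capacity

priority_capacity = next_power_of_two(1000)                  # -> 1024
sum_values = [0.0] * (2 * priority_capacity)                 # neutral element of +
min_values = [float('inf')] * (2 * priority_capacity)        # neutral element of min
print(priority_capacity, len(sum_values), len(min_values))   # 1024 2048 2048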
Example #2
    def test_rlgraph_sampling(self):
        """
        Tests RLgraph's sampling performance.
        """
        memory = ApexMemory(
            capacity=self.capacity,
            alpha=1.0
        )

        records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]
        for record in records:
            memory.insert_records((
                 ray_compress(record['states']),
                 record['actions'],
                 record['reward'],
                 record['terminals'],
                 None
            ))
        start = time.monotonic()
        for _ in range_(self.samples):
            batch_tuple = memory.get_records(self.sample_batch_size)
        end = time.monotonic() - start
        tp = self.samples / end
        print('#### Testing RLGraph Prioritized Replay memory ####')
        print('Testing sampling performance:')
        print('Sampled {} batches, throughput: {} batches/s, total time: {} s'.format(
            self.samples, tp, end
        ))
Example #3
    def test_rlgraph_combined_ops(self):
        """
        Tests a combined workflow of insert, sample, update on the prioritized replay memory.
        """
        memory = ApexMemory(
            capacity=self.capacity,
            alpha=1.0
        )

        chunksize = 32
        chunks = int(self.inserts / chunksize)
        records = [self.record_space.sample(size=chunksize) for _ in range_(chunks)]
        loss_values = [np.random.random(size=self.sample_batch_size) for _ in range_(chunks)]

        start = time.monotonic()
        for chunk, loss in zip(records, loss_values):
            # Each record now is a chunk.
            for i in range_(chunksize):
                memory.insert_records((
                    ray_compress(chunk['states'][i]),
                    chunk['actions'][i],
                    chunk['reward'][i],
                    chunk['terminals'][i],
                    None
                ))
            batch, indices, weights = memory.get_records(self.sample_batch_size)
            memory.update_records(indices, loss)

        end = time.monotonic() - start
        tp = len(records) / end
        print('RLGraph: Testing combined op performance:')
        print('Ran {} combined ops, throughput: {} combined ops/s, total time: {} s'.format(
            len(records), tp, end
        ))
Example #4
def get_list_registry(from_space, capacity=None, initializer=0, flatten=True, add_batch_rank=False):
    """
    Creates a list storage for a space by providing an ordered dict mapping space names
    to empty lists.

    Args:
        from_space: Space to create the registry from.
        capacity (Optional[int]): Optional capacity with which to initialize the list(s).
        initializer (Optional[any]): Optional initializer for the list(s) if capacity is not None.
        flatten (bool): Whether to produce a FlattenedDataOp with auto-keys.

        add_batch_rank (Optional[Union[bool,int]]): If from_space is given and is True, will add a 0th rank (None) to
            the created variable. If it is an int, will add that int instead of None.
            Default: False.

    Returns:
        Union[dict,list]: Container dict mapping flattened keys to (optionally pre-filled) lists, or a single
            list if flatten is False.
    """
    if flatten:
        if capacity is not None:
            var = from_space.flatten(
                custom_scope_separator="-", scope_separator_at_start=False,
                mapping=lambda k, primitive: [initializer for _ in range_(capacity)]
            )
        else:
            var = from_space.flatten(
                custom_scope_separator="-", scope_separator_at_start=False,
                mapping=lambda k, primitive: []
            )
    else:
        if capacity is not None:
            var = [initializer for _ in range_(capacity)]
        else:
            var = []
    return var
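A hedged usage sketch for get_list_registry; the space construction below assumes a FloatBox space as used elsewhere in these examples, and the exact flattened keys depend on the Space implementation:

# Illustrative only: pre-allocate per-key lists for a simple box space.
state_space = FloatBox(shape=(4,), add_batch_rank=False)

# Flattened registry: one pre-filled list per auto-generated flat key.
registry = get_list_registry(state_space, capacity=3, initializer=0)

# Non-flattened variant: a single pre-allocated list.
plain = get_list_registry(state_space, capacity=3, flatten=False)
assert plain == [0, 0, 0]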
Example #5
    def test_rlgraph_updating(self):
        """
        Tests RLGraph's memory performance.
        """
        memory = ApexMemory(
            capacity=self.capacity,
            alpha=1.0
        )

        records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]
        for record in records:
            memory.insert_records((
                 record['states'],
                 record['actions'],
                 record['reward'],
                 record['terminals'],
                 None
            ))
        loss_values = [np.random.random(size=self.sample_batch_size) for _ in range_(self.samples)]
        indices = [np.random.randint(low=0, high=self.inserts, size=self.sample_batch_size) for _
                   in range_(self.samples)]

        start = time.monotonic()
        for index, loss in zip(indices, loss_values):
            memory.update_records(index, loss)
        end = time.monotonic() - start
        tp = len(indices) / end
        print('#### Testing RLGraph Prioritized Replay memory ####')
        print('Testing updating performance:')
        print('Updates {} loss batches, throughput: {} updates/s, total time: {} s'.format(
            len(indices), tp, end
        ))
Example #6
    def test_ray_sampling(self):
        """
        Tests Ray's memory performance.
        """
        assert get_distributed_backend() == "ray"
        memory = PrioritizedReplayBuffer(
            size=self.capacity,
            alpha=1.0,
            clip_rewards=True
        )
        records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]
        for record in records:
            memory.add(
                obs_t=ray_compress(record['states']),
                action=record['actions'],
                reward=record['reward'],
                obs_tp1=ray_compress(record['states']),
                done=record['terminals'],
                weight=None
            )
        start = time.monotonic()
        for _ in range_(self.samples):
            batch_tuple = memory.sample(self.sample_batch_size, beta=1.0)
        end = time.monotonic() - start
        tp = self.samples / end
        print('#### Testing Ray Prioritized Replay memory ####')
        print('Testing sampling performance:')
        print('Sampled {} batches, throughput: {} batches/s, total time: {} s'.format(
            self.samples, tp, end
        ))
Example #7
    def test_box_spaces(self):
        """
        Tests all BoxSpaces via a sample/contains loop. With and without batch-rank,
        different batch sizes, and different low/high combinations (including no bounds).
        """
        for class_ in [FloatBox, IntBox, BoolBox, TextBox]:
            for add_batch_rank in [False, True]:
                # TODO: Test time-rank more thoroughly.
                for add_time_rank in [False, True]:
                    if class_ != BoolBox and class_ != TextBox:
                        for low, high in [(None, None), (-1.0, 10.0), ((1.0, 2.0), (3.0, 4.0)),
                                          (((1.0, 2.0, 3.0), (4.0, 5.0, 6.0)), ((7.0, 8.0, 9.0), (10.0, 11.0, 12.0)))]:
                            space = class_(low=low, high=high, add_batch_rank=add_batch_rank,
                                           add_time_rank=add_time_rank)
                            if add_batch_rank is False:
                                sample = space.sample()
                                self.assertTrue(space.contains(sample))
                            else:
                                for batch_size in range_(1, 4):
                                    samples = space.sample(size=batch_size)
                                    for s in samples:
                                        self.assertTrue(space.contains(s))
                            # TODO: Test the zero() method properly for all cases.
                            #all_0s = space.zeros()
                            #self.assertTrue(all(v == 0 for v in all_0s))
                    else:
                        space = class_(add_batch_rank=add_batch_rank, add_time_rank=add_time_rank)
                        if add_batch_rank is False:
                            sample = space.sample()
                            self.assertTrue(space.contains(sample))
                        else:
                            for batch_size in range_(1, 4):
                                samples = space.sample(size=batch_size)
                                for s in samples:
                                    self.assertTrue(space.contains(s))
Example #8
    def test_ray_updating(self):
        """
        Tests Ray's memory performance.
        """
        assert get_distributed_backend() == "ray"
        memory = PrioritizedReplayBuffer(
            size=self.capacity,
            alpha=1.0,
            clip_rewards=True
        )
        records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]
        for record in records:
            memory.add(
                obs_t=record['states'],
                action=record['actions'],
                reward=record['reward'],
                obs_tp1=record['states'],
                done=record['terminals'],
                weight=None
            )
        loss_values = [np.random.random(size=self.sample_batch_size) for _ in range_(self.samples)]
        indices = [np.random.randint(low=0, high=self.inserts, size=self.sample_batch_size) for _
                   in range_(self.samples)]

        start = time.monotonic()
        for index, loss in zip(indices, loss_values):
            memory.update_priorities(index, loss)
        end = time.monotonic() - start
        tp = len(indices) / end
        print('#### Testing Ray Prioritized Replay memory ####')
        print('Testing updating performance:')
        print('Updates {} loss batches, throughput: {} updates/s, total time: {} s'.format(
            len(indices), tp, end
        ))
Example #9
    def _graph_fn_call(self, inputs):
        """
        Images come in with either a batch dimension or not.
        """
        if self.backend == "python" or get_backend() == "python":
            if isinstance(inputs, list):
                inputs = np.asarray(inputs)
            had_single_color_dim = (inputs.shape[-1] == 1)
            # Batch of samples.
            if inputs.ndim == 4:
                resized = []
                for i in range_(len(inputs)):
                    resized.append(
                        cv2.resize(inputs[i],
                                   dsize=(self.width, self.height),
                                   interpolation=self.cv2_interpolation))
                resized = np.asarray(resized)
            # Single sample.
            else:
                resized = cv2.resize(inputs,
                                     dsize=(self.width, self.height),
                                     interpolation=self.cv2_interpolation)

            # cv2.resize removes the color rank, if its dimension is 1 (e.g. grayscale), add it back here.
            if had_single_color_dim is True:
                resized = np.expand_dims(resized, axis=-1)

            return resized
        elif get_backend() == "pytorch":
            if isinstance(inputs, list):
                inputs = torch.tensor(inputs)

            had_single_color_dim = (inputs.shape[-1] == 1)
            # Batch of samples.
            if len(inputs.shape) == 4:
                resized = []
                for i in range_(len(inputs)):
                    # Get numpy array.
                    resized.append(
                        cv2.resize(inputs[i].numpy(),
                                   dsize=(self.width, self.height),
                                   interpolation=self.cv2_interpolation))
                resized = torch.tensor(resized)
            # Single sample.
            else:
                resized = cv2.resize(inputs.numpy(),
                                     dsize=(self.width, self.height),
                                     interpolation=self.cv2_interpolation)

            # cv2.resize removes the color rank, if its dimension is 1 (e.g. grayscale), add it back here.
            if had_single_color_dim is True:
                resized = torch.unsqueeze(resized, dim=-1)

            return resized
        elif get_backend() == "tf":
            return tf.image.resize_images(images=inputs,
                                          size=(self.width, self.height),
                                          method=self.tf_interpolation)
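The had_single_color_dim bookkeeping exists because cv2.resize drops a trailing channel axis of size 1 (grayscale), so the rank has to be restored after resizing. A small standalone sketch of that behaviour, assuming OpenCV and NumPy are available:

import cv2
import numpy as np

img = np.zeros((84, 84, 1), dtype=np.uint8)   # grayscale image with an explicit channel axis
resized = cv2.resize(img, dsize=(42, 42), interpolation=cv2.INTER_AREA)
print(resized.shape)                           # (42, 42) -- cv2 dropped the channel axis
resized = np.expand_dims(resized, axis=-1)     # restore it, as _graph_fn_call does
print(resized.shape)                           # (42, 42, 1)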
Example #10
    def _truncate_n_step(self,
                         states,
                         actions,
                         rewards,
                         next_states,
                         terminals,
                         was_terminal=True):
        """
        Computes n-step truncation for exactly one episode segment of one environment.

        Returns:
             n-step truncated (shortened) version.
        """
        if self.n_step_adjustment > 1:
            new_len = len(states) - self.n_step_adjustment + 1

            # There are 2 cases: the segment ended in a terminal, or it did not
            # (then we only have to shift next-states forward and truncate).
            if was_terminal:
                # We know that only the last terminal is True.
                terminal_position = len(rewards) - 1
                for i in range(len(rewards)):
                    for j in range(1, self.n_step_adjustment):
                        # Outside sample data -> stop the inner loop.
                        if i + j >= len(next_states):
                            break
                        # Normal case: No terminal ahead (so far) in n-step sequence.
                        if i + j < terminal_position:
                            next_states[i] = next_states[i + j]
                            rewards[i] += self.discount**j * rewards[i + j]
                        # Terminal ahead: Don't go beyond it.
                        # Repeat it for the remaining n-steps and always assume r=0.0.
                        else:
                            next_states[i] = next_states[terminal_position]
                            terminals[i] = True
                            if i + j <= terminal_position:
                                rewards[i] += self.discount**j * rewards[i + j]
            else:
                # We know this segment does not contain any terminals so we simply have to adjust next
                # states and rewards.
                for i in range_(len(rewards) - self.n_step_adjustment + 1):
                    for j in range_(1, self.n_step_adjustment):
                        next_states[i] = next_states[i + j]
                        rewards[i] += self.discount**j * rewards[i + j]

                if self.agent.flat_action_space is not None:
                    for arr in [states, rewards, next_states, terminals]:
                        del arr[new_len:]
                    # Delete container actions separately.
                    for name in self.agent.flat_action_space.keys():
                        del actions[name][new_len:]
                else:
                    for arr in [
                            states, actions, rewards, next_states, terminals
                    ]:
                        del arr[new_len:]

        return states, actions, rewards, next_states, terminals
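The inner loop turns each one-step reward into an n-step return, r_i + discount * r_{i+1} + ... + discount**(n-1) * r_{i+n-1}, and shifts next_states[i] n-1 steps ahead. A minimal sketch of the non-terminal branch on a toy segment (the variable names are illustrative, not part of the class):

discount = 0.99
n_step = 3
rewards = [1.0, 1.0, 1.0, 1.0, 1.0]
next_states = ["s1", "s2", "s3", "s4", "s5"]

# Same accumulation as in the non-terminal branch above.
for i in range(len(rewards) - n_step + 1):
    for j in range(1, n_step):
        next_states[i] = next_states[i + j]
        rewards[i] += discount ** j * rewards[i + j]

new_len = len(rewards) - n_step + 1
print(rewards[:new_len])      # [2.9701, 2.9701, 2.9701] = 1 + 0.99 + 0.99**2
print(next_states[:new_len])  # ['s3', 's4', 's5'] -- each shifted n-1 = 2 steps ahead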
Example #11
    def test_ray_prioritized_replay_insert(self):
        """
        Tests Ray's memory performance.
        """
        assert get_distributed_backend() == "ray"
        memory = PrioritizedReplayBuffer(
            size=self.capacity,
            alpha=1.0,
            clip_rewards=True
        )
        # Test individual inserts.
        records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]

        start = time.monotonic()
        for record in records:
            memory.add(
                obs_t=record['states'],
                action=record['actions'],
                reward=record['reward'],
                obs_tp1=record['states'],
                done=record['terminals'],
                weight=None
            )
        end = time.monotonic() - start
        tp = len(records) / end
        print('#### Testing Ray Prioritized Replay memory ####')
        print('Testing insert performance:')
        print('Inserted {} separate records, throughput: {} records/s, total time: {} s'.format(
            len(records), tp, end
        ))

        memory = PrioritizedReplayBuffer(
            size=self.capacity,
            alpha=1.0,
            clip_rewards=True
        )

        # Test chunked inserts -> done via external for loop in Ray.
        chunks = int(self.inserts / self.chunksize)
        records = [self.record_space.sample(size=self.chunksize) for _ in range_(chunks)]
        start = time.monotonic()
        for chunk in records:
            for i in range_(self.chunksize):
                memory.add(
                    obs_t=chunk['states'][i],
                    action=chunk['actions'][i],
                    reward=chunk['reward'][i],
                    obs_tp1=chunk['states'][i],
                    done=chunk['terminals'][i],
                    weight=None
                )
        end = time.monotonic() - start
        tp = len(records) * self.chunksize / end
        print('Testing chunked insert performance:')
        print('Inserted {} chunks, throughput: {} records/s, total time: {} s'.format(
            len(records), tp, end
        ))
Example #12
    def test_rlgraph_apex_insert(self):
        """
        Tests RLgraph's python memory performance.
        """
        memory = ApexMemory(
            capacity=self.capacity,
            alpha=1.0
        )
        # Testing insert performance
        records = [self.record_space.sample(size=1) for _ in range(self.inserts)]

        start = time.monotonic()
        for record in records:
            memory.insert_records((
                 record['states'],
                 record['actions'],
                 record['reward'],
                 record['terminals'],
                 None
            ))
        end = time.monotonic() - start
        tp = len(records) / end

        print('#### Testing RLGraph python prioritized replay ####')
        print('Testing insert performance:')
        print('Inserted {} separate records, throughput: {} records/s, total time: {} s'.format(
            len(records), tp, end
        ))

        memory = ApexMemory(
            capacity=self.capacity,
            alpha=1.0
        )
        chunks = int(self.inserts / self.chunksize)
        records = [self.record_space.sample(size=self.chunksize) for _ in range_(chunks)]
        start = time.monotonic()
        for chunk in records:
            for i in range_(self.chunksize):
                memory.insert_records((
                    chunk['states'][i],
                    chunk['actions'][i],
                    chunk['reward'][i],
                    chunk['terminals'][i],
                    None
                ))

        end = time.monotonic() - start
        tp = len(records) * self.chunksize / end
        print('Testing chunked insert performance:')
        print('Inserted {} chunks, throughput: {} records/s, total time: {} s'.format(
            len(records), tp, end
        ))
Example #13
    def __init__(self, agent, env_spec=None, num_environments=1, frameskip=1, render=False,
                 worker_executes_exploration=True, exploration_epsilon=0.1, episode_finish_callback=None):
        """
        Args:
            agent (Agent): Agent to execute environment on.
            env_spec (Optional[Union[callable, dict]]): Either an environment spec or a callable returning a new
                environment.
            num_environments (int): How many single Environments should be run in parallel in a SequentialVectorEnv.
            frameskip (int): How often actions are repeated after retrieving them from the agent.
                This setting can be overwritten in the single calls to the different `execute_..` methods.
            render (bool): Whether to render the environment after each action.
                Default: False.
            worker_executes_exploration (bool): If worker executes exploration by sampling.
            exploration_epsilon (Optional[float]): Epsilon to use if worker executes exploration.
            episode_finish_callback (Optional[callable]): Optional callback invoked whenever an episode finishes.
        """
        super(Worker, self).__init__()
        self.num_environments = num_environments
        self.logger = logging.getLogger(__name__)

        # VectorEnv was passed in directly -> Use that one.
        if isinstance(env_spec, VectorEnv):
            self.vector_env = env_spec
            self.num_environments = self.vector_env.num_environments
            self.env_ids = ["env_{}".format(i) for i in range_(self.num_environments)]
        # `Env_spec` is for single envs inside a SequentialVectorEnv.
        elif env_spec is not None:
            self.vector_env = SequentialVectorEnv(env_spec=env_spec, num_environments=self.num_environments)
            self.env_ids = ["env_{}".format(i) for i in range_(self.num_environments)]
        # No env_spec.
        else:
            self.vector_env = None
            self.env_ids = []

        self.agent = agent
        self.frameskip = frameskip
        self.render = render

        # Update schedule if worker is performing updates.
        self.updating = None
        self.steps_before_update = None
        self.update_interval = None
        self.update_steps = None
        self.sync_interval = None
        self.episodes_since_update = 0

        # Default val or None?
        self.update_mode = "time_steps"

        self.worker_executes_exploration = worker_executes_exploration
        self.exploration_epsilon = exploration_epsilon

        self.episode_finish_callback = episode_finish_callback
Example #14
    def test_copying_a_component(self):
        # Flatten a simple 2x2 FloatBox to (4,).
        space = FloatBox(shape=(2,2), add_batch_rank=False)

        flatten_orig = Flatten()
        flatten_copy = flatten_orig.copy(scope="flatten-copy")
        component_to_test = Component(flatten_orig, flatten_copy,
                                      inputs=["input1", "input2"], outputs=["output1", "output2"],
                                      connections=[
                                          ["input1", ["flatten", "input"]],
                                          ["input2", ["flatten-copy", "input"]],
                                          [["flatten", "output"], "output1"],
                                          [["flatten-copy", "output"], "output2"]
                                      ])
        test = ComponentTest(component=component_to_test, input_spaces=dict(input1=space, input2=space))

        input_ = dict(
            input1=np.array([[0.5, 2.0], [1.0, 2.0]]),
            input2=np.array([[1.0, 2.0], [3.0, 4.0]])
        )
        expected = dict(
            output1=np.array([0.5, 2.0, 1.0, 2.0]),
            output2=np.array([1.0, 2.0, 3.0, 4.0])
        )
        for i in range_(2):
            test.test(out_socket_names="output"+str(i+1), inputs=input_, expected_outputs=expected["output"+str(i+1)])
Example #15
    def test_insert(self):
        """
        Simply tests insert op without checking internal logic.
        """
        memory = MemPrioritizedReplay(capacity=self.capacity,
                                      next_states=True,
                                      alpha=self.alpha,
                                      beta=self.beta)
        memory.create_variables(self.input_spaces)

        observation = memory.record_space_flat.sample(size=1)
        memory.insert_records(observation)

        # Test chunked insert
        observation = memory.record_space_flat.sample(size=5)
        memory.insert_records(observation)

        # Also test Apex version
        memory = ApexMemory(capacity=self.capacity,
                            alpha=self.alpha,
                            beta=self.beta)
        observation = self.apex_space.sample(size=5)
        for i in range_(5):
            memory.insert_records(
                (observation['states'][i], observation['actions'][i],
                 observation['reward'][i], observation['terminals'][i],
                 observation['states'][i], observation["weights"][i]))
Example #16
    def test_individual_env(self):
        env = Environment.from_spec(self.env_spec)
        agent = Agent.from_spec(
            # Uses 2015 DQN parameters as closely as possible.
            config_from_path("configs/dqn_agent_for_pong.json"),
            state_space=env.state_space,
            # Try with "reduced" action space (actually only 3 actions, up, down, no-op)
            action_space=env.action_space)

        state = env.reset()
        start = time.monotonic()
        ep_length = 0
        for _ in range_(self.samples):
            action = agent.get_action(state)
            state, reward, terminal, info = env.step(action)

            ep_length += 1
            if terminal:
                print("reset after {} states".format(ep_length))
                env.reset()
                ep_length = 0

        runtime = time.monotonic() - start
        tp = self.samples / runtime

        print('Testing individual env {} performance:'.format(
            self.env_spec["gym_env"]))
        print('Ran {} steps, throughput: {} states/s, total time: {} s'.format(
            self.samples, tp, runtime))
Example #17
    def test_simple_python_preprocessor_stack(self):
        """
        Tests a pure python preprocessor stack.
        """
        space = FloatBox(shape=(2, ), add_batch_rank=True)
        # python PreprocessorStack
        multiply = dict(type="multiply", factor=0.5, scope="m")
        divide = dict(type="divide", divisor=0.5, scope="d")
        stack = PreprocessorStack(multiply, divide, backend="python")
        for sub_comp_scope in ["m", "d"]:
            stack.sub_components[sub_comp_scope].create_variables(
                input_spaces=dict(inputs=space))

        #test = ComponentTest(component=stack, input_spaces=dict(inputs=float))

        for _ in range_(3):
            # Call fake API-method directly (ok for PreprocessorStack).
            stack.reset()
            input_ = np.asarray([[1.0], [2.0], [3.0], [4.0]])
            expected = input_
            #test.test(("preprocess", input_), expected_outputs=expected)
            out = stack.preprocess(input_)
            recursive_assert_almost_equal(out, input_)

            input_ = space.sample()
            #test.test(("preprocess", input_), expected_outputs=expected)
            out = stack.preprocess(input_)
            recursive_assert_almost_equal(out, input_)
Example #18
    def observe(self, env_sample):
        """
        Observes experience(s).

        N.b.: For performance reasons, the data layout is slightly different for Apex.
        """
        records = env_sample.get_batch()
        num_records = len(records['states'])

        # TODO port to tf PR behaviour.
        if self.clip_rewards:
            rewards = np.sign(records["rewards"])
        else:
            rewards = records["rewards"]
        for i in range_(num_records):
            # If actions is a dict of vectors (one per key), build a single per-record dict.
            if isinstance(records["actions"], dict):
                action = {k: v[i] for k, v in records["actions"].items()}
            else:
                action = records["actions"][i]

            self.memory.insert_records(
                (records["states"][i], action, rewards[i],
                 records["terminals"][i], records["next_states"][i],
                 records["importance_weights"][i]))
Example #19
    def test_python_sequence_preprocessor(self):
        seq_len = 3
        space = FloatBox(shape=(1,), add_batch_rank=True)
        sequencer = Sequence(sequence_length=seq_len, batch_size=4, add_rank=True, backend="python")
        sequencer.create_variables(input_spaces=dict(preprocessing_inputs=space))

        #test = ComponentTest(component=sequencer, input_spaces=dict(apply=space))

        for _ in range_(3):
            sequencer._graph_fn_reset()
            self.assertEqual(sequencer.index, -1)
            input_ = np.asarray([[1.0], [2.0], [3.0], [4.0]])
            out = sequencer._graph_fn_apply(input_)
            self.assertEqual(sequencer.index, 0)
            recursive_assert_almost_equal(
                out, np.asarray([[[1.0, 1.0, 1.0]], [[2.0, 2.0, 2.0]], [[3.0, 3.0, 3.0]], [[4.0, 4.0, 4.0]]])
            )
            input_ = np.asarray([[1.1], [2.2], [3.3], [4.4]])
            out = sequencer._graph_fn_apply(input_)
            self.assertEqual(sequencer.index, 1)
            recursive_assert_almost_equal(
                out, np.asarray([[[1.0, 1.0, 1.1]], [[2.0, 2.0, 2.2]], [[3.0, 3.0, 3.3]], [[4.0, 4.0, 4.4]]])
            )
            input_ = np.asarray([[1.11], [2.22], [3.33], [4.44]])
            out = sequencer._graph_fn_apply(input_)
            self.assertEqual(sequencer.index, 2)
            recursive_assert_almost_equal(
                out, np.asarray([[[1.0, 1.1, 1.11]], [[2.0, 2.2, 2.22]], [[3.0, 3.3, 3.33]], [[4.0, 4.4, 4.44]]])
            )
            input_ = np.asarray([[10], [20], [30], [40]])
            out = sequencer._graph_fn_apply(input_)
            self.assertEqual(sequencer.index, 0)
            recursive_assert_almost_equal(
                out, np.asarray([[[1.1, 1.11, 10]], [[2.2, 2.22, 20]], [[3.3, 3.33, 30]], [[4.4, 4.44, 40]]])
            )
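The expected outputs in this test follow from a ring buffer of length sequence_length: each apply call advances the index, overwrites the oldest slot, and the output stacks the stored inputs oldest-to-newest along a new trailing rank. A rough standalone sketch of that behaviour (not the Sequence component itself, just the mechanism):

import numpy as np

class TinySequence(object):
    # Illustrative ring buffer mimicking the index/stacking behaviour tested above.
    def __init__(self, sequence_length=3):
        self.sequence_length = sequence_length
        self.buffer = None
        self.index = -1

    def reset(self):
        self.buffer = None
        self.index = -1

    def apply(self, x):
        if self.index == -1:
            # First call after a reset: fill every slot with the first input.
            self.buffer = [np.asarray(x) for _ in range(self.sequence_length)]
            self.index = 0
        else:
            self.index = (self.index + 1) % self.sequence_length
            self.buffer[self.index] = np.asarray(x)
        # Stack oldest -> newest along a new trailing rank.
        order = [(self.index + 1 + k) % self.sequence_length for k in range(self.sequence_length)]
        return np.stack([self.buffer[k] for k in order], axis=-1)

seq = TinySequence(sequence_length=3)
seq.reset()
print(seq.apply([[1.0], [2.0]]))  # [[[1. 1. 1.]] [[2. 2. 2.]]], index == 0
print(seq.apply([[1.1], [2.2]]))  # [[[1. 1. 1.1]] [[2. 2. 2.2]]], index == 1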
Example #20
    def update_if_necessary(self, timesteps_executed):
        """
        Calls update on the agent according to the update schedule set for this worker.

        Args:
            timesteps_executed (int): Timesteps executed thus far.

        Returns:
            float: The summed up loss (over all self.update_steps).
        """
        if self.updating:
            # Are we allowed to update?
            if timesteps_executed > self.steps_before_update and \
                    (self.agent.observe_spec["buffer_enabled"] is False or  # no update before some data in buffer
                     timesteps_executed >= self.agent.observe_spec["buffer_size"]) and \
                    timesteps_executed % self.update_interval == 0:  # update frequency check
                loss = 0
                for _ in range_(self.update_steps):
                    #l, s_, a_, r_, t_ = self.agent.update()
                    loss += self.agent.update()
                    #self.logger.info("FROM MEM: s={} a={} r={} t={}".format(s_, a_, r_, t_))
                    #loss += l
                return loss

        return None
Example #21
    def test_copying_a_component(self):
        # Flatten a simple 2x2 FloatBox to (4,).
        space = FloatBox(shape=(2, 2), add_batch_rank=False)

        flatten_orig = ReShape(flatten=True, scope="A")
        flatten_copy = flatten_orig.copy(scope="B")
        container = Component(flatten_orig, flatten_copy)

        @rlgraph_api(component=container)
        def flatten1(self, input_):
            return self.sub_components["A"].apply(input_)

        @rlgraph_api(component=container)
        def flatten2(self, input_):
            return self.sub_components["B"].apply(input_)

        test = ComponentTest(component=container,
                             input_spaces=dict(input_=space))

        input_ = dict(input1=np.array([[0.5, 2.0], [1.0, 2.0]]),
                      input2=np.array([[1.0, 2.0], [3.0, 4.0]]))
        expected = dict(output1=np.array([0.5, 2.0, 1.0, 2.0]),
                        output2=np.array([1.0, 2.0, 3.0, 4.0]))
        for i in range_(1, 3):
            test.test(("flatten" + str(i), input_["input" + str(i)]),
                      expected_outputs=expected["output" + str(i)])
Example #22
    def __init__(self,
                 num_environments,
                 env_spec,
                 num_background_envs=1,
                 async_reset=False):
        """
            num_background_envs (Optional([int]): Number of environments asynchronously
                reset in the background. Need to be calibrated depending on reset cost.
            async_reset (Optional[bool]): If true, resets envs asynchronously in another thread.
        """
        self.environments = []

        for _ in range_(num_environments):
            if isinstance(env_spec, dict):
                env = Environment.from_spec(env_spec)
            elif hasattr(env_spec, '__call__'):
                env = env_spec()
            else:
                raise ValueError(
                    "Env_spec must be either a dict containing an environment spec or a callable"
                    "returning a new environment object.")
            self.environments.append(env)

        super(SequentialVectorEnv,
              self).__init__(num_environments=num_environments,
                             state_space=self.environments[0].state_space,
                             action_space=self.environments[0].action_space)

        self.async_reset = async_reset
        if self.async_reset:
            self.resetter = ThreadedResetter(env_spec, num_background_envs)
        else:
            self.resetter = Resetter()
Example #23
    def create_remote_workers(self, cls, num_actors, agent_config, worker_spec,
                              *args):
        """
        Creates Ray actors for remote execution.

        Args:
            cls (Union[RayValueWorker, RayPolicyWorker]): RayActor class.
            num_actors (int): Num RayActor to create.
            agent_config (dict): Agent config.
            worker_spec (dict): Worker spec.
            *args (any): Arguments for RayActor class.

        Returns:
            list: Remote Ray actors.
        """
        workers = []
        cls_as_remote = cls.as_remote(num_cpus=self.num_cpus_per_worker,
                                      num_gpus=self.num_gpus_per_worker).remote

        # Create remote objects and schedule init tasks.
        ray_constant_exploration = worker_spec.get("ray_constant_exploration",
                                                   False)
        for i in range_(num_actors):
            if ray_constant_exploration is True:
                exploration_val = worker_exploration(i, num_actors)
                worker_spec["ray_exploration"] = exploration_val
            worker = cls_as_remote(deepcopy(agent_config), worker_spec, *args)
            self.worker_ids[worker] = "worker_{}".format(i)
            workers.append(worker)
            self.logger.info("Successfully built agent num {}.".format(i))

        return workers
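worker_exploration(i, num_actors) itself is not shown here; a common choice for constant per-worker exploration is the Ape-X style schedule epsilon_i = epsilon ** (1 + i / (N - 1) * alpha). The sketch below is only an assumption about what such a helper might compute, not the library's implementation:

def worker_exploration_sketch(worker_index, num_workers, base_epsilon=0.4, alpha=7.0):
    # Hypothetical Ape-X style per-worker epsilon: epsilon ** (1 + i / (N - 1) * alpha).
    if num_workers <= 1:
        return base_epsilon
    exponent = 1.0 + worker_index / (num_workers - 1) * alpha
    return base_epsilon ** exponent

print([round(worker_exploration_sketch(i, 8), 5) for i in range(8)])
# Decreases monotonically from 0.4 down to 0.4 ** 8.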
Example #24
    def test_gaussian_noise(self):
        real_mean = 10.0
        real_sd = 2.0

        noise_component = GaussianNoise(mean=real_mean, stddev=real_sd)
        test = ComponentTest(component=noise_component,
                             input_spaces=None,
                             action_space=self.action_input_space)

        # Collect outputs in `collected` list to compare moments.
        collected = list()
        collect_outs = lambda component_test, outs: collected.append(outs)

        for _ in range_(1000):
            test.test(("get_noise", None), fn_test=collect_outs)

        test_mean = np.mean(collected)
        test_sd = np.std(collected)

        # Empirical mean should be within 2 sds of the real mean.
        self.assertGreater(real_mean, test_mean - test_sd * 2)
        self.assertLess(real_mean, test_mean + test_sd * 2)

        # Empirical sd should be within an 80% to 120% interval.
        self.assertGreater(real_sd, test_sd * 0.8)
        self.assertLess(real_sd, test_sd * 1.2)
Example #25
    def test_sequence_preprocessor_with_container_space(self):
        # Test with no batch rank.
        space = Tuple(
            FloatBox(shape=(1,)),
            FloatBox(shape=(2, 2)),
            add_batch_rank=False
        )

        component_to_test = Sequence(sequence_length=4, add_rank=False)
        test = ComponentTest(component=component_to_test, input_spaces=dict(preprocessing_inputs=space))

        for i in range_(3):
            test.test("reset")

            test.test(("apply", np.array([np.array([0.5]), np.array([[0.6, 0.7], [0.8, 0.9]])])),
                      expected_outputs=(np.array([0.5, 0.5, 0.5, 0.5]), np.array([[0.6, 0.7] * 4,
                                                                                  [0.8, 0.9] * 4])))
            test.test(("apply", np.array([np.array([0.6]), np.array([[1.1, 1.1], [1.1, 1.1]])])),
                      expected_outputs=(np.array([0.5, 0.5, 0.5, 0.6]), np.array([[0.6, 0.7, 0.6, 0.7,
                                                                                   0.6, 0.7, 1.1, 1.1],
                                                                                  [0.8, 0.9, 0.8, 0.9,
                                                                                   0.8, 0.9, 1.1, 1.1]])))
            test.test(("apply", np.array([np.array([0.7]), np.array([[2.0, 2.1], [2.2, 2.3]])])),
                      expected_outputs=(np.array([0.5, 0.5, 0.6, 0.7]), np.array([[0.6, 0.7, 0.6, 0.7,
                                                                                   1.1, 1.1, 2.0, 2.1],
                                                                                  [0.8, 0.9, 0.8, 0.9,
                                                                                   1.1, 1.1, 2.2, 2.3]])))
Example #26
    def test_ornstein_uhlenbeck_noise(self):
        ou_theta = 0.15
        ou_mu = 10.0
        ou_sigma = 2.0

        noise_component = OrnsteinUhlenbeckNoise(theta=ou_theta,
                                                 mu=ou_mu,
                                                 sigma=ou_sigma)
        test = ComponentTest(component=noise_component,
                             action_space=self.action_input_space)

        # Collect outputs in `collected` list to compare moments.
        collected = list()
        collect_outs = lambda component_test, outs: collected.append(outs)

        for _ in range_(1000):
            test.test(("get_noise", None), fn_test=collect_outs)

        test_mean = np.mean(collected)
        test_sd = np.std(collected)

        print("Moments: {} / {}".format(test_mean, test_sd))

        # Empirical mean should be within 2 sds of the real mean.
        self.assertGreater(ou_mu, test_mean - test_sd * 2)
        self.assertLess(ou_mu, test_mean + test_sd * 2)

        # Empirical sd should be within a 45% to 200% interval.
        self.assertGreater(ou_sigma, test_sd * 0.45)
        self.assertLess(ou_sigma, test_sd * 2.0)
Example #27
    def test_sequence_preprocessor(self):
        space = FloatBox(shape=(1,), add_batch_rank=True)
        sequencer = Sequence(sequence_length=3, add_rank=True)
        test = ComponentTest(component=sequencer, input_spaces=dict(preprocessing_inputs=space))

        vars = sequencer.get_variables("index", "buffer", global_scope=False)
        index, buffer = vars["index"], vars["buffer"]

        for _ in range_(3):
            test.test("reset")
            index_value, buffer_value = test.read_variable_values(index, buffer)
            self.assertEqual(index_value, -1)
            test.test(("apply", np.array([[0.1]])),
                      expected_outputs=np.array([[[0.1, 0.1, 0.1]]]))
            index_value, buffer_value = test.read_variable_values(index, buffer)
            self.assertEqual(index_value, 0)
            test.test(("apply", np.array([[0.2]])),
                      expected_outputs=np.array([[[0.1, 0.1, 0.2]]]))
            index_value, buffer_value = test.read_variable_values(index, buffer)
            self.assertEqual(index_value, 1)
            test.test(("apply", np.array([[0.3]])),
                      expected_outputs=np.array([[[0.1, 0.2, 0.3]]]))
            index_value, buffer_value = test.read_variable_values(index, buffer)
            self.assertEqual(index_value, 2)
            test.test(("apply", np.array([[0.4]])),
                      expected_outputs=np.array([[[0.2, 0.3, 0.4]]]))
            index_value, buffer_value = test.read_variable_values(index, buffer)
            self.assertEqual(index_value, 0)
            test.test(("apply", np.array([[0.5]])),
                      expected_outputs=np.array([[[0.3, 0.4, 0.5]]]))
            index_value, buffer_value = test.read_variable_values(index, buffer)
            self.assertEqual(index_value, 1)
Example #28
    def update_if_necessary(self):
        """
        Calls update on the agent according to the update schedule set for this worker.

        #Args:
        #    timesteps_executed (int): Timesteps executed thus far.

        Returns:
            float: The summed up loss (over all self.update_steps).
        """
        if self.updating:
            # Are we allowed to update?
            if self.agent.timesteps > self.steps_before_update and \
                    (self.agent.observe_spec["buffer_enabled"] is False or  # no update before some data in buffer
                     self.agent.timesteps >= self.agent.observe_spec["buffer_size"]) and \
                    self.agent.timesteps % self.update_interval == 0:  # update frequency check
                loss = 0
                for _ in range_(self.update_steps):
                    ret = self.agent.update()
                    if isinstance(ret, tuple):
                        loss += ret[0]
                    else:
                        loss += ret
                return loss

        return None
Example #29
    def create_variables(self, input_spaces, action_space=None):
        super(MemPrioritizedReplay, self).create_variables(input_spaces, action_space)
        self.priority_capacity = 1
        while self.priority_capacity < self.capacity:
            self.priority_capacity *= 2

        # Create segment trees, initialize with neutral elements.
        sum_values = [0.0 for _ in range_(2 * self.priority_capacity)]
        sum_segment_tree = MemSegmentTree(sum_values, self.priority_capacity, operator.add)
        min_values = [float('inf') for _ in range_(2 * self.priority_capacity)]
        min_segment_tree = MemSegmentTree(min_values, self.priority_capacity, min)

        self.merged_segment_tree = MinSumSegmentTree(
            sum_tree=sum_segment_tree,
            min_tree=min_segment_tree,
            capacity=self.priority_capacity
        )
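Both trees are plain array-backed segment trees: leaves live at positions priority_capacity .. 2 * priority_capacity - 1, internal node i aggregates its children 2*i and 2*i + 1, and the neutral element (0.0 for the sum tree, inf for the min tree) keeps unused leaves from affecting queries. A generic sketch of the idea (not the MemSegmentTree API):

import operator

class TinySegmentTree(object):
    # Generic array-backed segment tree; illustrative, not the library's MemSegmentTree.
    def __init__(self, capacity, op, neutral):
        self.capacity = capacity                    # must be a power of two
        self.op = op
        self.values = [neutral] * (2 * capacity)

    def insert(self, index, value):
        i = index + self.capacity                   # leaf position
        self.values[i] = value
        i //= 2
        while i >= 1:                               # propagate up to the root at position 1
            self.values[i] = self.op(self.values[2 * i], self.values[2 * i + 1])
            i //= 2

    def root(self):
        return self.values[1]                       # aggregate over all leaves

sum_tree = TinySegmentTree(capacity=4, op=operator.add, neutral=0.0)
min_tree = TinySegmentTree(capacity=4, op=min, neutral=float('inf'))
for idx, priority in enumerate([0.5, 2.0, 1.0]):
    sum_tree.insert(idx, priority)
    min_tree.insert(idx, priority)
print(sum_tree.root(), min_tree.root())             # 3.5 0.5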
Example #30
    def step(self, actions):
        states, rewards, terminals, infos = [], [], [], []
        for i in range_(self.num_envs):
            state, reward, terminal, info = self.environments[i].step(actions[i])
            states.append(state)
            rewards.append(reward)
            terminals.append(terminal)
            infos.append(info)
        return states, rewards, terminals, infos