Пример #1
0
def test_independent_resource_actions():
    """
    Check the spaces built by the RL wrapper when two resources each own two activities.

    The set up follows the ksrs_network_model example, see
    snc/stochastic_network_control/environments/examples.py for the original code.
    """
    # Arrival rates (buffers 1 and 3) and service rates (one per activity).
    arrival_1, arrival_3 = 2, 2
    rate_1, rate_2, rate_3, rate_4 = 10, 3, 10, 3
    num_buffers = 4

    # Activities 1 and 4 belong to the first resource, activities 2 and 3 to the second.
    constituency_matrix = np.array([[1, 0, 0, 1],
                                    [0, 1, 1, 0]])
    buffer_processing_matrix = np.array([[-rate_1, 0, 0, 0],
                                         [rate_1, -rate_2, 0, 0],
                                         [0, 0, -rate_3, 0],
                                         [0, 0, rate_3, -rate_4]])
    demand_rate = np.array([[arrival_1], [0], [arrival_3], [0]])

    # Build the environment on top of a seeded job generator.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    env = RLControlledRandomWalk(
        np.ones((num_buffers, 1)),  # cost per buffer
        np.full((num_buffers, 1), np.inf),  # unbounded capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((0, 0, 0, 0)),
        True,  # job conservation flag
    )

    # Expect one action space per resource with three entries each, six action vectors in
    # total and a flat observation with one entry per buffer.
    action_spaces = env._rl_action_space.spaces
    assert len(action_spaces) == 2
    assert all(space.shape[1] == 3 for space in action_spaces)
    assert len(env._action_vectors) == 6
    assert env.observation_space.shape == (4,)
Пример #2
0
def test_rl_env_from_snc_env_action_space_dims_simple():
    """
    Check that rl_env_from_snc_env reports the same action space dimensions with and without
    the TensorFlow Agents wrapping for a single buffer, single activity model, and that these
    dimensions add up to the number of action vectors of the environment.
    """
    # Single buffer served by a single activity.
    demand_rate = np.array([[0.7]])
    buffer_processing_matrix = np.full((1, 1), -1.0)
    constituency_matrix = np.ones((1, 1))

    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    # The generator is expected to report no routes for this model.
    assert job_generator.routes == {}

    env = RLControlledRandomWalk(
        np.ones((1, 1)),  # cost per buffer
        np.full((1, 1), np.inf),  # unbounded capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((0,)),
        True,  # job conservation flag
        [constituency_matrix],  # boundary constraint matrices
    )

    # Run the pipeline first without and then with the TensorFlow Agents wrapping.
    plain_dims, tf_dims = [
        rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=wrap_for_tf)[1]
        for wrap_for_tf in (False, True)
    ]
    assert plain_dims == tf_dims
    assert len(env.action_vectors) == sum(plain_dims)
Пример #3
0
def get_default_env_params(state, buffer_processing_matrix=None):
    """
    Return the default environment keyword arguments used in the tests.

    The number of resources equals the number of buffers (rows of `state`). When no
    `buffer_processing_matrix` is given, an upper triangular matrix filled with -0.5 is used.
    Costs, capacities and the constituency matrix are all zeros, arrivals are all ones.
    """
    buffer_count = state.shape[0]
    resource_count = buffer_count

    if buffer_processing_matrix is None:
        # Default: upper triangular matrix with -0.5 on and above the diagonal.
        upper_triangle = np.triu(np.ones((buffer_count, resource_count)))
        buffer_processing_matrix = -upper_triangle * 0.5

    job_generator = drjg.DeterministicDiscreteReviewJobGenerator(
        np.ones_like(state),
        buffer_processing_matrix,
        sim_time_interval=1)
    state_initialiser = si.DeterministicCRWStateInitialiser(state)

    params = {}
    params["cost_per_buffer"] = np.zeros_like(state)
    params["capacity"] = np.zeros_like(state)
    params["constituency_matrix"] = np.zeros((resource_count, resource_count))
    params["job_generator"] = job_generator
    params["state_initialiser"] = state_initialiser
    params["job_conservation_flag"] = True
    params["list_boundary_constraint_matrices"] = None
    return params
def test_ensure_jobs_conservation_with_multiple_demand_and_no_supply():
    """
    Check that, in a 'pull' model with surplus buffers 1 and 3 and every buffer empty,
    ensure_jobs_conservation returns an all-zero routing matrix.
    """
    shape_column = (5, 1)
    shape_matrix = (5, 5)
    rate_1, rate_2, rate_3 = 1, 2, 3
    demand_rate_1, demand_rate_2 = 4, 5
    buffer_processing_matrix = np.array([
        [-rate_1, 0, 0, 0, 0],
        [rate_1, -rate_2, rate_3, 0, -demand_rate_2],
        [0, 0, 0, 0, -demand_rate_2],
        [0, rate_2, -rate_3, -demand_rate_1, 0],
        [0, 0, 0, -demand_rate_1, 0],
    ])

    state_initialiser = stinit.DeterministicCRWStateInitialiser(np.zeros(shape_column))
    job_generator = drjg(np.ones(shape_column), buffer_processing_matrix, sim_time_interval=1)
    env = ControlledRandomWalk(
        np.ones(shape_column),  # cost per buffer
        np.ones(shape_column),  # capacity
        np.zeros(shape_matrix),  # constituency matrix
        job_generator,
        state_initialiser,
        model_type='pull',
        ind_surplus_buffers=[1, 3],
    )

    # All buffers are empty when conservation is enforced.
    empty_state = np.zeros(shape_column)
    routing_jobs_matrix = env.ensure_jobs_conservation(buffer_processing_matrix, empty_state)

    expected = np.zeros(shape_matrix)
    assert np.all(routing_jobs_matrix == expected)
def get_null_env_params(state,
                        num_resources=None,
                        buffer_processing_matrix=None,
                        constituency_matrix=None):
    """
    Return environment keyword arguments with zero costs and capacities and unit arrivals.

    `num_resources` defaults to the number of buffers (rows of `state`). A missing
    `buffer_processing_matrix` becomes minus the upper triangle of a ones matrix, and a missing
    `constituency_matrix` becomes a square matrix of zeros of size `num_resources`.
    """
    buffer_count = state.shape[0]
    if num_resources is None:
        num_resources = buffer_count

    if buffer_processing_matrix is None:
        upper_triangle = np.triu(np.ones((buffer_count, num_resources)))
        buffer_processing_matrix = -upper_triangle
    if constituency_matrix is None:
        constituency_matrix = np.zeros((num_resources, num_resources))

    job_generator = drjg.DeterministicDiscreteReviewJobGenerator(
        np.ones_like(state),
        buffer_processing_matrix,
        sim_time_interval=1)
    state_initialiser = si.DeterministicCRWStateInitialiser(state)

    params = {}
    params["cost_per_buffer"] = np.zeros_like(state)
    params["capacity"] = np.zeros_like(state)
    params["constituency_matrix"] = constituency_matrix
    params["job_generator"] = job_generator
    params["state_initialiser"] = state_initialiser
    params["job_conservation_flag"] = True
    params["list_boundary_constraint_matrices"] = None
    return params
Пример #6
0
def test_render_error():
    """
    Check that rendering the environment raises NotImplementedError.

    Included to give full test coverage.
    """
    # Single server queue: one buffer, one activity.
    demand_rate = np.array([[0.7]])
    buffer_processing_matrix = np.full((1, 1), -1.0)
    constituency_matrix = np.ones((1, 1))

    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    assert job_generator.routes == {}

    env = RLControlledRandomWalk(
        np.ones((1, 1)),  # cost per buffer
        np.full((1, 1), np.inf),  # unbounded capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((0,)),
        True,  # job conservation flag
        [constituency_matrix],  # boundary constraint matrices
    )

    # Rendering is expected to fail. This will need to be revisited should we wish to
    # visualise episodes in future.
    with pytest.raises(NotImplementedError):
        env.render()
Пример #7
0
def test_stepping_environment_with_action_tuple():
    """
    Check that the RL environment can be stepped with a tuple of numpy arrays, one per resource.

    The set up follows the ksrs_network_model example, see
    snc/stochastic_network_control/environments/examples.py for the original code.
    """
    # Arrival rates (buffers 1 and 3) and service rates (one per activity).
    arrival_1, arrival_3 = 2, 2
    rate_1, rate_2, rate_3, rate_4 = 10, 3, 10, 3

    demand_rate = np.array([[arrival_1], [0], [arrival_3], [0]])
    buffer_processing_matrix = np.array([[-rate_1, 0, 0, 0],
                                         [rate_1, -rate_2, 0, 0],
                                         [0, 0, -rate_3, 0],
                                         [0, 0, rate_3, -rate_4]])
    constituency_matrix = np.array([[1, 0, 0, 1],
                                    [0, 1, 1, 0]])

    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    env = RLControlledRandomWalk(
        np.ones((4, 1)),  # cost per buffer
        np.full((4, 1), np.inf),  # unbounded capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((0, 0, 0, 0)),
        True,  # job conservation flag
    )

    # One array per resource. The test only requires that stepping does not raise.
    first_resource_action = np.array([0, 1, 0])
    second_resource_action = np.array([0, 0, 1])
    env.step((first_resource_action, second_resource_action))
Пример #8
0
def test_tf_environment_wrapping():
    """
    Check that the RL environment can be wrapped for use with TensorFlow Agents.

    The single server queue is used for simplicity.
    """
    # Single server queue: one buffer, one activity.
    demand_rate = np.array([[0.7]])
    buffer_processing_matrix = np.full((1, 1), -1.0)
    constituency_matrix = np.ones((1, 1))

    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    assert job_generator.routes == {}

    env = RLControlledRandomWalk(
        np.ones((1, 1)),  # cost per buffer
        np.full((1, 1), np.inf),  # unbounded capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((0,)),
        True,  # job conservation flag
        [constituency_matrix],  # boundary constraint matrices
    )

    # The test passes as long as both wrappers can be constructed without raising.
    gym_env = GymWrapper(env)
    tf_env = TFPyEnvironment(gym_env)
    del tf_env
Пример #9
0
def test_tensorflow_action_interpretation():
    """
    Check that a tuple of TensorFlow tensors, one per resource, is preprocessed by the
    environment into the expected activity column vector.
    """
    # Arrival rates (buffers 1 and 3) and service rates (one per activity).
    arrival_1, arrival_3 = 2, 2
    rate_1, rate_2, rate_3, rate_4 = 10, 3, 10, 3

    demand_rate = np.array([[arrival_1], [0], [arrival_3], [0]])
    buffer_processing_matrix = np.array([[-rate_1, 0, 0, 0],
                                         [rate_1, -rate_2, 0, 0],
                                         [0, 0, -rate_3, 0],
                                         [0, 0, rate_3, -rate_4]])
    constituency_matrix = np.array([[1, 0, 0, 1],
                                    [0, 1, 1, 0]])

    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    env = RLControlledRandomWalk(
        np.ones((4, 1)),  # cost per buffer
        np.full((4, 1), np.inf),  # unbounded capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((0, 0, 0, 0)),
        True,  # job conservation flag
    )

    # One tensor per resource.
    tensor_actions = tuple(tf.convert_to_tensor(choice) for choice in ([0, 1, 0], [0, 0, 1]))
    expected_activities = np.array([[1], [0], [1], [0]])
    assert np.all(env.preprocess_action(tensor_actions) == expected_activities)
Пример #10
0
def test_action_interpretation_error():
    """
    Check that preprocess_action raises ValueError for actions in an unexpected format.

    The single server queue is used for simplicity.
    """
    # Single server queue: one buffer, one activity.
    demand_rate = np.array([[0.7]])
    buffer_processing_matrix = np.full((1, 1), -1.0)
    constituency_matrix = np.ones((1, 1))

    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    assert job_generator.routes == {}

    env = RLControlledRandomWalk(
        np.ones((1, 1)),  # cost per buffer
        np.full((1, 1), np.inf),  # unbounded capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((0,)),
        True,  # job conservation flag
        [constituency_matrix],  # boundary constraint matrices
    )

    # A tuple of plain integers is rejected.
    with pytest.raises(ValueError):
        env.preprocess_action((1, 2, 3))
    # A bare integer is rejected as well.
    with pytest.raises(ValueError):
        env.preprocess_action(1)
Пример #11
0
def test_simple_tensorflow_action_interpretation():
    """
    Test interpretation of action where there is only one action space. This is done in the single
    server queue setting so that there is one resource and therefore one action space.

    This tests actions as TensorFlow arrays.
    """
    # Set up single server queue.
    cost_per_buffer = np.ones((1, 1))
    initial_state = (0,)
    capacity = np.ones((1, 1)) * np.inf
    demand_rate_val = 0.7
    job_conservation_flag = True
    seed = 72

    demand_rate = np.array([demand_rate_val])[:, None]
    buffer_processing_matrix = - np.ones((1, 1))
    constituency_matrix = np.ones((1, 1))
    list_boundary_constraint_matrices = [constituency_matrix]

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)
    assert job_generator.routes == {}
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag,
                                 list_boundary_constraint_matrices)
    action = tf.convert_to_tensor([0, 1], tf.float32)
    interpreted_action = env.preprocess_action(action)
    # Asserting directly on `array == array` relies on the implicit truth value of a numpy
    # array, which raises ValueError for results with more than one element instead of failing
    # the assertion. Check the size explicitly and reduce with np.all so that a wrong result
    # is reported as a plain assertion failure.
    assert np.size(interpreted_action) == 1
    assert np.all(interpreted_action == np.array([1]))
def test_controlled_random_walk_reset():
    """Check that resetting the CRW restores both its state and its job generator seed."""
    initial_state = np.array([[20]])

    job_generator = prjg(sim_time_interval=1,
                         demand_rate=np.array([[1000]]),
                         buffer_processing_matrix=np.array([[5]]),
                         job_gen_seed=42)

    def random_state_snapshot():
        # Second entry of the tuple returned by the generator's get_state().
        return job_generator.np_random.get_state()[1]

    # Record the random state before the environment is built and stepped.
    random_state_at_start = random_state_snapshot()
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state=initial_state)

    env = ControlledRandomWalk(cost_per_buffer=np.array([[3.5]]),
                               capacity=np.full((1, 1), np.inf),
                               constituency_matrix=np.ones((1, 1)),
                               job_generator=job_generator,
                               state_initialiser=state_initialiser)

    # A single step must move both the state and the random state.
    state_after_step, _reward, _done, _info = env.step(action=np.ones((1, 1)))
    random_state_after_step = random_state_snapshot()
    assert np.any(state_after_step != initial_state)
    assert np.any(random_state_after_step != random_state_at_start)

    # After the reset both must be back at their initial values.
    env.reset_with_random_state()
    state_after_reset = env.state
    random_state_after_reset = random_state_snapshot()
    assert np.all(state_after_reset == initial_state)
    assert np.all(random_state_after_reset == random_state_at_start)
Пример #13
0
def test_action_processing_in_single_server_queue():
    """
    Check how actions are interpreted and applied in the single server queue with
    deterministic dynamics, validating both the resulting state and the reward.
    """
    # Ten jobs to start with, no arrivals and deterministic dynamics.
    jobs_at_start = 10
    demand_rate = np.array([[0.0]])
    buffer_processing_matrix = np.full((1, 1), -1.0)
    constituency_matrix = np.ones((1, 1))

    job_generator = DeterministicDiscreteReviewJobGenerator(
        sim_time_interval=1,
        demand_rate=demand_rate,
        buffer_processing_matrix=buffer_processing_matrix
    )
    env = RLControlledRandomWalk(
        np.ones((1, 1)),  # cost per buffer
        np.full((1, 1), np.inf),  # unbounded capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((jobs_at_start,)),
        True,  # job conservation flag
        [constituency_matrix],  # boundary constraint matrices
    )

    # Selecting the first entry leaves the buffer untouched.
    state_after_idle, reward_after_idle, _, _ = env.step(np.array([1, 0]))
    assert state_after_idle[0] == jobs_at_start
    assert reward_after_idle == -1. * jobs_at_start

    # Selecting the second entry removes exactly one job.
    state_after_work, reward_after_work, _, _ = env.step(np.array([0, 1]))
    assert state_after_work[0] == jobs_at_start - 1
    assert reward_after_work == -1. * (jobs_at_start - 1)
Пример #14
0
def test_complex_rl_action_interpretation():
    """
    Check the RL environment's action interpretation when each resource controls several
    activities.

    The set up follows the ksrs_network_model example, see
    snc/stochastic_network_control/environments/examples.py for the original code.
    """
    # Arrival rates (buffers 1 and 3) and service rates (one per activity).
    arrival_1, arrival_3 = 2, 2
    rate_1, rate_2, rate_3, rate_4 = 10, 3, 10, 3

    demand_rate = np.array([[arrival_1], [0], [arrival_3], [0]])
    buffer_processing_matrix = np.array([[-rate_1, 0, 0, 0],
                                         [rate_1, -rate_2, 0, 0],
                                         [0, 0, -rate_3, 0],
                                         [0, 0, rate_3, -rate_4]])
    constituency_matrix = np.array([[1, 0, 0, 1],
                                    [0, 1, 1, 0]])

    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    env = RLControlledRandomWalk(
        np.ones((4, 1)),  # cost per buffer
        np.full((4, 1), np.inf),  # unbounded capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((0, 0, 0, 0)),
        True,  # job conservation flag
    )

    # Meaning of each entry of the binary RL action:
    #   index 0: first group idles
    #   index 1: first activity on (first action of first group)
    #   index 2: fourth activity on (second action of first group)
    #   index 3: second group idles
    #   index 4: second activity on (first action of second group)
    #   index 5: third activity on (second action of second group)
    nothing_selected = np.zeros(6)
    fourth_activity_only = np.array([0., 0., 1., 0., 0., 0.])
    fourth_and_second_activity = np.array([0., 0., 1., 0., 1., 0.])

    # Interpret the actions in increasing order of complexity.
    idle_action = env._interpret_rl_action(nothing_selected)
    action_a = env._interpret_rl_action(fourth_activity_only)
    action_b = env._interpret_rl_action(fourth_and_second_activity)

    # The null action, a single activity and a combination of activities across both resource
    # groups must each map to the matching activity vector.
    assert np.all(idle_action == np.zeros(4))
    assert np.all(action_a == np.array([0, 0, 0, 1]))
    assert np.all(action_b == np.array([0, 1, 0, 1]))
def env_job_conservation(job_conservation_flag):
    """
    Build a two-buffer CRW without arrivals in which the first activity moves jobs from the
    first buffer to the second, and the second activity drains the second buffer.

    `job_conservation_flag` is forwarded as the sixth positional argument of
    ControlledRandomWalk. Returns the tuple (initial_state, environment).
    """
    initial_state = np.array([[0], [0]])
    buffer_processing_matrix = np.array([[-1, 0], [1, -1]])
    job_generator = drjg(np.zeros((2, 1)), buffer_processing_matrix, sim_time_interval=1)
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = ControlledRandomWalk(
        np.array([[2], [4]]),  # cost per buffer
        np.array([[10], [10]]),  # capacity
        np.eye(2),  # one resource per activity
        job_generator,
        state_initialiser,
        job_conservation_flag,
    )
    return initial_state, env
def test_routing_two_serially_connected_resources_one_buffer_each():
    """Two resources with one buffer each. At every iteration each resource either works on its
    buffer or idles. Jobs processed by the first resource are routed to the buffer of the second
    one. Arrivals and services are generated deterministically at their mean rates. The test
    replays a fixed sequence of actions and compares the state and reward of every step with
    precomputed values."""
    arrival_1, arrival_2 = 1, 0  # Rate of job arrival at buffers 1 and 2
    rate_1, rate_2 = 1, 1  # Rate of job processing at buffers 1 and 2
    demand_rate = np.array([[arrival_1], [arrival_2]])
    # Rows: buffers, columns: influence of each activity. A job processed by activity 1
    # (column 0) leaves buffer 1 (row 0) and is routed to buffer 2 (row 1).
    buffer_processing_matrix = np.array([[-rate_1, 0], [rate_1, -rate_2]])

    # One row per time step; a one means that the activity is switched on.
    actions = np.array([
        [0, 0],  # both resources idle
        [1, 0],  # only buffer 1 is processed
        [1, 1],  # both buffers are processed
        [0, 1],  # only buffer 2 is processed
        [1, 1],  # both buffers are processed
    ])
    # Expected number of jobs in each buffer after every step.
    jobs = np.array([
        [1, 0],
        [1, 1],
        [1, 1],
        [2, 0],
        [2, 0],
    ])
    # Expected cost of every step; the reward is compared against its negative.
    cost = [2, 6, 6, 4, 4]

    job_generator = drjg(demand_rate, buffer_processing_matrix, sim_time_interval=1)
    assert job_generator.routes == {(1, 0): (0, 0)}
    state_initialiser = stinit.DeterministicCRWStateInitialiser(np.array([[0], [0]]))

    env = ControlledRandomWalk(
        np.array([[2], [4]]),  # cost per buffer
        np.array([[10], [10]]),  # capacity
        np.eye(2),
        job_generator,
        state_initialiser,
    )

    for action, expected_jobs, expected_cost in zip(actions, jobs, cost):
        state, reward, _, _ = env.step(action.reshape((2, 1)))
        assert np.all(state == expected_jobs.reshape((2, 1)))
        np.testing.assert_approx_equal(reward, -expected_cost)
Пример #17
0
def test_rl_env_with_complex_activity_conditions():
    """
    A more complicated version of test_rl_env_with_simple_activity_conditions.
    Checks the action spaces built for an 8 resource and 8 activity tandem model when extra
    constraints tie some of the activities together.
    """
    num_buffers = 8
    demand_rate = np.array([[4], [0], [0], [0], [0], [0], [0], [0]])
    # Tandem line: every activity drains its own buffer into the next one.
    buffer_processing_matrix = np.array([
        [-1, 0, 0, 0, 0, 0, 0, 0],
        [1, -1, 0, 0, 0, 0, 0, 0],
        [0, 1, -1, 0, 0, 0, 0, 0],
        [0, 0, 1, -1, 0, 0, 0, 0],
        [0, 0, 0, 1, -1, 0, 0, 0],
        [0, 0, 0, 0, 1, -1, 0, 0],
        [0, 0, 0, 0, 0, 1, -1, 0],
        [0, 0, 0, 0, 0, 0, 1, -1]
    ])

    # Extra rows coupling activities (1, 2), (3, 4) and (2, 3), stacked below one row per
    # physical resource.
    activity_constraints = np.array([
        [1, 1, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 1, 0, 0, 0, 0],
        [0, 1, 1, 0, 0, 0, 0, 0]
    ])
    constituency_matrix = np.vstack([np.eye(num_buffers), activity_constraints])

    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    env = RLControlledRandomWalk(
        np.ones((num_buffers, 1)),  # cost per buffer
        np.full((num_buffers, 1), np.inf),  # unbounded capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser(np.zeros((num_buffers, 1))),
        False,  # job conservation flag
        index_phys_resources=(0, 1, 2, 3, 4, 5, 6, 7),
    )

    action_spaces = env._rl_action_space.spaces
    assert len(action_spaces) == 5
    # The first group has 4 resources, and 8 actions.
    assert action_spaces[0].shape == (1, 8)
    # Each independent group has only two actions.
    assert action_spaces[1].shape == (1, 2)
def test_ensure_jobs_conservation_with_not_enough_jobs():
    """
    Check the routing matrix returned by ensure_jobs_conservation when every buffer holds a
    single job while the processing rates are larger than one.
    """
    shape_column = (2, 1)
    buffer_processing_matrix = np.array([[-2, 0], [2, -3]])

    state_initialiser = stinit.DeterministicCRWStateInitialiser(np.zeros(shape_column))
    job_generator = drjg(np.ones(shape_column), buffer_processing_matrix, sim_time_interval=1)
    env = ControlledRandomWalk(
        np.ones(shape_column),  # cost per buffer
        np.ones(shape_column),  # capacity
        np.zeros((2, 2)),  # constituency matrix
        job_generator,
        state_initialiser,
    )

    one_job_per_buffer = np.ones(shape_column)
    routing_jobs_matrix = env.ensure_jobs_conservation(
        buffer_processing_matrix, one_job_per_buffer)

    expected = np.array([[-1, 0], [1, -1]])
    assert np.all(routing_jobs_matrix == expected)
def test_exceed_capacity_single_buffer():
    """One resource with one buffer. Corner case where the initial state (10 jobs) is above the
    capacity (5 jobs): stepping the environment is expected to raise an AssertionError."""
    job_generator = drjg(np.array([[3]]), -np.ones((1, 1)), sim_time_interval=1)
    state_initialiser = stinit.DeterministicCRWStateInitialiser(np.array([[10]]))

    env = ControlledRandomWalk(
        np.array([[3.5]]),  # cost per buffer
        np.array([[5]]),  # capacity, below the initial state
        np.ones((1, 1)),
        job_generator,
        state_initialiser,
    )

    pytest.raises(AssertionError, env.step, np.ones((1, 1)))
def test_scheduling_multiple_buffers_events():
    """One resource (station) with 3 buffers and one activity per buffer. At every iteration the
    resource works on one buffer, whose jobs are removed deterministically at its processing
    rate, and new jobs arrive at every buffer at a deterministic rate. The test replays a fixed
    sequence of actions and compares the state and reward of every step with precomputed
    values."""
    arrival_1, arrival_2, arrival_3 = 1, 2, 3  # Rate of job arrival at buffers 1, 2 and 3
    rate_1, rate_2, rate_3 = 1, 2, 3  # Rate of job processing at buffers 1, 2 and 3
    demand_rate = np.array([[arrival_1], [arrival_2], [arrival_3]])
    # Rows: buffers, columns: influence of each activity. There is no routing in this case, so
    # the matrix is diagonal.
    buffer_processing_matrix = np.array([[-rate_1, 0, 0],
                                         [0, -rate_2, 0],
                                         [0, 0, -rate_3]])

    # One row per time step; exactly one buffer is scheduled at every step.
    actions = np.array([
        [0, 0, 1], [1, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0],
        [1, 0, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 0, 1],
    ])
    # Service compensates the arrivals of the scheduled buffer, so its number of jobs stays
    # constant, while the other buffers grow at their demand rates.
    jobs = np.array([
        [1, 2, 0], [1, 4, 3], [1, 6, 6], [2, 6, 9], [3, 8, 9], [4, 8, 12],
        [4, 10, 15], [5, 12, 15], [6, 12, 18], [6, 14, 21], [7, 16, 21],
    ])
    # Expected cost: dot product of cost_per_buffer and the number of jobs at each iteration.
    cost = [5.5, 19.8, 34.1, 45.1, 50.6, 61.6, 75.9, 81.4, 92.4, 106.7, 112.2]

    job_generator = drjg(demand_rate, buffer_processing_matrix, sim_time_interval=1)
    state_initialiser = stinit.DeterministicCRWStateInitialiser(np.array([[0], [0], [0]]))

    env = ControlledRandomWalk(
        np.array([[1.1], [2.2], [3.3]]),  # cost per buffer
        np.array([[40], [40], [40]]),  # capacity
        np.ones((1, 3)),  # a single resource owns the three activities
        job_generator,
        state_initialiser,
    )

    for action, expected_jobs, expected_cost in zip(actions, jobs, cost):
        state, reward, _, _ = env.step(action.reshape((3, 1)))
        assert np.all(state == expected_jobs.reshape((3, 1)))
        np.testing.assert_approx_equal(reward, -expected_cost)
Пример #21
0
def test_simple_rl_action_interpretation():
    """
    Check the mechanism that reads in actions coming from an RL model, using the 4 resource
    4 activity tandem model without any additional activity constraints.
    """
    num_buffers = 4
    demand_rate = np.array([[4], [0], [0], [0]])
    # Tandem line: every activity drains its own buffer into the next one.
    buffer_processing_matrix = np.array([
        [-1, 0, 0, 0],
        [1, -1, 0, 0],
        [0, 1, -1, 0],
        [0, 0, 1, -1]
    ])

    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    env = RLControlledRandomWalk(
        np.ones((num_buffers, 1)),  # cost per buffer
        np.full((num_buffers, 1), np.inf),  # unbounded capacity
        np.eye(num_buffers),  # one resource per activity
        job_generator,
        stinit.DeterministicCRWStateInitialiser(np.zeros((num_buffers, 1))),
        False,  # job conservation flag
        index_phys_resources=(0, 1, 2, 3),
    )

    # Binary RL actions of length 8 in increasing order of complexity: nothing selected, only
    # entry 1 selected, and every odd entry selected.
    nothing_selected = np.zeros(8)
    entry_one_selected = np.array([0., 1., 0., 0., 0., 0., 0., 0.])
    odd_entries_selected = np.array([0., 1., 0., 1., 0., 1., 0., 1.])

    null_action = env._interpret_rl_action(nothing_selected)
    action_one = env._interpret_rl_action(entry_one_selected)
    action_two = env._interpret_rl_action(odd_entries_selected)

    # Expect no activity, only the first activity, and all four activities respectively.
    assert np.all(null_action == np.zeros(4))
    assert np.all(action_one == np.array([1, 0, 0, 0]))
    assert np.all(action_two == np.ones(4))
def test_routing_three_buffer_tree_topology():
    """
    Routing test on a tree: one parent buffer feeding two child buffers.

    Each of the three resources owns one buffer. The children either idle or serve their own
    buffer, whereas the parent picks which child receives the job it processes, giving four
    activities overall. Events are generated deterministically at the rate given by the mean
    rate, so the buffer contents after every step can be compared with a fixed trajectory.
    """
    cost_per_buffer = np.array([[2], [4], [4]])
    capacity = np.array([[10], [10], [10]])
    initial_state = np.array([[0], [0], [0]])

    # Arrival rates per buffer: jobs only enter the network through buffer 1.
    arrivals_b1, arrivals_b2, arrivals_b3 = 1, 0, 0
    demand_rate = np.array([[arrivals_b1], [arrivals_b2], [arrivals_b3]])

    # Service rates: buffer 1 routed to buffer 2, buffer 1 routed to buffer 3, buffer 2, buffer 3.
    rate_1_to_2, rate_1_to_3, rate_2, rate_3 = 1, 1, 1, 1
    # Jobs processed at buffer 1 are routed to either buffer 2 or 3.
    buffer_processing_matrix = np.array([
        [-rate_1_to_2, -rate_1_to_3, 0, 0],
        [rate_1_to_2, 0, -rate_2, 0],
        [0, rate_1_to_3, 0, -rate_3],
    ])

    # One action per time-step (rows). The resource can only work in one buffer at a time.
    actions = np.array([
        [0, 1, 0, 0], [0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0],
        [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1], [1, 0, 1, 0],
        [1, 0, 0, 0], [0, 1, 0, 1], [1, 0, 0, 1],
    ])
    # Expected buffer contents after each of the actions above.
    jobs = np.array([
        [0, 0, 1], [0, 0, 2], [0, 1, 2], [0, 1, 3], [0, 1, 4],
        [1, 0, 4], [2, 0, 3], [2, 0, 3], [2, 1, 3], [2, 1, 3],
        [2, 2, 2],
    ])

    job_generator = drjg(demand_rate, buffer_processing_matrix, sim_time_interval=1)
    # The generator must have detected the two routes leaving the parent buffer.
    assert job_generator.routes == {(1, 0): (0, 0), (2, 1): (0, 1)}

    constituency_matrix = np.array([
        [1, 0, 0, 0],
        [1, 0, 0, 0],
        [0, 0, 1, 1],
        [0, 0, 0, 1],
    ])
    env = ControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                               stinit.DeterministicCRWStateInitialiser(initial_state))

    # Replay the action sequence and compare the state after every step.
    for action_row, expected_row in zip(actions, jobs):
        state, _, _, _ = env.step(action_row.reshape((4, 1)))
        assert np.all(state == expected_row.reshape([3, 1]))
Пример #23
0
def test_action_processing_with_multiple_resource_sets():
    """
    Check action interpretation and application in an environment with two resource sets.

    Dynamics are deterministic and there are no arrivals, so both the state and the reward
    returned by `step` can be validated exactly for an idling action and for an active one.
    """
    mu1, mu2, mu3, mu4 = 10, 3, 10, 3
    initial_state = (100, 100, 100, 100)
    cost_per_buffer = np.ones((4, 1))

    # No external arrivals anywhere in the network.
    demand_rate = np.array([[0], [0], [0], [0]])
    buffer_processing_matrix = np.array([
        [-mu1, 0, 0, 0],
        [mu1, -mu2, 0, 0],
        [0, 0, -mu3, 0],
        [0, 0, mu3, -mu4],
    ])
    # Two resources, each one responsible for two of the four activities.
    constituency_matrix = np.array([
        [1, 0, 0, 1],
        [0, 1, 1, 0],
    ])

    # Deterministic job generator and the RL environment built on top of it
    # (job conservation enabled).
    job_generator = DeterministicDiscreteReviewJobGenerator(
        sim_time_interval=1,
        demand_rate=demand_rate,
        buffer_processing_matrix=buffer_processing_matrix
    )
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, np.full((4, 1), np.inf), constituency_matrix,
                                 job_generator, state_initialiser, True)

    # Idling on both resources: the state must not move and the reward is minus the cost of
    # the initial state. The observed state is compared against the state divided by its sum.
    both_idle = (np.array([1, 0, 0]), np.array([1, 0, 0]))
    state_after_idle, reward_after_idle, _, _ = env.step(both_idle)
    expected_idle_state = np.array(initial_state) / sum(initial_state)
    assert all(expected_idle_state == state_after_idle)
    assert reward_after_idle == -1 * np.dot(initial_state, cost_per_buffer)

    # Activating activities 2 and 4: three jobs leave buffers 2 and 4, and the reward matches
    # the cost of the environment's new internal state.
    activate_2_and_4 = (np.array([0, 0, 1]), np.array([0, 1, 0]))
    state_after_active, reward_after_active, _, _ = env.step(activate_2_and_4)
    assert np.allclose(state_after_active, np.array([100, 97, 100, 97]) / 394)
    assert reward_after_active == -1 * np.dot(env.state.flatten(), cost_per_buffer)
def test_scheduling_single_buffer_events_constant():
    """
    Single station with a single buffer, where the only choices are to process or to idle.

    Every step one job arrives and one job is processed, so the number of jobs must stay equal
    to the initial state and the reward must equal minus the holding cost of that state.
    """
    initial_state = np.array([[2]])
    cost_per_buffer = np.array([[3.5]])

    # Arrival rate 1 against a processing rate of 1 (single entry of -1 in the matrix).
    job_generator = drjg(np.array([[1]]), -np.ones((1, 1)), sim_time_interval=1)
    env = ControlledRandomWalk(cost_per_buffer, np.array([[20]]), np.ones((1, 1)),
                               job_generator,
                               stinit.DeterministicCRWStateInitialiser(initial_state))

    always_process = np.ones((1, 1))
    for _ in range(100):
        state, reward, _, _ = env.step(always_process)
        assert state[0, 0] == initial_state
        assert reward == -cost_per_buffer[0] * state
Пример #25
0
def test_input_queued_switch_spaces():
    """
    Check the RL action space built for the input_queued_switch_3x3_model example.

    The example combines three physical resources with three additional activity constraints,
    which makes it a good test for resources with activity constraints.
    See snc/stochastic_network_control/environments/examples.py for the original set up code.
    """
    num_buffers = 9
    service_rates = [1] * num_buffers

    # Each buffer is drained by exactly one activity.
    buffer_processing_matrix = - np.diag(service_rates)
    demand_rate = 0.3 * np.ones((num_buffers, 1))

    # Rows 0-2 of the constituency matrix are the physical resources; the remaining rows are
    # the activity constraints appended underneath.
    physical_rows = np.array([[1, 1, 1, 0, 0, 0, 0, 0, 0],
                              [0, 0, 0, 1, 1, 1, 0, 0, 0],
                              [0, 0, 0, 0, 0, 0, 1, 1, 1]])
    constraints = np.array([[1, 0, 0, 1, 0, 0, 1, 0, 0],
                            [0, 1, 0, 0, 1, 0, 0, 1, 0],
                            [0, 0, 1, 0, 0, 1, 0, 0, 1]])
    constituency_matrix = np.vstack([physical_rows, constraints])

    # Build the environment with job conservation enabled.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    state_initialiser = stinit.DeterministicCRWStateInitialiser(np.zeros((num_buffers, 1)))
    env = RLControlledRandomWalk(
        np.ones((num_buffers, 1)),
        np.full((num_buffers, 1), np.inf),
        constituency_matrix,
        job_generator,
        state_initialiser,
        True,
        index_phys_resources=(0, 1, 2))

    # The wrapper should expose a single joint action space.
    assert isinstance(env._rl_action_space, gym_spaces.Tuple)
    assert len(env._rl_action_space.spaces) == 1
    # There are overall 34 combinations of activities (or inactivity) that are feasible.
    action_vectors = env._action_vectors
    assert len(action_vectors) == 34
    assert action_vectors.shape == (34, 9)
    # Every action vector is binary and uses each constraint at most once.
    assert snc.is_binary(action_vectors)
    assert snc.is_binary(np.matmul(action_vectors, constraints.T))
 def test_assert(self):
     """
     Building a MaxWeightAgent on this environment must raise an AssertionError.

     NOTE(review): the failing precondition is presumably related to the buffer processing
     matrix used here; confirm against MaxWeightAgent.
     """
     arrival_rates = np.array([0.5, 0.7])[:, None]
     processing_matrix = np.array([[-1, 1], [-1, 0]])

     generator = jg.DiscreteReviewJobGenerator(sim_time_interval=0.1,
                                               demand_rate=arrival_rates,
                                               buffer_processing_matrix=processing_matrix)
     # Everything besides the job generator is a dummy parameter not used in this test.
     env = crw.ControlledRandomWalk(
         job_generator=generator,
         cost_per_buffer=np.array([[1], [1]]),
         capacity=np.array([[1], [1]]),
         constituency_matrix=np.array([[1, 1], [1, 0]]),
         state_initialiser=si.DeterministicCRWStateInitialiser(
             np.array([0.5, 0.7])[:, None]),
     )

     with pytest.raises(AssertionError):
         mw.MaxWeightAgent(env)
def test_below_capacity_single_buffer():
    """
    Single resource, single buffer: the number of jobs never exceeds the maximum capacity.

    The first ten steps must keep the state within [0, capacity]; one further step is expected
    to raise an AssertionError.
    """
    capacity = np.array([[20]])

    # Arrival rate 3 against a processing rate of 1 (single entry of -1 in the matrix).
    job_generator = drjg(np.array([[3]]), -np.ones((1, 1)), sim_time_interval=1)
    env = ControlledRandomWalk(np.array([[3.5]]), capacity, np.ones((1, 1)), job_generator,
                               stinit.DeterministicCRWStateInitialiser(np.array([[0]])))

    always_process = np.ones((1, 1))
    for _ in range(10):
        state, _, _, _ = env.step(always_process)
        assert 0 <= state <= capacity

    # Stepping once more is rejected by the environment.
    with pytest.raises(AssertionError):
        env.step(always_process)
def test_job_conservation_multiple_routes_leave_same_buffer():
    """
    Job conservation when two routing activities drain the same buffer.

    All four activities are switched on for one step with job conservation enabled. Two final
    states are accepted because the outcome depends on the order in which the activities are
    handled.
    """
    initial_state = np.array([[3], [3], [1]])
    # Activities 1 and 2 both take jobs from buffer 1 (rates 2 and 5) and route them to
    # buffers 2 and 3 respectively; activities 3 and 4 drain buffers 2 and 3.
    buffer_processing_matrix = np.array([[-2, -5, 0, 0],
                                         [2, 0, -10, 0],
                                         [0, 5, 0, -10]])

    job_generator = drjg(np.zeros((3, 1)), buffer_processing_matrix, sim_time_interval=1)
    env = ControlledRandomWalk(np.ones((3, 1)), np.full((3, 1), np.inf), np.eye(4),
                               job_generator,
                               stinit.DeterministicCRWStateInitialiser(initial_state),
                               True)

    all_active = np.array([[1], [1], [1], [1]])
    state, _, _, _ = env.step(all_active)

    # Depending on the order of activities we can get different result, since they are given
    # by a dictionary. In a event driven simulator, the order would be FIFO.
    accepted_states = (np.array([[0], [2], [1]]), np.array([[0], [0], [3]]))
    assert any((state == candidate).all() for candidate in accepted_states)
def test_index_phys_resources_with_repeated_indexes():
    """
    Constructing the environment with a repeated physical resource index must fail.

    `index_phys_resources=(0, 0)` names the same resource twice, and ControlledRandomWalk is
    expected to raise an AssertionError for it.
    """
    repeated_indexes = (0, 0)

    # The remaining arguments are only needed to be able to call the constructor.
    job_generator = drjg(np.zeros((2, 1)), np.eye(2), sim_time_interval=1)
    state_initialiser = stinit.DeterministicCRWStateInitialiser(np.zeros((2, 1)))
    zero_cost = np.zeros((2, 1))
    zero_capacity = np.zeros((2, 1))
    identity_constituency = np.eye(2)

    with pytest.raises(AssertionError):
        ControlledRandomWalk(zero_cost, zero_capacity, identity_constituency, job_generator,
                             state_initialiser, index_phys_resources=repeated_indexes)
def test_scheduling_single_buffer_events_remove_two_until_empty():
    """
    One resource (station) with 1 buffer, so the only possible actions are process or idle.

    The buffer is never refilled (zero demand) and two jobs are removed per step, so it goes
    from 20 jobs down to 0 in ten steps. After each step the state must equal the expected
    number of remaining jobs and the reward must equal minus the holding cost of that state.
    One further step is expected to raise an AssertionError.
    """
    cost_per_buffer = np.array([[3.5]])
    demand_rate = np.array([[0]])
    initial_state = np.array([[20]])
    capacity = np.array([[20]])
    job_generator = drjg(demand_rate,
                         -2 * np.ones((1, 1)),
                         sim_time_interval=1)
    s0 = stinit.DeterministicCRWStateInitialiser(initial_state)

    env = ControlledRandomWalk(cost_per_buffer, capacity, np.ones((1, 1)),
                               job_generator, s0)
    action = np.ones((1, 1))

    initial_jobs = int(initial_state[0, 0])
    for step in range(1, 11):
        s, r, _, _ = env.step(action)
        # Compare plain scalars: passing a list that mixes a Python int with a 1-element array
        # to np.max builds a ragged array, which NumPy >= 1.24 rejects with a ValueError.
        expected_jobs = max(0, initial_jobs - 2 * step)
        assert s[0, 0] == expected_jobs
        assert r == -cost_per_buffer[0] * s
    # Stepping once the buffer is empty is rejected by the environment.
    with pytest.raises(AssertionError):
        _ = env.step(action)