Example #1
def generate_eval_envs(eval_env: OnlineFlexibleResourceAllocationEnv, num_evals: int, folder: str,
                       overwrite: bool = False) -> List[str]:
    """
    Generates and saves the evaluation environments used to evaluate the agents during training

    Args:
        eval_env: The evaluation environment used to generate the files
        num_evals: The number of environments to generate
        folder: The folder in which the environments are saved
        overwrite: Whether to overwrite previously saved environments

    Returns: A list of environment file paths
    """
    if not os.path.exists(folder):
        os.makedirs(folder)

    eval_files = []
    for eval_num in range(num_evals):
        eval_file = f'{folder}/eval_{eval_num}.env'
        eval_files.append(eval_file)
        if overwrite or not os.path.exists(eval_file):
            eval_env.reset()
            eval_env.save_env(eval_file)

    return eval_files
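
A minimal usage sketch for the function above, assuming the environment class shown in the other examples is importable; the settings path, folder and number of environments are placeholder choices, not values from the original project:

env = OnlineFlexibleResourceAllocationEnv('./training/settings/basic.env')
eval_files = generate_eval_envs(env, 5, './training/settings/eval_envs/demo/', overwrite=True)

# Each saved file can later be reloaded to give the same evaluation environment and starting state
for eval_file in eval_files:
    eval_env, state = OnlineFlexibleResourceAllocationEnv.load_env(eval_file)
    print(eval_file, len(state.server_tasks))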
Example #2
def test_env_load_settings():
    env = OnlineFlexibleResourceAllocationEnv('env/settings/basic.env')
    env_state = env.reset()

    for server, tasks in env_state.server_tasks.items():
        server.assert_valid()
        for task in tasks:
            task.assert_valid()

    for task in env._unallocated_tasks:
        task.assert_valid()
Example #3
def multi_env_single_env_training(folder, datetime, primary_writer, task_pricing_agents, resource_weighting_agents,
                                  multi_env_training: bool = True, total_episodes: int = 600, eval_freq: int = 10):
    """
    Multi-environment and single-environment training

    Args:
        folder: Training folder name
        datetime: The datetime of the training
        primary_writer: The primary summary writer used for the multi-environment results
        task_pricing_agents: List of task pricing agents
        resource_weighting_agents: List of resource weighting agents
        multi_env_training: Whether to use multi-environment training (otherwise the single environment is used)
        total_episodes: Number of training episodes
        eval_freq: The evaluation frequency
    """
    single_env = OnlineFlexibleResourceAllocationEnv('./training/settings/basic.env')
    multi_env = OnlineFlexibleResourceAllocationEnv([
        './training/settings/basic.env',
        './training/settings/large_tasks_servers.env',
        './training/settings/limited_resources.env',
        './training/settings/mixture_tasks_servers.env'
    ])

    multi_envs_eval = generate_eval_envs(multi_env, 20, f'./training/settings/eval_envs/multi_env/')
    single_env_eval = generate_eval_envs(single_env, 5, f'./training/settings/eval_envs/single_env/')
    single_env_eval_writer = tf.summary.create_file_writer(f'training/results/logs/{folder}_single_env_{datetime}')

    # Loop over the episodes
    for episode in range(total_episodes):
        if episode % 5 == 0:
            print(f'Episode: {episode} at {dt.datetime.now().strftime("%H:%M:%S")}')
        with primary_writer.as_default():
            if multi_env_training:
                train_agent(multi_env, task_pricing_agents, resource_weighting_agents)
            else:
                train_agent(single_env, task_pricing_agents, resource_weighting_agents)

        # Every eval_frequency episodes, the agents are evaluated
        if episode % eval_freq == 0:
            with primary_writer.as_default():
                eval_agent(multi_envs_eval, episode, task_pricing_agents, resource_weighting_agents)
            with single_env_eval_writer.as_default():
                eval_agent(single_env_eval, episode, task_pricing_agents, resource_weighting_agents)

    for agent in task_pricing_agents:
        agent.save()
    for agent in resource_weighting_agents:
        agent.save()
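
A hypothetical driver for the function above, sketched under the assumption that setup_tensorboard, the DQN agents and the bidirectional network constructors from the later examples are importable; the folder name and episode counts are arbitrary illustrative values:

# Illustrative only: names and counts are assumptions, not the original training configuration
writer, datetime = setup_tensorboard('training/results/logs/', 'multi_env_demo')
task_pricing_agents = [TaskPricingDqnAgent(0, create_bidirectional_dqn_network(9, 21))]
resource_weighting_agents = [ResourceWeightingDqnAgent(0, create_bidirectional_dqn_network(16, 11))]

multi_env_single_env_training('multi_env_demo', datetime, writer, task_pricing_agents,
                              resource_weighting_agents, multi_env_training=True,
                              total_episodes=60, eval_freq=10)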
Example #4
def test_train_agents():
    print()
    setup_tensorboard('training/tmp/', 'train_agents')

    env = OnlineFlexibleResourceAllocationEnv('training/settings/basic.env')

    pricing_agents = [
        TaskPricingDqnAgent(0,
                            create_rnn_dqn_network(9, 5),
                            batch_size=16,
                            initial_training_replay_size=16,
                            training_freq=100),
    ]
    weighting_agents = [
        ResourceWeightingDqnAgent(2,
                                  create_rnn_dqn_network(16, 5),
                                  batch_size=16,
                                  initial_training_replay_size=16,
                                  training_freq=100)
    ]

    for _ in range(2):
        train_agent(env, pricing_agents, weighting_agents)

    # noinspection PyTypeChecker
    agents: List[
        ReinforcementLearningAgent] = pricing_agents + weighting_agents
    for agent in agents:
        assert 0 < agent.total_updates
Example #5
def test_seq2seq_training():
    print()
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'training/settings/resource_allocation.env')

    agent = ResourceWeightingSeq2SeqAgent(0,
                                          create_seq2seq_actor_network(),
                                          create_seq2seq_critic_network(),
                                          create_seq2seq_critic_network(),
                                          batch_size=1,
                                          save_folder='tmp')
    for _ in range(4):
        actions = {
            server: agent.weight(tasks, server, state.time_step, training=True)
            for server, tasks in state.server_tasks.items()
        }

        next_state, rewards, done, _ = env.step(actions)

        for server in state.server_tasks.keys():
            resource_state = ResourceAllocationState(
                state.server_tasks[server], server, state.time_step)
            next_resource_state = ResourceAllocationState(
                next_state.server_tasks[server], server, next_state.time_step)
            agent.resource_allocation_obs(resource_state, actions[server],
                                          next_resource_state, rewards[server])

        state = next_state

    agent.batch_size = len(agent.replay_buffer)
    print(f'Batch size: {agent.batch_size}')
    agent.train()
Example #6
def test_seq2seq_actions():
    print()
    # Check that Seq2seq PG actions are valid
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'agent/settings/resource_allocation.env')

    actor_network = create_seq2seq_actor_network()
    critic_network = create_seq2seq_critic_network()
    twin_critic_network = create_seq2seq_critic_network()
    seq2seq_agent = ResourceWeightingSeq2SeqAgent(0, actor_network,
                                                  critic_network,
                                                  twin_critic_network)

    weighting_actions = {
        server: seq2seq_agent.weight(tasks, server, state.time_step)
        for server, tasks in state.server_tasks.items()
    }
    state, rewards, done, _ = env.step(weighting_actions)

    weighting_actions = {
        server: seq2seq_agent.weight(tasks,
                                     server,
                                     state.time_step,
                                     training=True)
        for server, tasks in state.server_tasks.items()
    }
    state, rewards, done, _ = env.step(weighting_actions)
Example #7
def test_agent_actions():
    print()
    pricing_agents = [
        TaskPricingDqnAgent(0, create_lstm_dqn_network(9, 5)),
        TaskPricingDdqnAgent(1, create_lstm_dqn_network(9, 5)),
        TaskPricingDuelingDqnAgent(2, create_lstm_dueling_dqn_network(9, 5)),
        TaskPricingCategoricalDqnAgent(
            3, create_lstm_categorical_dqn_network(9, 5)),
        TaskPricingDdpgAgent(4, create_lstm_actor_network(9),
                             create_lstm_critic_network(9)),
        TaskPricingTD3Agent(5, create_lstm_actor_network(9),
                            create_lstm_critic_network(9),
                            create_lstm_critic_network(9))
    ]
    weighting_agents = [
        ResourceWeightingDqnAgent(0, create_lstm_dqn_network(16, 5)),
        ResourceWeightingDdqnAgent(1, create_lstm_dqn_network(16, 5)),
        ResourceWeightingDuelingDqnAgent(
            2, create_lstm_dueling_dqn_network(16, 5)),
        ResourceWeightingCategoricalDqnAgent(
            3, create_lstm_categorical_dqn_network(16, 5)),
        ResourceWeightingDdpgAgent(4, create_lstm_actor_network(16),
                                   create_lstm_critic_network(16)),
        ResourceWeightingTD3Agent(5, create_lstm_actor_network(16),
                                  create_lstm_critic_network(16),
                                  create_lstm_critic_network(16))
    ]

    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'agent/settings/actions.env')
    for agent in pricing_agents:
        actions = {
            server: agent.bid(state.auction_task, tasks, server,
                              state.time_step)
            for server, tasks in state.server_tasks.items()
        }
    # noinspection PyUnboundLocalVariable
    print(
        f'Actions: {{{", ".join([f"{server.name}: {action}" for server, action in actions.items()])}}}'
    )

    state, rewards, done, _ = env.step(actions)

    for agent in weighting_agents:
        actions = {
            server: agent.weight(tasks, server, state.time_step)
            for server, tasks in state.server_tasks.items()
        }
    print(
        f'Actions: {{{", ".join([f"{server.name}: {list(task_action.values())}" for server, task_action in actions.items()])}}}'
    )

    state, rewards, done, _ = env.step(actions)
Example #8
def test_agent_evaluation():
    print()
    setup_tensorboard('training/results/tmp/', 'agent_eval')

    env = OnlineFlexibleResourceAllocationEnv('training/settings/basic.env')

    eval_envs = generate_eval_envs(env,
                                   5,
                                   'training/settings/tmp/',
                                   overwrite=True)
    assert len(os.listdir('training/settings/tmp/')) == 5
    total_auctions, total_resource_allocation = 0, 0
    for eval_env in eval_envs:
        env, state = OnlineFlexibleResourceAllocationEnv.load_env(eval_env)
        total_auctions += len(env._unallocated_tasks) + (
            1 if state.auction_task is not None else 0)
        total_resource_allocation += env._total_time_steps + 1

    pricing_agents = [
        TaskPricingDqnAgent(0, create_bidirectional_dqn_network(9, 5)),
        TaskPricingDdpgAgent(1, create_lstm_actor_network(9),
                             create_lstm_critic_network(9))
    ]
    weighting_agents = [
        ResourceWeightingDqnAgent(2, create_bidirectional_dqn_network(16, 5)),
        ResourceWeightingDdpgAgent(3, create_lstm_actor_network(16),
                                   create_lstm_critic_network(16)),
    ]

    results = eval_agent(eval_envs, 0, pricing_agents, weighting_agents)
    print(
        f'Results - Total prices: {results.total_prices}, Number of completed tasks: {results.num_completed_tasks}, '
        f'failed tasks: {results.num_failed_tasks}, winning prices: {results.winning_prices}, '
        f'Number of auctions: {results.num_auctions}, resource allocations: {results.num_resource_allocations}'
    )
    assert 0 < results.num_completed_tasks
    assert 0 < results.num_failed_tasks

    assert results.num_auctions == total_auctions
    assert results.num_resource_allocations == total_resource_allocation
Example #9
def eval_fixed_env(eval_envs_filename):
    total_completed_tasks = []
    for eval_env_filename in eval_envs_filename:
        env, state = OnlineFlexibleResourceAllocationEnv.load_env(
            eval_env_filename)

        try:
            fixed_completed_tasks = fixed_resource_allocation_model(env, state)
        except Exception:
            fixed_completed_tasks = -1
        total_completed_tasks.append(fixed_completed_tasks)

    return total_completed_tasks
Example #10
def test_env_save_load():
    # TODO add comments
    env = OnlineFlexibleResourceAllocationEnv('env/settings/basic.env')
    state = env.reset()

    random_task_pricing = RandomTaskPricingAgent(0)
    random_resource_weighting = RandomResourceWeightingAgent(0)

    for _ in range(40):
        if state.auction_task is not None:
            actions = {
                server: random_task_pricing.bid(state.auction_task, tasks, server, state.time_step)
                for server, tasks in state.server_tasks.items()
            }
        else:
            actions = {
                server: random_resource_weighting.weight(tasks, server, state.time_step)
                for server, tasks in state.server_tasks.items()
            }
        state, rewards, done, info = env.step(actions)

    env.save_env('env/settings/tmp/save.env')
    loaded_env, loaded_env_state = env.load_env('env/settings/tmp/save.env')

    assert state.auction_task == loaded_env_state.auction_task
    assert len(env._unallocated_tasks) == len(loaded_env._unallocated_tasks)
    for task, loaded_task in zip(env._unallocated_tasks, loaded_env._unallocated_tasks):
        assert task == loaded_task
    for server, tasks in state.server_tasks.items():
        loaded_server, loaded_tasks = next(((loaded_server, loaded_tasks)
                                            for loaded_server, loaded_tasks in loaded_env_state.server_tasks.items()
                                            if loaded_server.name == server.name), (None, None))
        assert loaded_server is not None and loaded_tasks is not None
        assert server.name == loaded_server.name and server.storage_cap == loaded_server.storage_cap and \
            server.computational_cap == loaded_server.computational_cap and \
            server.bandwidth_cap == loaded_server.bandwidth_cap
        for task, loaded_task in zip(tasks, loaded_tasks):
            assert task.name == loaded_task.name and task.required_storage == loaded_task.required_storage and \
                task.required_computation == loaded_task.required_computation and \
                task.required_results_data == loaded_task.required_results_data and \
                task.auction_time == loaded_task.auction_time and task.deadline == loaded_task.deadline and \
                task.stage is loaded_task.stage and task.loading_progress == loaded_task.loading_progress and \
                task.compute_progress == loaded_task.compute_progress and \
                task.sending_progress == loaded_task.sending_progress and task.price == loaded_task.price
            task.assert_valid()

    loaded_env.save_env('env/settings/tmp/loaded_save.env')
    with open('env/settings/tmp/save.env') as env_file:
        env_file_data = env_file.read()
    with open('env/settings/tmp/loaded_save.env') as loaded_env_file:
        loaded_env_file_data = loaded_env_file.read()
    assert env_file_data == loaded_env_file_data
Example #11
def test_train_rnd_agents():
    print()
    setup_tensorboard('training/tmp/', 'train_rnd_agents')
    env = OnlineFlexibleResourceAllocationEnv([
        '../src/training/settings/basic.env',
        '../src/training/settings/large_tasks_servers.env',
        '../src/training/settings/mixture_tasks_servers.env',
        '../src/training/settings/limited_resources.env',
    ])

    pricing_agents = [RandomTaskPricingRLAgent(0)]
    weighting_agents = [RandomResourceWeightingRLAgent(0)]

    for _ in range(10):
        train_agent(env, pricing_agents, weighting_agents)
Example #12
def test_env_auction_step():
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'env/settings/auction.env')

    server_0, server_1, server_2 = list(state.server_tasks.keys())
    assert server_0.name == 'Basic 0' and server_1.name == 'Basic 1' and server_2.name == 'Basic 2'

    # Tests a normal case for the Vickrey auction where the winning server is paid the second price
    actions = {server_0: 1.0, server_1: 3.0, server_2: 0.0}

    next_state, rewards, done, info = env.step(actions)
    assert server_0 in rewards and rewards[server_0] == 3.0
    assert len(state.server_tasks[server_0]) + 1 == len(next_state.server_tasks[server_0]) and \
        len(state.server_tasks[server_1]) == len(next_state.server_tasks[server_1]) and \
        len(state.server_tasks[server_2]) == len(next_state.server_tasks[server_2])
    state = next_state

    # Test a case where two servers provide the same price
    actions = {server_0: 3.0, server_1: 3.0, server_2: 0.0}
    next_state, rewards, done, _ = env.step(actions)
    assert (server_0 in rewards
            and rewards[server_0] == 3.0) or (server_1 in rewards
                                              and rewards[server_1] == 3.0)
    assert len(next_state.server_tasks[server_0]) == len(state.server_tasks[server_0]) + 1 or \
        len(next_state.server_tasks[server_1]) == len(state.server_tasks[server_1]) + 1

    # Test where no server provides a price
    actions = {server_0: 0.0, server_1: 0.0, server_2: 0.0}
    state, rewards, done, _ = env.step(actions)
    assert len(rewards) == 0

    # Test where only a single server provides a price
    actions = {server_0: 1.0, server_1: 0.0, server_2: 0.0}
    next_state, rewards, done, _ = env.step(actions)
    assert server_0 in rewards and rewards[server_0] == 1.0
    assert len(next_state.server_tasks[server_0]) == len(
        state.server_tasks[server_0]) + 1

    # Test where all of the servers bid
    actions = {server_0: 2.0, server_1: 3.0, server_2: 1.0}
    state, rewards, done, _ = env.step(actions)
    assert server_2 in rewards and rewards[server_2] == 2.0
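
The pricing rule exercised by the assertions above can be summarised on its own: a bid of 0 means "no bid", the lowest positive bid wins, and the winner is paid the second-lowest positive bid (or its own bid if it is the only bidder). The following self-contained sketch illustrates that reverse Vickrey (second-price) rule; it is an illustration of the auction mechanics, not the environment's actual implementation:

from typing import Dict, Optional, Tuple


def reverse_vickrey_winner(bids: Dict[str, float]) -> Optional[Tuple[str, float]]:
    """Returns (winner, price paid) for a reverse second-price auction, or None if nobody bids."""
    positive_bids = {server: bid for server, bid in bids.items() if 0 < bid}
    if not positive_bids:
        return None
    winner = min(positive_bids, key=positive_bids.get)
    other_bids = [bid for server, bid in positive_bids.items() if server != winner]
    price = min(other_bids) if other_bids else positive_bids[winner]
    return winner, price


# Mirrors the first case in the test above: bids of 1.0, 3.0 and 0.0 -> server_0 wins and is paid 3.0
assert reverse_vickrey_winner({'server_0': 1.0, 'server_1': 3.0, 'server_2': 0.0}) == ('server_0', 3.0)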
Example #13
def eval_agent(env_filenames: List[str], episode: int, pricing_agents: List[TaskPricingAgent],
               weighting_agents: List[ResourceWeightingAgent]) -> EvalResults:
    """
    Evaluation of agents using a list of preset environments

    Args:
        env_filenames: Evaluation environment filenames
        episode: The episode of evaluation
        pricing_agents: List of task pricing agents
        weighting_agents: List of resource weighting agents

    Returns: The evaluation results
    """
    results = EvalResults()

    for env_filename in env_filenames:
        eval_env, state = OnlineFlexibleResourceAllocationEnv.load_env(env_filename)
        server_pricing_agents, server_weighting_agents = allocate_agents(state, pricing_agents, weighting_agents)

        done = False
        while not done:
            if state.auction_task:
                bidding_actions = {
                    server: server_pricing_agents[server].bid(state.auction_task, tasks, server, state.time_step)
                    for server, tasks in state.server_tasks.items()
                }
                state, rewards, done, info = eval_env.step(bidding_actions)
                results.auction(bidding_actions, rewards)
            else:
                weighting_actions = {
                    server: server_weighting_agents[server].weight(tasks, server, state.time_step)
                    for server, tasks in state.server_tasks.items()
                }
                state, rewards, done, info = eval_env.step(weighting_actions)
                results.resource_allocation(weighting_actions, rewards)

        results.finished_env()

    results.save(episode)
    return results
Example #14
def test_c51_actions():
    print()
    # Test the C51 agent actions
    pricing_agent = TaskPricingCategoricalDqnAgent(
        3, create_lstm_categorical_dqn_network(9, 5), initial_epsilon=0.5)
    weighting_agent = ResourceWeightingCategoricalDqnAgent(
        3, create_lstm_categorical_dqn_network(16, 5), initial_epsilon=0.5)

    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'agent/settings/actions.env')
    auction_actions = {
        server: pricing_agent.bid(state.auction_task, tasks, server,
                                  state.time_step)
        for server, tasks in state.server_tasks.items()
    }
    print(f'Greedy actions: {list(auction_actions.values())}')
    assert any(0 < action for server, action in auction_actions.items())

    server, tasks = next(
        (server, tasks) for server, tasks in state.server_tasks.items())
    observation = tf.expand_dims(pricing_agent._network_obs(
        state.auction_task, tasks, server, state.time_step),
                                 axis=0)
    network_output = pricing_agent.model_network(observation)
    probabilities = tf.nn.softmax(network_output)
    probability_value = probabilities * pricing_agent.z_values
    q_values = tf.reduce_sum(probability_value, axis=2)
    argmax_q_values = tf.math.argmax(q_values, axis=1, output_type=tf.int32)
    print(
        f'Network output: {network_output}\nProbabilities: {probabilities}\nProbability value: {probability_value}\n'
        f'Q value: {q_values}\nArgmax Q value: {argmax_q_values}')

    auction_actions = {
        server: pricing_agent.bid(state.auction_task,
                                  tasks,
                                  server,
                                  state.time_step,
                                  training=True)
        for server, tasks in state.server_tasks.items()
    }
    print(f'Epsilon Greedy actions: {list(auction_actions.values())}\n')
    assert any(0 < action for server, action in auction_actions.items())

    states, rewards, dones, _ = env.step(auction_actions)

    weighting_actions = {
        server: weighting_agent.weight(tasks, server, state.time_step)
        for server, tasks in state.server_tasks.items()
    }
    print(
        f'Greedy actions: {[list(actions.values()) for actions in weighting_actions.values()]}'
    )
    assert any(0 < action
               for server, task_actions in weighting_actions.items()
               for task, action in task_actions.items())

    weighting_actions = {
        server: weighting_agent.weight(tasks,
                                       server,
                                       state.time_step,
                                       training=True)
        for server, tasks in state.server_tasks.items()
    }
    print(
        f'Epsilon Greedy actions: {[list(actions.values()) for actions in weighting_actions.values()]}'
    )
    assert any(0 < action
               for server, task_actions in weighting_actions.items()
               for task, action in task_actions.items())
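
The distributional-to-Q-value reduction printed in the test above can be reproduced in isolation. A minimal numeric sketch (the logits and the atom support z_values are made-up values, not the agent's actual ones): the network emits one logit per (action, atom) pair, the softmax over atoms gives a probability distribution, and the Q-value of each action is the expectation of the support under that distribution:

import tensorflow as tf

logits = tf.constant([[[0.1, 0.4, 0.2],
                       [0.3, 0.0, 0.5]]])                    # shape (batch=1, actions=2, atoms=3)
z_values = tf.constant([-1.0, 0.0, 1.0])                     # hypothetical atom support
probabilities = tf.nn.softmax(logits, axis=-1)               # distribution over atoms per action
q_values = tf.reduce_sum(probabilities * z_values, axis=-1)  # expected return per action
greedy_action = tf.math.argmax(q_values, axis=1, output_type=tf.int32)
print(f'Q values: {q_values}, greedy action: {greedy_action}')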
Example #15
def test_ddpg_actions():
    print()
    # Check that DDPG actions are valid
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'agent/settings/actions.env')

    repeat, max_repeat = 0, 10
    auction_actions = {}
    while repeat <= max_repeat:
        pricing_agent = TaskPricingDdpgAgent(3,
                                             create_lstm_actor_network(9),
                                             create_lstm_critic_network(9),
                                             initial_epsilon=0.5)
        auction_actions = {
            server: pricing_agent.bid(state.auction_task, tasks, server,
                                      state.time_step)
            for server, tasks in state.server_tasks.items()
        }
        print(f'Greedy actions: {list(auction_actions.values())}')
        if any(0 < action for server, action in auction_actions.items()):

            auction_actions = {
                server: pricing_agent.bid(state.auction_task,
                                          tasks,
                                          server,
                                          state.time_step,
                                          training=True)
                for server, tasks in state.server_tasks.items()
            }
            print(
                f'Epsilon Greedy actions: {list(auction_actions.values())}\n')
            if any(0 < action for server, action in auction_actions.items()):
                break
        elif repeat == max_repeat:
            raise Exception()
        else:
            repeat += 1

    states, rewards, dones, _ = env.step(auction_actions)

    repeat, max_repeat = 0, 10
    while repeat <= max_repeat:
        weighting_agent = ResourceWeightingDdpgAgent(
            3,
            create_lstm_actor_network(16),
            create_lstm_critic_network(16),
            initial_epsilon=0.5)
        weighting_actions = {
            server: weighting_agent.weight(tasks, server, state.time_step)
            for server, tasks in state.server_tasks.items()
        }
        print(
            f'Greedy actions: {[list(actions.values()) for actions in weighting_actions.values()]}'
        )
        if any(0 < action
               for server, task_actions in weighting_actions.items()
               for task, action in task_actions.items()):
            weighting_actions = {
                server: weighting_agent.weight(tasks,
                                               server,
                                               state.time_step,
                                               training=True)
                for server, tasks in state.server_tasks.items()
            }
            print(
                f'Epsilon Greedy actions: {[list(actions.values()) for actions in weighting_actions.values()]}'
            )
            if any(0 < action
                   for server, task_actions in weighting_actions.items()
                   for task, action in task_actions.items()):
                break
        elif repeat == max_repeat:
            raise Exception()
        else:
            repeat += 1
Example #16
def test_task_price_training():
    print()
    setup_tensorboard('/tmp/results/', 'price_training')

    # List of agents
    agents: List[TaskPricingRLAgent] = [
        TaskPricingDqnAgent(0,
                            create_lstm_dqn_network(9, 10),
                            batch_size=4,
                            save_folder='tmp'),
        TaskPricingDdqnAgent(1,
                             create_lstm_dqn_network(9, 10),
                             batch_size=4,
                             save_folder='tmp'),
        TaskPricingDuelingDqnAgent(2,
                                   create_lstm_dueling_dqn_network(9, 10),
                                   batch_size=4,
                                   save_folder='tmp'),
        TaskPricingCategoricalDqnAgent(3,
                                       create_lstm_categorical_dqn_network(
                                           9, 10),
                                       batch_size=4,
                                       save_folder='tmp'),
        TaskPricingDdpgAgent(4,
                             create_lstm_actor_network(9),
                             create_lstm_critic_network(9),
                             batch_size=4,
                             save_folder='tmp'),
        TaskPricingTD3Agent(5,
                            create_lstm_actor_network(9),
                            create_lstm_critic_network(9),
                            create_lstm_critic_network(9),
                            batch_size=4,
                            save_folder='tmp')
    ]

    # Load the environment
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'training/settings/auction.env')

    # Servers
    server_1, server_2 = list(state.server_tasks.keys())
    # Actions
    actions = {server_1: 1.0, server_2: 2.0}

    # Environment step
    next_state, reward, done, info = env.step(actions)

    # Server states
    server_1_state = TaskPricingState(state.auction_task,
                                      state.server_tasks[server_1], server_1,
                                      state.time_step)
    server_2_state = TaskPricingState(state.auction_task,
                                      state.server_tasks[server_2], server_2,
                                      state.time_step)

    # Next server states
    next_server_1_state = TaskPricingState(next_state.auction_task,
                                           next_state.server_tasks[server_1],
                                           server_1, next_state.time_step)
    next_server_2_state = TaskPricingState(next_state.auction_task,
                                           next_state.server_tasks[server_2],
                                           server_2, next_state.time_step)
    # Finished auction task
    finished_task = next(finished_task
                         for finished_task in next_state.server_tasks[server_1]
                         if finished_task == state.auction_task)
    finished_task = finished_task._replace(stage=TaskStage.COMPLETED)
    failed_task = finished_task._replace(stage=TaskStage.FAILED)

    # Loop over the agents, add the observations and try training
    for agent in agents:
        agent.winning_auction_bid(server_1_state, actions[server_1],
                                  finished_task, next_server_1_state)
        agent.winning_auction_bid(server_1_state, actions[server_1],
                                  failed_task, next_server_1_state)
        agent.failed_auction_bid(server_2_state, actions[server_2],
                                 next_server_2_state)
        agent.failed_auction_bid(server_2_state, 0, next_server_2_state)

        agent.train()

    print(
        f'Rewards: {[trajectory[3] for trajectory in agents[0].replay_buffer]}'
    )
Example #17
def test_epsilon_policy():
    print()
    # Tests the epsilon policy by getting agent actions that should update the agent epsilon over time

    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'agent/settings/actions.env')

    # Number of epsilon steps for the agents
    epsilon_steps = 25

    # Agents that have a custom _get_action function
    pricing_agents = [
        TaskPricingDqnAgent(0,
                            create_lstm_dqn_network(9, 5),
                            epsilon_steps=epsilon_steps,
                            epsilon_update_freq=1,
                            epsilon_log_freq=1),
        TaskPricingCategoricalDqnAgent(1,
                                       create_lstm_categorical_dqn_network(
                                           9, 5),
                                       epsilon_steps=epsilon_steps,
                                       epsilon_update_freq=1,
                                       epsilon_log_freq=1),
        TaskPricingDdpgAgent(2,
                             create_lstm_actor_network(9),
                             create_lstm_critic_network(9),
                             epsilon_steps=epsilon_steps,
                             epsilon_update_freq=1,
                             epsilon_log_freq=1)
    ]
    weighting_agents = [
        ResourceWeightingDqnAgent(0,
                                  create_lstm_dqn_network(16, 5),
                                  epsilon_steps=epsilon_steps,
                                  epsilon_update_freq=1,
                                  epsilon_log_freq=1),
        ResourceWeightingCategoricalDqnAgent(
            1,
            create_lstm_categorical_dqn_network(16, 5),
            epsilon_steps=epsilon_steps,
            epsilon_update_freq=1,
            epsilon_log_freq=1),
        ResourceWeightingDdpgAgent(2,
                                   create_lstm_actor_network(16),
                                   create_lstm_critic_network(16),
                                   epsilon_steps=epsilon_steps,
                                   epsilon_update_freq=1,
                                   epsilon_log_freq=1)
    ]

    # Generate a tf writer and generate actions that will update the epsilon values for both agents
    writer = tf.summary.create_file_writer(f'agent/tmp/testing_epsilon')
    num_steps = 10
    with writer.as_default():
        for _ in range(num_steps):
            for agent in pricing_agents:
                actions = {
                    server: agent.bid(state.auction_task,
                                      tasks,
                                      server,
                                      state.time_step,
                                      training=True)
                    for server, tasks in state.server_tasks.items()
                }

        state, rewards, done, _ = env.step(actions)

        for _ in range(num_steps):
            for agent in weighting_agents:
                actions = {
                    server: agent.weight(tasks,
                                         server,
                                         state.time_step,
                                         training=True)
                    for server, tasks in state.server_tasks.items()
                }

        state, rewards, done, _ = env.step(actions)

    # Check that the resulting total action are valid
    for agent in pricing_agents:
        print(f'Agent: {agent.name}')
        assert agent.total_actions == num_steps * 3

    for agent in weighting_agents:
        print(f'Agent: {agent.name}')
        assert agent.total_actions == num_steps * 3

    # Check that the agent epsilon are correct
    assert pricing_agents[0].final_epsilon == pricing_agents[
        0].epsilon and pricing_agents[1].final_epsilon == pricing_agents[
            1].epsilon
    assert weighting_agents[0].final_epsilon == weighting_agents[
        0].epsilon and weighting_agents[1].final_epsilon == weighting_agents[
            1].epsilon
    assert pricing_agents[2].final_epsilon_std == pricing_agents[2].epsilon_std
    assert weighting_agents[2].final_epsilon_std == weighting_agents[
        2].epsilon_std
Example #18
def test_resource_allocation_training():
    print()
    setup_tensorboard('/tmp/results/', 'resource_allocation_training')

    # List of agents
    agents: List[ResourceWeightingRLAgent] = [
        ResourceWeightingDqnAgent(0,
                                  create_lstm_dqn_network(16, 10),
                                  batch_size=4,
                                  save_folder='tmp'),
        ResourceWeightingDdqnAgent(1,
                                   create_lstm_dqn_network(16, 10),
                                   batch_size=4,
                                   save_folder='tmp'),
        ResourceWeightingDuelingDqnAgent(2,
                                         create_lstm_dueling_dqn_network(
                                             16, 10),
                                         batch_size=4,
                                         save_folder='tmp'),
        ResourceWeightingCategoricalDqnAgent(
            3,
            create_lstm_categorical_dqn_network(16, 10),
            batch_size=2,
            save_folder='tmp'),
        ResourceWeightingDdpgAgent(4,
                                   create_lstm_actor_network(16),
                                   create_lstm_critic_network(16),
                                   batch_size=4,
                                   save_folder='tmp'),
        ResourceWeightingTD3Agent(5,
                                  create_lstm_actor_network(16),
                                  create_lstm_critic_network(16),
                                  create_lstm_critic_network(16),
                                  batch_size=4,
                                  save_folder='tmp'),
    ]

    # Load the environment
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'training/settings/resource_allocation.env')

    # Servers and tasks
    server = list(state.server_tasks.keys())[0]
    task_1, task_2, task_3, task_4 = list(state.server_tasks[server])

    # Actions
    actions = {server: {task_1: 1.0, task_2: 3.0, task_3: 0.0, task_4: 5.0}}

    # Environment step
    next_state, rewards, done, _ = env.step(actions)

    # Resource state
    resource_state = ResourceAllocationState(state.server_tasks[server],
                                             server, state.time_step)
    # Next server and resource state
    next_resource_state = ResourceAllocationState(
        next_state.server_tasks[server], server, next_state.time_step)

    for agent in agents:
        agent.resource_allocation_obs(resource_state, actions[server],
                                      next_resource_state, rewards[server])

        agent.train()

    agent = ResourceWeightingSeq2SeqAgent(6,
                                          create_seq2seq_actor_network(),
                                          create_seq2seq_critic_network(),
                                          create_seq2seq_critic_network(),
                                          batch_size=2,
                                          save_folder='tmp')
    agent.resource_allocation_obs(resource_state, actions[server],
                                  next_resource_state, rewards[server])
    agent.resource_allocation_obs(resource_state, actions[server],
                                  next_resource_state, rewards[server])
    agent.train()

    print(
        f'Rewards: {[trajectory[3] for trajectory in agents[0].replay_buffer]}'
    )
Example #19
def test_env_step_rnd_action():
    """
    Tests the environment works with random actions
    """
    print()

    # Generate the environment
    env = OnlineFlexibleResourceAllocationEnv([
        '../src/training/settings/basic.env',
        '../src/training/settings/large_tasks_servers.env',
        '../src/training/settings/mixture_tasks_servers.env',
        '../src/training/settings/limited_resources.env',
    ])

    # Random action agents
    random_task_pricing, random_resource_weighting = RandomTaskPricingAgent(
        0), RandomResourceWeightingAgent(0)

    # Run the environment multiple times
    for _ in tqdm(range(200)):
        state = env.reset()

        # Number of auction opportunities
        num_auction_opportunities = len(
            env._unallocated_tasks) + (1 if state.auction_task else 0)
        # Number of auction and resource allocation steps taken
        num_auctions, num_resource_allocations = 0, 0
        # Number of environment server
        num_servers = len(state.server_tasks)

        # Take random steps over the environment
        done = False
        while not done:
            # Check that the number of servers is constant
            assert len(state.server_tasks) == num_servers

            # Generate the actions
            if state.auction_task:
                actions: Dict[Server, float] = {
                    server: random_task_pricing.bid(state.auction_task,
                                                    allocated_tasks, server,
                                                    state.time_step)
                    for server, allocated_tasks in state.server_tasks.items()
                }
                num_auctions += 1
            else:
                actions: Dict[Server, Dict[Task, float]] = {
                    server:
                    random_resource_weighting.weight(tasks, server,
                                                     state.time_step)
                    for server, tasks in state.server_tasks.items()
                }
                num_resource_allocations += 1

            # Take the action on the environment
            state, reward, done, info = env.step(actions)
            assert all(task.auction_time <= state.time_step <= task.deadline
                       for _, tasks in state.server_tasks.items()
                       for task in tasks)
            for server, tasks in state.server_tasks.items():
                server.assert_valid()
                for task in tasks:
                    task.assert_valid()

        # Check that the number of auction and resource allocation steps are correct
        assert state.auction_task is None
        assert len(env._unallocated_tasks) == 0
        assert num_auctions == num_auction_opportunities
        assert num_resource_allocations == env._total_time_steps + 1
Example #20
def test_env_resource_allocation_step():
    print()

    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'env/settings/resource_allocation.env')
    print(state)
Example #21
def train_agent(training_env: OnlineFlexibleResourceAllocationEnv, pricing_agents: List[TaskPricingRLAgent],
                weighting_agents: List[ResourceWeightingRLAgent]):
    """
    Trains reinforcement learning agents through the provided environment

    Args:
        training_env: Training environment used
        pricing_agents: A list of reinforcement learning task pricing agents
        weighting_agents: A list of reinforcement learning resource weighting agents
    """
    # Reset the environment getting a new training environment for this episode
    state = training_env.reset()

    # Allocate the servers with their random task pricing and resource weighting agents
    server_pricing_agents: Dict[Server, TaskPricingRLAgent] = {
        server: rnd.choice(pricing_agents) for server in state.server_tasks.keys()
    }
    server_weighting_agents: Dict[Server, ResourceWeightingRLAgent] = {
        server: rnd.choice(weighting_agents) for server in state.server_tasks.keys()
    }

    # Store each server's most recent auction observation (None before the first auction as no observation
    #   has been seen yet): the agent's auction state (auction task, server tasks, server, time step),
    #   the action taken and whether the server won the auction task
    server_auction_states: Dict[Server, Optional[Tuple[TaskPricingState, float, bool]]] = {
        server: None for server in state.server_tasks.keys()
    }

    # For successful auctions, the agent state of the winning bid, the action taken and the following state
    #   are stored so they can be added as an agent observation once the task finishes, when it is known whether the task completed
    successful_auction_states: List[Tuple[TaskPricingState, float, TaskPricingState]] = []

    # Loop over the environment until it is done (the current time step > the environment's total time steps)
    done = False
    while not done:
        # If the state has a task to be auctioned then get each server's price as its action
        if state.auction_task:
            # Get the bids for each server
            auction_prices = {
                server: server_pricing_agents[server].bid(state.auction_task, tasks, server, state.time_step,
                                                          training=True)
                for server, tasks in state.server_tasks.items()
            }

            # Environment step using the pricing actions to get the next state, rewards, done and info
            next_state, rewards, done, info = training_env.step(auction_prices)

            # Update the server_auction_states and successful_auction_states variables with the new next_state info
            for server, tasks in state.server_tasks.items():
                # Generate the current agent's state
                current_state = TaskPricingState(state.auction_task, tasks, server, state.time_step)

                if server_auction_states[server]:  # If a server auction observation exists
                    # Get the last time steps agent state, action and if the server won the auction
                    previous_state, previous_action, is_previous_auction_win = server_auction_states[server]

                    # If the server won the auction in the last time step then add the info to the auction trajectories
                    if is_previous_auction_win:
                        successful_auction_states.append((previous_state, previous_action, current_state))
                    else:
                        # Else add the observation to the agent's replay buffer as a failed auction bid
                        server_pricing_agents[server].failed_auction_bid(previous_state, previous_action, current_state)

                # Update the server auction agent states with the current agent state
                server_auction_states[server] = (current_state, auction_prices[server], server in rewards)
        else:  # Else the environment is at resource allocation stage
            # For each server and each server task calculate its relative weighting
            weighting_actions: Dict[Server, Dict[Task, float]] = {
                server: server_weighting_agents[server].weight(tasks, server, state.time_step, training=True)
                for server, tasks in state.server_tasks.items()
            }

            # Environment step using the resource weighting actions to get the next state, rewards, done and info
            next_state, finished_server_tasks, done, info = training_env.step(weighting_actions)

            # For each server, there may be finished tasks due to the resource allocation,
            #    so update the task pricing agents with the successful auction states of those finished tasks
            for server, finished_tasks in finished_server_tasks.items():
                for finished_task in finished_tasks:
                    # Get the successful auction agent state from the list of successful auction agent states
                    successful_auction = next((auction_agent_state
                                               for auction_agent_state in successful_auction_states
                                               if auction_agent_state[0].auction_task == finished_task), None)
                    if successful_auction is None:
                        print(f'Number of successful auction agent states: {len(successful_auction_states)}')
                        print(
                            f'Number of server tasks: {sum(len(tasks) for tasks in next_state.server_tasks.values())}')
                        print(f'Finished task: {str(finished_task)}\n\n')
                        print(f'State: {str(state)}\n')
                        print(f'Next state: {str(next_state)}')
                        break

                    # Remove the successful auction agent state
                    successful_auction_states.remove(successful_auction)

                    # Unwrap the successful auction agent state tuple
                    auction_state, action, next_auction_state = successful_auction

                    # Add the winning auction bid info to the agent
                    server_pricing_agents[server].winning_auction_bid(auction_state, action, finished_task,
                                                                      next_auction_state)

            # Add the agent states for resource allocation
            for server, tasks in state.server_tasks.items():
                agent_state = ResourceAllocationState(tasks, server, state.time_step)
                next_agent_state = ResourceAllocationState(next_state.server_tasks[server], server,
                                                           next_state.time_step)

                server_weighting_agents[server].resource_allocation_obs(agent_state, weighting_actions[server],
                                                                        next_agent_state, finished_server_tasks[server])
        assert all(task.auction_time <= next_state.time_step <= task.deadline
                   for _, tasks in next_state.server_tasks.items() for task in tasks)
        # Update the state with the next state
        state = next_state
Example #22
from agents.rl_agents.agents.dqn import ResourceWeightingDqnAgent, TaskPricingDqnAgent
from agents.rl_agents.neural_networks.dqn_networks import create_bidirectional_dqn_network
from env.environment import OnlineFlexibleResourceAllocationEnv
from training.train_agents import generate_eval_envs, run_training, setup_tensorboard

if __name__ == "__main__":
    # Setup tensorboard
    folder = 'bidirectional_agents'
    writer, datetime = setup_tensorboard('training/results/logs/', folder)

    save_folder = f'{folder}_{datetime}'

    env = OnlineFlexibleResourceAllocationEnv([
        './training/settings/basic.env',
        './training/settings/large_tasks_servers.env',
        './training/settings/limited_resources.env',
        './training/settings/mixture_tasks_servers.env'
    ])
    eval_envs = generate_eval_envs(
        env, 20, f'./training/settings/eval_envs/network_arch/')

    task_pricing_agents = [
        TaskPricingDqnAgent(agent_num,
                            create_bidirectional_dqn_network(9, 21),
                            save_folder=save_folder) for agent_num in range(3)
    ]
    resource_weighting_agents = [
        ResourceWeightingDqnAgent(0,
                                  create_bidirectional_dqn_network(16, 11),
                                  save_folder=save_folder)
    ]
Example #23
    )

    resource_weighting_agents = [
        ResourceWeightingDqnAgent(0, create_lstm_dqn_network(16, 11))
    ]
    resource_weighting_agents[0].model_network.load_weights(
        './analysis/fixed_heuristics/eval_agents/Resource_weighting_Dqn_agent_0/update_440898'
    )

    return task_pricing_agents, resource_weighting_agents


if __name__ == "__main__":
    eval_env = OnlineFlexibleResourceAllocationEnv([
        './analysis/fixed_heuristics/settings/basic.env',
        './analysis/fixed_heuristics/settings/large_tasks_servers.env',
        './analysis/fixed_heuristics/settings/limited_resources.env',
        './analysis/fixed_heuristics/settings/mixture_tasks_servers.env'
    ])

    eval_pos = 0
    while True:
        if not os.path.exists(
                f'./analysis/fixed_heuristics/eval_envs_{eval_pos}/'):
            eval_envs = generate_eval_envs(
                eval_env, 20,
                f'./analysis/fixed_heuristics/eval_envs_{eval_pos}/')
            break
        else:
            eval_pos += 1

    task_pricing_agents, resource_weighting_agents = load_agents()