def test_seq2seq_training():
    print()
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'training/settings/resource_allocation.env')

    agent = ResourceWeightingSeq2SeqAgent(0,
                                          create_seq2seq_actor_network(),
                                          create_seq2seq_critic_network(),
                                          create_seq2seq_critic_network(),
                                          batch_size=1,
                                          save_folder='tmp')
    # Generate resource-weighting observations over four environment steps
    for _ in range(4):
        actions = {
            server: agent.weight(tasks, server, state.time_step, training=True)
            for server, tasks in state.server_tasks.items()
        }

        next_state, rewards, done, _ = env.step(actions)

        for server in state.server_tasks.keys():
            resource_state = ResourceAllocationState(
                state.server_tasks[server], server, state.time_step)
            next_resource_state = ResourceAllocationState(
                next_state.server_tasks[server], server, next_state.time_step)
            agent.resource_allocation_obs(resource_state, actions[server],
                                          next_resource_state, rewards[server])

        state = next_state

    # Train with a batch size equal to the full replay buffer
    agent.batch_size = len(agent.replay_buffer)
    print(f'Batch size: {agent.batch_size}')
    agent.train()
def test_seq2seq_actions():
    print()
    # Check that Seq2seq PG actions are valid
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'agent/settings/resource_allocation.env')

    actor_network = create_seq2seq_actor_network()
    critic_network = create_seq2seq_critic_network()
    twin_critic_network = create_seq2seq_critic_network()
    seq2seq_agent = ResourceWeightingSeq2SeqAgent(0, actor_network,
                                                  critic_network,
                                                  twin_critic_network)

    weighting_actions = {
        server: seq2seq_agent.weight(tasks, server, state.time_step)
        for server, tasks in state.server_tasks.items()
    }
    state, rewards, done, _ = env.step(weighting_actions)

    weighting_actions = {
        server: seq2seq_agent.weight(tasks,
                                     server,
                                     state.time_step,
                                     training=True)
        for server, tasks in state.server_tasks.items()
    }
    state, rewards, done, _ = env.step(weighting_actions)
def test_agent_actions():
    print()
    pricing_agents = [
        TaskPricingDqnAgent(0, create_lstm_dqn_network(9, 5)),
        TaskPricingDdqnAgent(1, create_lstm_dqn_network(9, 5)),
        TaskPricingDuelingDqnAgent(2, create_lstm_dueling_dqn_network(9, 5)),
        TaskPricingCategoricalDqnAgent(
            3, create_lstm_categorical_dqn_network(9, 5)),
        TaskPricingDdpgAgent(4, create_lstm_actor_network(9),
                             create_lstm_critic_network(9)),
        TaskPricingTD3Agent(5, create_lstm_actor_network(9),
                            create_lstm_critic_network(9),
                            create_lstm_critic_network(9))
    ]
    weighting_agents = [
        ResourceWeightingDqnAgent(0, create_lstm_dqn_network(16, 5)),
        ResourceWeightingDdqnAgent(1, create_lstm_dqn_network(16, 5)),
        ResourceWeightingDuelingDqnAgent(
            2, create_lstm_dueling_dqn_network(16, 5)),
        ResourceWeightingCategoricalDqnAgent(
            3, create_lstm_categorical_dqn_network(16, 5)),
        ResourceWeightingDdpgAgent(4, create_lstm_actor_network(16),
                                   create_lstm_critic_network(16)),
        ResourceWeightingTD3Agent(5, create_lstm_actor_network(16),
                                  create_lstm_critic_network(16),
                                  create_lstm_critic_network(16))
    ]

    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'agent/settings/actions.env')
    for agent in pricing_agents:
        actions = {
            server: agent.bid(state.auction_task, tasks, server,
                              state.time_step)
            for server, tasks in state.server_tasks.items()
        }
    # noinspection PyUnboundLocalVariable
    print(
        f'Actions: {{{", ".join([f"{server.name}: {action}" for server, action in actions.items()])}}}'
    )

    state, rewards, done, _ = env.step(actions)

    for agent in weighting_agents:
        actions = {
            server: agent.weight(tasks, server, state.time_step)
            for server, tasks in state.server_tasks.items()
        }
    print(
        f'Actions: {{{", ".join([f"{server.name}: {list(task_action.values())}" for server, task_action in actions.items()])}}}'
    )

    state, rewards, done, _ = env.step(actions)
def test_env_save_load():
    # Tests that an environment can be saved part-way through an episode, reloaded
    # with an identical state and re-saved to produce an identical file
    env = OnlineFlexibleResourceAllocationEnv('env/settings/basic.env')
    state = env.reset()

    random_task_pricing = RandomTaskPricingAgent(0)
    random_resource_weighting = RandomResourceWeightingAgent(0)

    for _ in range(40):
        if state.auction_task is not None:
            actions = {
                server: random_task_pricing.bid(state.auction_task, tasks, server, state.time_step)
                for server, tasks in state.server_tasks.items()
            }
        else:
            actions = {
                server: random_resource_weighting.weight(tasks, server, state.time_step)
                for server, tasks in state.server_tasks.items()
            }
        state, rewards, done, info = env.step(actions)

    env.save_env('env/settings/tmp/save.env')
    loaded_env, loaded_env_state = env.load_env('env/settings/tmp/save.env')

    assert state.auction_task == loaded_env_state.auction_task
    assert len(env._unallocated_tasks) == len(loaded_env._unallocated_tasks)
    for task, loaded_task in zip(env._unallocated_tasks, loaded_env._unallocated_tasks):
        assert task == loaded_task
    for server, tasks in state.server_tasks.items():
        loaded_server, loaded_tasks = next(((loaded_server, loaded_tasks)
                                            for loaded_server, loaded_tasks in loaded_env_state.server_tasks.items()
                                            if loaded_server.name == server.name), (None, None))
        assert loaded_server is not None and loaded_tasks is not None
        assert server.name == loaded_server.name and server.storage_cap == loaded_server.storage_cap and \
            server.computational_cap == loaded_server.computational_cap and \
            server.bandwidth_cap == loaded_server.bandwidth_cap
        for task, loaded_task in zip(tasks, loaded_tasks):
            assert task.name == loaded_task.name and task.required_storage == loaded_task.required_storage and \
                task.required_computation == loaded_task.required_computation and \
                task.required_results_data == loaded_task.required_results_data and \
                task.auction_time == loaded_task.auction_time and task.deadline == loaded_task.deadline and \
                task.stage is loaded_task.stage and task.loading_progress == loaded_task.loading_progress and \
                task.compute_progress == loaded_task.compute_progress and \
                task.sending_progress == loaded_task.sending_progress and task.price == loaded_task.price
            task.assert_valid()

    loaded_env.save_env('env/settings/tmp/loaded_save.env')
    with open('env/settings/tmp/save.env') as env_file:
        env_file_data = env_file.read()
    with open('env/settings/tmp/loaded_save.env') as loaded_env_file:
        loaded_env_file_data = loaded_env_file.read()
    assert env_file_data == loaded_env_file_data
def eval_fixed_env(eval_envs_filename):
    total_completed_tasks = []
    for eval_env_filename in eval_envs_filename:
        env, state = OnlineFlexibleResourceAllocationEnv.load_env(
            eval_env_filename)

        try:
            fixed_completed_tasks = fixed_resource_allocation_model(env, state)
        except Exception:
            # Flag environments where the fixed resource allocation model fails
            fixed_completed_tasks = -1
        total_completed_tasks.append(fixed_completed_tasks)

    return total_completed_tasks
def test_env_auction_step():
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'env/settings/auction.env')

    server_0, server_1, server_2 = list(state.server_tasks.keys())
    assert server_0.name == 'Basic 0' and server_1.name == 'Basic 1' and server_2.name == 'Basic 2'

    # Tests the normal case for the Vickrey auction: the lowest bidder wins the task
    # and receives the second-lowest price (see the sketch after this test)
    actions = {server_0: 1.0, server_1: 3.0, server_2: 0.0}

    next_state, rewards, done, info = env.step(actions)
    assert server_0 in rewards and rewards[server_0] == 3.0
    assert len(state.server_tasks[server_0]) + 1 == len(next_state.server_tasks[server_0]) and \
        len(state.server_tasks[server_1]) == len(next_state.server_tasks[server_1]) and \
        len(state.server_tasks[server_2]) == len(next_state.server_tasks[server_2])
    state = next_state

    # Test a case where two servers provide the same price
    actions = {server_0: 3.0, server_1: 3.0, server_2: 0.0}
    next_state, rewards, done, _ = env.step(actions)
    assert (server_0 in rewards
            and rewards[server_0] == 3.0) or (server_1 in rewards
                                              and rewards[server_1] == 3.0)
    assert len(next_state.server_tasks[server_0]) == len(state.server_tasks[server_0]) + 1 or \
        len(next_state.server_tasks[server_1]) == len(state.server_tasks[server_1]) + 1

    # Test where no server provides a price
    actions = {server_0: 0.0, server_1: 0.0, server_2: 0.0}
    state, rewards, done, _ = env.step(actions)
    assert len(rewards) == 0

    # Test where only a single server provides a price
    actions = {server_0: 1.0, server_1: 0.0, server_2: 0.0}
    next_state, rewards, done, _ = env.step(actions)
    assert server_0 in rewards and rewards[server_0] == 1.0
    assert len(next_state.server_tasks[server_0]) == len(
        state.server_tasks[server_0]) + 1

    # Test where all of the servers bid
    actions = {server_0: 2.0, server_1: 3.0, server_2: 1.0}
    state, rewards, done, _ = env.step(actions)
    assert server_2 in rewards and rewards[server_2] == 2.0
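

# A minimal sketch (not the environment's own implementation) of the Vickrey
# second-price rule that the assertions above imply: a bid of 0.0 means "no bid",
# the lowest positive bid wins the task, and the winner is rewarded with the
# second-lowest positive bid (or its own bid when no other server bids).
def _second_price_winner_sketch(bids):
    positive = {server: bid for server, bid in bids.items() if 0.0 < bid}
    if not positive:
        return None, 0.0  # no bids, so the auction task goes unallocated
    winner = min(positive, key=positive.get)
    other_bids = [bid for server, bid in positive.items() if server is not winner]
    price = min(other_bids) if other_bids else positive[winner]
    return winner, price


assert _second_price_winner_sketch({'s0': 1.0, 's1': 3.0, 's2': 0.0}) == ('s0', 3.0)
assert _second_price_winner_sketch({'s0': 2.0, 's1': 3.0, 's2': 1.0}) == ('s2', 2.0)
assert _second_price_winner_sketch({'s0': 1.0, 's1': 0.0, 's2': 0.0}) == ('s0', 1.0)
assert _second_price_winner_sketch({'s0': 0.0, 's1': 0.0, 's2': 0.0}) == (None, 0.0)
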
def eval_agent(env_filenames: List[str], episode: int, pricing_agents: List[TaskPricingAgent],
               weighting_agents: List[ResourceWeightingAgent]) -> EvalResults:
    """
    Evaluation of agents using a list of preset environments

    Args:
        env_filenames: Evaluation environment filenames
        episode: The episode of evaluation
        pricing_agents: List of task pricing agents
        weighting_agents: List of resource weighting agents

    Returns: The evaluation results
    """
    results = EvalResults()

    for env_filename in env_filenames:
        eval_env, state = OnlineFlexibleResourceAllocationEnv.load_env(env_filename)
        server_pricing_agents, server_weighting_agents = allocate_agents(state, pricing_agents, weighting_agents)

        done = False
        while not done:
            if state.auction_task:
                bidding_actions = {
                    server: server_pricing_agents[server].bid(state.auction_task, tasks, server, state.time_step)
                    for server, tasks in state.server_tasks.items()
                }
                state, rewards, done, info = eval_env.step(bidding_actions)
                results.auction(bidding_actions, rewards)
            else:
                weighting_actions = {
                    server: server_weighting_agents[server].weight(tasks, server, state.time_step)
                    for server, tasks in state.server_tasks.items()
                }
                state, rewards, done, info = eval_env.step(weighting_actions)
                results.resource_allocation(weighting_actions, rewards)

        results.finished_env()

    results.save(episode)
    return results
def test_agent_evaluation():
    print()
    setup_tensorboard('training/results/tmp/', 'agent_eval')

    env = OnlineFlexibleResourceAllocationEnv('training/settings/basic.env')

    eval_envs = generate_eval_envs(env,
                                   5,
                                   'training/settings/tmp/',
                                   overwrite=True)
    assert len(os.listdir('training/settings/tmp/')) == 5
    total_auctions, total_resource_allocation = 0, 0
    for eval_env in eval_envs:
        env, state = OnlineFlexibleResourceAllocationEnv.load_env(eval_env)
        total_auctions += len(env._unallocated_tasks) + (
            1 if state.auction_task is not None else 0)
        total_resource_allocation += env._total_time_steps + 1

    pricing_agents = [
        TaskPricingDqnAgent(0, create_bidirectional_dqn_network(9, 5)),
        TaskPricingDdpgAgent(1, create_lstm_actor_network(9),
                             create_lstm_critic_network(9))
    ]
    weighting_agents = [
        ResourceWeightingDqnAgent(2, create_bidirectional_dqn_network(16, 5)),
        ResourceWeightingDdpgAgent(3, create_lstm_actor_network(16),
                                   create_lstm_critic_network(16)),
    ]

    results = eval_agent(eval_envs, 0, pricing_agents, weighting_agents)
    print(
        f'Results - Total prices: {results.total_prices}, Number of completed tasks: {results.num_completed_tasks}, '
        f'failed tasks: {results.num_failed_tasks}, winning prices: {results.winning_prices}, '
        f'Number of auctions: {results.num_auctions}, resource allocations: {results.num_resource_allocations}'
    )
    assert 0 < results.num_completed_tasks
    assert 0 < results.num_failed_tasks

    assert results.num_auctions == total_auctions
    assert results.num_resource_allocations == total_resource_allocation
def test_task_price_training():
    print()
    setup_tensorboard('/tmp/results/', 'price_training')

    # List of agents
    agents: List[TaskPricingRLAgent] = [
        TaskPricingDqnAgent(0,
                            create_lstm_dqn_network(9, 10),
                            batch_size=4,
                            save_folder='tmp'),
        TaskPricingDdqnAgent(1,
                             create_lstm_dqn_network(9, 10),
                             batch_size=4,
                             save_folder='tmp'),
        TaskPricingDuelingDqnAgent(2,
                                   create_lstm_dueling_dqn_network(9, 10),
                                   batch_size=4,
                                   save_folder='tmp'),
        TaskPricingCategoricalDqnAgent(3,
                                       create_lstm_categorical_dqn_network(
                                           9, 10),
                                       batch_size=4,
                                       save_folder='tmp'),
        TaskPricingDdpgAgent(4,
                             create_lstm_actor_network(9),
                             create_lstm_critic_network(9),
                             batch_size=4,
                             save_folder='tmp'),
        TaskPricingTD3Agent(5,
                            create_lstm_actor_network(9),
                            create_lstm_critic_network(9),
                            create_lstm_critic_network(9),
                            batch_size=4,
                            save_folder='tmp')
    ]

    # Load the environment
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'training/settings/auction.env')

    # Servers
    server_1, server_2 = list(state.server_tasks.keys())
    # Actions
    actions = {server_1: 1.0, server_2: 2.0}

    # Environment step
    next_state, reward, done, info = env.step(actions)

    # Server states
    server_1_state = TaskPricingState(state.auction_task,
                                      state.server_tasks[server_1], server_1,
                                      state.time_step)
    server_2_state = TaskPricingState(state.auction_task,
                                      state.server_tasks[server_2], server_2,
                                      state.time_step)

    # Next server states
    next_server_1_state = TaskPricingState(next_state.auction_task,
                                           next_state.server_tasks[server_1],
                                           server_1, next_state.time_step)
    next_server_2_state = TaskPricingState(next_state.auction_task,
                                           next_state.server_tasks[server_2],
                                           server_2, next_state.time_step)
    # Completed and failed copies of the auctioned task
    finished_task = next(finished_task
                         for finished_task in next_state.server_tasks[server_1]
                         if finished_task == state.auction_task)
    finished_task = finished_task._replace(stage=TaskStage.COMPLETED)
    failed_task = finished_task._replace(stage=TaskStage.FAILED)

    # Loop over the agents, add the observations and try training
    for agent in agents:
        agent.winning_auction_bid(server_1_state, actions[server_1],
                                  finished_task, next_server_1_state)
        agent.winning_auction_bid(server_1_state, actions[server_1],
                                  failed_task, next_server_1_state)
        agent.failed_auction_bid(server_2_state, actions[server_2],
                                 next_server_2_state)
        agent.failed_auction_bid(server_2_state, 0, next_server_2_state)

        agent.train()

    print(
        f'Rewards: {[trajectory[3] for trajectory in agents[0].replay_buffer]}'
    )
def test_resource_allocation_training():
    print()
    setup_tensorboard('/tmp/results/', 'resource_allocation_training')

    # List of agents
    agents: List[ResourceWeightingRLAgent] = [
        ResourceWeightingDqnAgent(0,
                                  create_lstm_dqn_network(16, 10),
                                  batch_size=4,
                                  save_folder='tmp'),
        ResourceWeightingDdqnAgent(1,
                                   create_lstm_dqn_network(16, 10),
                                   batch_size=4,
                                   save_folder='tmp'),
        ResourceWeightingDuelingDqnAgent(2,
                                         create_lstm_dueling_dqn_network(
                                             16, 10),
                                         batch_size=4,
                                         save_folder='tmp'),
        ResourceWeightingCategoricalDqnAgent(
            3,
            create_lstm_categorical_dqn_network(16, 10),
            batch_size=2,
            save_folder='tmp'),
        ResourceWeightingDdpgAgent(4,
                                   create_lstm_actor_network(16),
                                   create_lstm_critic_network(16),
                                   batch_size=4,
                                   save_folder='tmp'),
        ResourceWeightingTD3Agent(5,
                                  create_lstm_actor_network(16),
                                  create_lstm_critic_network(16),
                                  create_lstm_critic_network(16),
                                  batch_size=4,
                                  save_folder='tmp'),
    ]

    # Load the environment
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'training/settings/resource_allocation.env')

    # Servers and tasks
    server = list(state.server_tasks.keys())[0]
    task_1, task_2, task_3, task_4 = list(state.server_tasks[server])

    # Actions
    actions = {server: {task_1: 1.0, task_2: 3.0, task_3: 0.0, task_4: 5.0}}

    # Environment step
    next_state, rewards, done, _ = env.step(actions)

    # Resource state
    resource_state = ResourceAllocationState(state.server_tasks[server],
                                             server, state.time_step)
    # Next resource allocation state
    next_resource_state = ResourceAllocationState(
        next_state.server_tasks[server], server, next_state.time_step)

    for agent in agents:
        agent.resource_allocation_obs(resource_state, actions[server],
                                      next_resource_state, rewards[server])

        agent.train()

    agent = ResourceWeightingSeq2SeqAgent(6,
                                          create_seq2seq_actor_network(),
                                          create_seq2seq_critic_network(),
                                          create_seq2seq_critic_network(),
                                          batch_size=2,
                                          save_folder='tmp')
    agent.resource_allocation_obs(resource_state, actions[server],
                                  next_resource_state, rewards[server])
    agent.resource_allocation_obs(resource_state, actions[server],
                                  next_resource_state, rewards[server])
    agent.train()

    print(
        f'Rewards: {[trajectory[3] for trajectory in agents[0].replay_buffer]}'
    )
def test_env_resource_allocation_step():
    print()

    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'env/settings/resource_allocation.env')
    print(state)
def test_epsilon_policy():
    print()
    # Tests the epsilon policy: taking training actions should anneal each agent's
    # epsilon over time (see the sketch after this test)

    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'agent/settings/actions.env')

    # Number of epsilon steps for the agents
    epsilon_steps = 25

    # Agents that have a custom _get_action function
    pricing_agents = [
        TaskPricingDqnAgent(0,
                            create_lstm_dqn_network(9, 5),
                            epsilon_steps=epsilon_steps,
                            epsilon_update_freq=1,
                            epsilon_log_freq=1),
        TaskPricingCategoricalDqnAgent(1,
                                       create_lstm_categorical_dqn_network(
                                           9, 5),
                                       epsilon_steps=epsilon_steps,
                                       epsilon_update_freq=1,
                                       epsilon_log_freq=1),
        TaskPricingDdpgAgent(2,
                             create_lstm_actor_network(9),
                             create_lstm_critic_network(9),
                             epsilon_steps=epsilon_steps,
                             epsilon_update_freq=1,
                             epsilon_log_freq=1)
    ]
    weighting_agents = [
        ResourceWeightingDqnAgent(0,
                                  create_lstm_dqn_network(16, 5),
                                  epsilon_steps=epsilon_steps,
                                  epsilon_update_freq=1,
                                  epsilon_log_freq=1),
        ResourceWeightingCategoricalDqnAgent(
            1,
            create_lstm_categorical_dqn_network(16, 5),
            epsilon_steps=epsilon_steps,
            epsilon_update_freq=1,
            epsilon_log_freq=1),
        ResourceWeightingDdpgAgent(2,
                                   create_lstm_actor_network(16),
                                   create_lstm_critic_network(16),
                                   epsilon_steps=epsilon_steps,
                                   epsilon_update_freq=1,
                                   epsilon_log_freq=1)
    ]

    # Create a TensorFlow summary writer and generate actions that update the epsilon values for both agent types
    writer = tf.summary.create_file_writer('agent/tmp/testing_epsilon')
    num_steps = 10
    with writer.as_default():
        for _ in range(num_steps):
            for agent in pricing_agents:
                actions = {
                    server: agent.bid(state.auction_task,
                                      tasks,
                                      server,
                                      state.time_step,
                                      training=True)
                    for server, tasks in state.server_tasks.items()
                }

        state, rewards, done, _ = env.step(actions)

        for _ in range(num_steps):
            for agent in weighting_agents:
                actions = {
                    server: agent.weight(tasks,
                                         server,
                                         state.time_step,
                                         training=True)
                    for server, tasks in state.server_tasks.items()
                }

        state, rewards, done, _ = env.step(actions)

    # Check that the resulting total action counts are correct
    for agent in pricing_agents:
        print(f'Agent: {agent.name}')
        assert agent.total_actions == num_steps * 3

    for agent in weighting_agents:
        print(f'Agent: {agent.name}')
        assert agent.total_actions == num_steps * 3

    # Check that each agent's epsilon has reached its final value
    assert pricing_agents[0].epsilon == pricing_agents[0].final_epsilon
    assert pricing_agents[1].epsilon == pricing_agents[1].final_epsilon
    assert weighting_agents[0].epsilon == weighting_agents[0].final_epsilon
    assert weighting_agents[1].epsilon == weighting_agents[1].final_epsilon
    assert pricing_agents[2].epsilon_std == pricing_agents[2].final_epsilon_std
    assert weighting_agents[2].epsilon_std == weighting_agents[2].final_epsilon_std
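

# A minimal sketch (an assumption about the agents' behaviour, not their actual
# implementation) of the linear epsilon schedule the assertions above rely on:
# epsilon anneals from its initial value to final_epsilon over epsilon_steps
# actions, so after num_steps * 3 = 30 actions (more than the 25 epsilon_steps)
# every agent should have reached its final epsilon (or epsilon_std for DDPG).
def _linear_epsilon_sketch(initial_epsilon, final_epsilon, epsilon_steps, total_actions):
    fraction = min(total_actions / epsilon_steps, 1.0)
    return final_epsilon + (1.0 - fraction) * (initial_epsilon - final_epsilon)


assert _linear_epsilon_sketch(1.0, 0.1, 25, 30) == 0.1  # schedule finished
assert _linear_epsilon_sketch(1.0, 0.1, 25, 0) == 1.0   # schedule not yet started
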
def test_ddpg_actions():
    print()
    # Check that DDPG actions are valid
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'agent/settings/actions.env')

    repeat, max_repeat = 0, 10
    auction_actions = {}
    while repeat <= max_repeat:
        pricing_agent = TaskPricingDdpgAgent(3,
                                             create_lstm_actor_network(9),
                                             create_lstm_critic_network(9),
                                             initial_epsilon=0.5)
        auction_actions = {
            server: pricing_agent.bid(state.auction_task, tasks, server,
                                      state.time_step)
            for server, tasks in state.server_tasks.items()
        }
        print(f'Greedy actions: {list(auction_actions.values())}')
        if any(0 < action for server, action in auction_actions.items()):

            auction_actions = {
                server: pricing_agent.bid(state.auction_task,
                                          tasks,
                                          server,
                                          state.time_step,
                                          training=True)
                for server, tasks in state.server_tasks.items()
            }
            print(
                f'Epsilon Greedy actions: {list(auction_actions.values())}\n')
            if any(0 < action for server, action in auction_actions.items()):
                break
        if repeat == max_repeat:
            raise Exception('Failed to sample positive auction actions with the DDPG pricing agent')
        repeat += 1

    states, rewards, dones, _ = env.step(auction_actions)

    repeat, max_repeat = 0, 10
    while repeat <= max_repeat:
        weighting_agent = ResourceWeightingDdpgAgent(
            3,
            create_lstm_actor_network(16),
            create_lstm_critic_network(16),
            initial_epsilon=0.5)
        weighting_actions = {
            server: weighting_agent.weight(tasks, server, state.time_step)
            for server, tasks in state.server_tasks.items()
        }
        print(
            f'Greedy actions: {[list(actions.values()) for actions in weighting_actions.values()]}'
        )
        if any(0 < action
               for server, task_actions in weighting_actions.items()
               for task, action in task_actions.items()):
            weighting_actions = {
                server: weighting_agent.weight(tasks,
                                               server,
                                               state.time_step,
                                               training=True)
                for server, tasks in state.server_tasks.items()
            }
            print(
                f'Epsilon Greedy actions: {[list(actions.values()) for actions in weighting_actions.values()]}'
            )
            if any(0 < action
                   for server, task_actions in weighting_actions.items()
                   for task, action in task_actions.items()):
                break
        if repeat == max_repeat:
            raise Exception('Failed to sample positive weighting actions with the DDPG weighting agent')
        repeat += 1
def test_c51_actions():
    print()
    # Test the C51 agent actions
    pricing_agent = TaskPricingCategoricalDqnAgent(
        3, create_lstm_categorical_dqn_network(9, 5), initial_epsilon=0.5)
    weighting_agent = ResourceWeightingCategoricalDqnAgent(
        3, create_lstm_categorical_dqn_network(16, 5), initial_epsilon=0.5)

    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'agent/settings/actions.env')
    auction_actions = {
        server: pricing_agent.bid(state.auction_task, tasks, server,
                                  state.time_step)
        for server, tasks in state.server_tasks.items()
    }
    print(f'Greedy actions: {list(auction_actions.values())}')
    assert any(0 < action for server, action in auction_actions.items())

    # Manually reproduce the agent's greedy bid for the first server: the network
    # outputs logits per (action, atom), softmax turns them into a probability
    # distribution over the value atoms (z_values), and each action's Q-value is
    # the expected atom value, Q(s, a) = sum_i p_i(s, a) * z_i
    # (see the numeric sketch after this test)
    server, tasks = next(iter(state.server_tasks.items()))
    observation = tf.expand_dims(pricing_agent._network_obs(
        state.auction_task, tasks, server, state.time_step),
                                 axis=0)
    network_output = pricing_agent.model_network(observation)
    probabilities = tf.nn.softmax(network_output)
    probability_value = probabilities * pricing_agent.z_values
    q_values = tf.reduce_sum(probability_value, axis=2)
    argmax_q_values = tf.math.argmax(q_values, axis=1, output_type=tf.int32)
    print(
        f'Network output: {network_output}\nProbabilities: {probabilities}\nProbability value: {probability_value}\n'
        f'Q value: {q_values}\nArgmax Q value: {argmax_q_values}')

    auction_actions = {
        server: pricing_agent.bid(state.auction_task,
                                  tasks,
                                  server,
                                  state.time_step,
                                  training=True)
        for server, tasks in state.server_tasks.items()
    }
    print(f'Epsilon Greedy actions: {list(auction_actions.values())}\n')
    assert any(0 < action for server, action in auction_actions.items())

    states, rewards, dones, _ = env.step(auction_actions)

    weighting_actions = {
        server: weighting_agent.weight(tasks, server, state.time_step)
        for server, tasks in state.server_tasks.items()
    }
    print(
        f'Greedy actions: {[list(actions.values()) for actions in weighting_actions.values()]}'
    )
    assert any(0 < action
               for server, task_actions in weighting_actions.items()
               for task, action in task_actions.items())

    weighting_actions = {
        server: weighting_agent.weight(tasks,
                                       server,
                                       state.time_step,
                                       training=True)
        for server, tasks in state.server_tasks.items()
    }
    print(
        f'Epsilon Greedy actions: {[list(actions.values()) for actions in weighting_actions.values()]}'
    )
    assert any(0 < action
               for server, task_actions in weighting_actions.items()
               for task, action in task_actions.items())
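

# A minimal numeric sketch of the C51 expectation reproduced above (illustrative
# atom values only, not the agent's actual z_values), assuming tensorflow is
# imported as tf as in the surrounding tests: Q(s, a) = sum_i p_i(s, a) * z_i,
# and the greedy action is the argmax over those Q-values.
z_sketch = tf.constant([0.0, 5.0, 10.0])                   # three value atoms
probs_sketch = tf.constant([[0.2, 0.5, 0.3],               # action 0 distribution
                            [0.6, 0.3, 0.1]])              # action 1 distribution
q_sketch = tf.reduce_sum(probs_sketch * z_sketch, axis=1)  # expected values: ~[5.5, 2.5]
assert int(tf.argmax(q_sketch)) == 0                       # greedy action is action 0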