def test_seq2seq_training():
    """Collect seq2seq resource-weighting observations over a short rollout, then train."""
    print()
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'training/settings/resource_allocation.env')
    agent = ResourceWeightingSeq2SeqAgent(
        0, create_seq2seq_actor_network(), create_seq2seq_critic_network(),
        create_seq2seq_critic_network(), batch_size=1, save_folder='tmp')

    # Step the environment four times, recording one observation per server per step
    for _ in range(4):
        weightings = {
            server: agent.weight(tasks, server, state.time_step, training=True)
            for server, tasks in state.server_tasks.items()
        }
        new_state, rewards, done, _ = env.step(weightings)

        for server in state.server_tasks.keys():
            allocation_state = ResourceAllocationState(
                state.server_tasks[server], server, state.time_step)
            next_allocation_state = ResourceAllocationState(
                new_state.server_tasks[server], server, new_state.time_step)
            agent.resource_allocation_obs(allocation_state, weightings[server],
                                          next_allocation_state, rewards[server])
        state = new_state

    # Train over everything collected, in a single batch
    agent.batch_size = len(agent.replay_buffer)
    print(f'Batch size: {agent.batch_size}')
    agent.train()
def test_seq2seq_actions():
    """Check that Seq2seq policy-gradient weighting actions are valid in both greedy and training mode."""
    print()
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'agent/settings/resource_allocation.env')

    seq2seq_agent = ResourceWeightingSeq2SeqAgent(
        0, create_seq2seq_actor_network(), create_seq2seq_critic_network(),
        create_seq2seq_critic_network())

    # Greedy (evaluation) actions
    greedy_actions = {
        server: seq2seq_agent.weight(tasks, server, state.time_step)
        for server, tasks in state.server_tasks.items()
    }
    state, rewards, done, _ = env.step(greedy_actions)

    # Exploratory (training mode) actions
    training_actions = {
        server: seq2seq_agent.weight(tasks, server, state.time_step, training=True)
        for server, tasks in state.server_tasks.items()
    }
    state, rewards, done, _ = env.step(training_actions)
def test_agent_actions():
    """Check every pricing and weighting agent type can act on a live environment."""
    print()
    pricing_agents = [
        TaskPricingDqnAgent(0, create_lstm_dqn_network(9, 5)),
        TaskPricingDdqnAgent(1, create_lstm_dqn_network(9, 5)),
        TaskPricingDuelingDqnAgent(2, create_lstm_dueling_dqn_network(9, 5)),
        TaskPricingCategoricalDqnAgent(
            3, create_lstm_categorical_dqn_network(9, 5)),
        TaskPricingDdpgAgent(4, create_lstm_actor_network(9),
                             create_lstm_critic_network(9)),
        TaskPricingTD3Agent(5, create_lstm_actor_network(9),
                            create_lstm_critic_network(9),
                            create_lstm_critic_network(9))
    ]
    weighting_agents = [
        ResourceWeightingDqnAgent(0, create_lstm_dqn_network(16, 5)),
        ResourceWeightingDdqnAgent(1, create_lstm_dqn_network(16, 5)),
        ResourceWeightingDuelingDqnAgent(
            2, create_lstm_dueling_dqn_network(16, 5)),
        ResourceWeightingCategoricalDqnAgent(
            3, create_lstm_categorical_dqn_network(16, 5)),
        ResourceWeightingDdpgAgent(4, create_lstm_actor_network(16),
                                   create_lstm_critic_network(16)),
        ResourceWeightingTD3Agent(5, create_lstm_actor_network(16),
                                  create_lstm_critic_network(16),
                                  create_lstm_critic_network(16))
    ]

    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'agent/settings/actions.env')

    # Each pricing agent bids on the current auction task, then the env steps
    for agent in pricing_agents:
        bids = {
            server: agent.bid(state.auction_task, tasks, server, state.time_step)
            for server, tasks in state.server_tasks.items()
        }
        formatted = ", ".join(f"{server.name}: {action}"
                              for server, action in bids.items())
        print(f'Actions: {{{formatted}}}')
        state, rewards, done, _ = env.step(bids)

    # Each weighting agent weights the tasks on every server, then the env steps
    for agent in weighting_agents:
        weights = {
            server: agent.weight(tasks, server, state.time_step)
            for server, tasks in state.server_tasks.items()
        }
        formatted = ", ".join(f"{server.name}: {list(task_action.values())}"
                              for server, task_action in weights.items())
        print(f'Actions: {{{formatted}}}')
        state, rewards, done, _ = env.step(weights)
def test_env_save_load():
    """Save an environment, reload it and check the copy matches the original.

    Runs a short random rollout to reach a non-trivial state, saves the env,
    reloads it and asserts that the auction task, unallocated tasks, servers
    and per-server tasks all round-trip, then checks that re-saving the loaded
    env reproduces the save file byte-for-byte.
    """
    env = OnlineFlexibleResourceAllocationEnv('env/settings/basic.env')
    state = env.reset()

    random_task_pricing = RandomTaskPricingAgent(0)
    random_resource_weighting = RandomResourceWeightingAgent(0)

    # Advance the environment with random agents to reach a non-trivial state
    for _ in range(40):
        if state.auction_task is not None:
            actions = {
                server: random_task_pricing.bid(state.auction_task, tasks,
                                                server, state.time_step)
                for server, tasks in state.server_tasks.items()
            }
        else:
            actions = {
                server: random_resource_weighting.weight(tasks, server,
                                                         state.time_step)
                for server, tasks in state.server_tasks.items()
            }
        state, rewards, done, info = env.step(actions)

    env.save_env('env/settings/tmp/save.env')
    loaded_env, loaded_env_state = env.load_env('env/settings/tmp/save.env')

    assert state.auction_task == loaded_env_state.auction_task
    assert len(env._unallocated_tasks) == len(loaded_env._unallocated_tasks)
    for task, loaded_task in zip(env._unallocated_tasks,
                                 loaded_env._unallocated_tasks):
        assert task == loaded_task

    for server, tasks in state.server_tasks.items():
        # Bug fix: search the LOADED state's servers for the match (previously
        # this iterated state.server_tasks, comparing the state against itself)
        loaded_server, loaded_tasks = next(
            ((loaded_server, loaded_tasks)
             for loaded_server, loaded_tasks in loaded_env_state.server_tasks.items()
             if loaded_server.name == server.name), (None, None))
        assert loaded_server is not None and loaded_tasks is not None
        assert server.name == loaded_server.name and server.storage_cap == loaded_server.storage_cap and \
            server.computational_cap == loaded_server.computational_cap and \
            server.bandwidth_cap == loaded_server.bandwidth_cap

        for task, loaded_task in zip(tasks, loaded_tasks):
            assert task.name == loaded_task.name and task.required_storage == loaded_task.required_storage and \
                task.required_computation == loaded_task.required_computation and \
                task.required_results_data == loaded_task.required_results_data and \
                task.auction_time == loaded_task.auction_time and task.deadline == loaded_task.deadline and \
                task.stage is loaded_task.stage and task.loading_progress == loaded_task.loading_progress and \
                task.compute_progress == loaded_task.compute_progress and \
                task.sending_progress == loaded_task.sending_progress and task.price == loaded_task.price
            task.assert_valid()

    # Re-saving the loaded env must reproduce the original save file exactly
    loaded_env.save_env('env/settings/tmp/loaded_save.env')
    with open('env/settings/tmp/save.env') as env_file:
        env_file_data = env_file.read()
    with open('env/settings/tmp/loaded_save.env') as loaded_env_file:
        loaded_env_file_data = loaded_env_file.read()
    assert env_file_data == loaded_env_file_data
def eval_fixed_env(eval_envs_filename):
    """Evaluate the fixed resource allocation model on each preset environment.

    Args:
        eval_envs_filename: Iterable of evaluation environment filenames

    Returns:
        List of completed-task counts, one per environment; -1 marks an
        environment on which the fixed model raised an exception
    """
    total_completed_tasks = []
    for eval_env_filename in eval_envs_filename:
        env, state = OnlineFlexibleResourceAllocationEnv.load_env(
            eval_env_filename)
        try:
            fixed_completed_tasks = fixed_resource_allocation_model(env, state)
        except Exception as e:
            # Keep the -1 sentinel (best-effort evaluation over all envs) but
            # log the failure instead of swallowing it silently
            print(f'Fixed model failed on {eval_env_filename}: {e}')
            fixed_completed_tasks = -1
        total_completed_tasks.append(fixed_completed_tasks)
    return total_completed_tasks
def test_env_auction_step():
    """Test the Vickrey-auction behaviour of the environment's auction step.

    Covers: second-price winning, tied bids, no bids, a single bid and all
    servers bidding.
    """
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'env/settings/auction.env')

    server_0, server_1, server_2 = list(state.server_tasks.keys())
    assert server_0.name == 'Basic 0' and server_1.name == 'Basic 1' and server_2.name == 'Basic 2'

    # Tests a normal circumstance for the Vickrey auction with second price winning
    actions = {server_0: 1.0, server_1: 3.0, server_2: 0.0}
    next_state, rewards, done, info = env.step(actions)
    # server_0 bids lowest and is rewarded the second price (3.0)
    assert server_0 in rewards and rewards[server_0] == 3.0
    # Only the winner's task list grows
    assert len(state.server_tasks[server_0]) + 1 == len(next_state.server_tasks[server_0]) and \
        len(state.server_tasks[server_1]) == len(next_state.server_tasks[server_1]) and \
        len(state.server_tasks[server_2]) == len(next_state.server_tasks[server_2])
    state = next_state

    # Test a case where server provide the same price
    actions = {server_0: 3.0, server_1: 3.0, server_2: 0.0}
    next_state, rewards, done, _ = env.step(actions)
    # Either tied bidder may win, at price 3.0
    assert (server_0 in rewards and rewards[server_0] == 3.0) or (server_1 in rewards and rewards[server_1] == 3.0)
    assert len(next_state.server_tasks[server_0]) == len(state.server_tasks[server_0]) + 1 or \
        len(next_state.server_tasks[server_1]) == len(state.server_tasks[server_1]) + 1

    # Test where no server provides a price
    actions = {server_0: 0.0, server_1: 0.0, server_2: 0.0}
    state, rewards, done, _ = env.step(actions)
    # No bids means no winner, so no rewards
    assert len(rewards) == 0

    # Test where only a single server provides a price
    actions = {server_0: 1.0, server_1: 0.0, server_2: 0.0}
    next_state, rewards, done, _ = env.step(actions)
    # Sole bidder wins at its own price
    assert server_0 in rewards and rewards[server_0] == 1.0
    assert len(next_state.server_tasks[server_0]) == len(
        state.server_tasks[server_0]) + 1

    # Test all of the server bid
    actions = {server_0: 2.0, server_1: 3.0, server_2: 1.0}
    state, rewards, done, _ = env.step(actions)
    # Lowest bidder (server_2) wins at the second-lowest price (2.0)
    assert server_2 in rewards and rewards[server_2] == 2.0
def eval_agent(env_filenames: List[str], episode: int, pricing_agents: List[TaskPricingAgent],
               weighting_agents: List[ResourceWeightingAgent]) -> EvalResults:
    """
    Evaluation of agents using a list of preset environments

    Args:
        env_filenames: Evaluation environment filenames
        episode: The episode of evaluation
        pricing_agents: List of task pricing agents
        weighting_agents: List of resource weighting agents

    Returns: The evaluation results
    """
    results = EvalResults()
    for env_filename in env_filenames:
        eval_env, state = OnlineFlexibleResourceAllocationEnv.load_env(env_filename)
        server_pricing_agents, server_weighting_agents = allocate_agents(
            state, pricing_agents, weighting_agents)

        done = False
        while not done:
            # An auction state holds a task to bid on; otherwise weight resources
            if state.auction_task:
                actions = {
                    server: server_pricing_agents[server].bid(
                        state.auction_task, tasks, server, state.time_step)
                    for server, tasks in state.server_tasks.items()
                }
                state, rewards, done, info = eval_env.step(actions)
                results.auction(actions, rewards)
            else:
                actions = {
                    server: server_weighting_agents[server].weight(
                        tasks, server, state.time_step)
                    for server, tasks in state.server_tasks.items()
                }
                state, rewards, done, info = eval_env.step(actions)
                results.resource_allocation(actions, rewards)
        results.finished_env()

    results.save(episode)
    return results
def test_agent_evaluation():
    """Generate evaluation environments and check eval_agent's counters match them."""
    print()
    setup_tensorboard('training/results/tmp/', 'agent_eval')

    env = OnlineFlexibleResourceAllocationEnv('training/settings/basic.env')
    eval_envs = generate_eval_envs(env, 5, 'training/settings/tmp/',
                                   overwrite=True)
    assert len(os.listdir('training/settings/tmp/')) == 5

    # Derive the expected auction / resource allocation counts per environment
    expected_auctions, expected_allocations = 0, 0
    for eval_env in eval_envs:
        env, state = OnlineFlexibleResourceAllocationEnv.load_env(eval_env)
        expected_auctions += len(env._unallocated_tasks)
        if state.auction_task is not None:
            expected_auctions += 1
        expected_allocations += env._total_time_steps + 1

    pricing_agents = [
        TaskPricingDqnAgent(0, create_bidirectional_dqn_network(9, 5)),
        TaskPricingDdpgAgent(1, create_lstm_actor_network(9),
                             create_lstm_critic_network(9))
    ]
    weighting_agents = [
        ResourceWeightingDqnAgent(2, create_bidirectional_dqn_network(16, 5)),
        ResourceWeightingDdpgAgent(3, create_lstm_actor_network(16),
                                   create_lstm_critic_network(16)),
    ]

    results = eval_agent(eval_envs, 0, pricing_agents, weighting_agents)
    print(
        f'Results - Total prices: {results.total_prices}, Number of completed tasks: {results.num_completed_tasks}, '
        f'failed tasks: {results.num_failed_tasks}, winning prices: {results.winning_prices}, '
        f'Number of auctions: {results.num_auctions}, resource allocations: {results.num_resource_allocations}'
    )

    assert 0 < results.num_completed_tasks
    assert 0 < results.num_failed_tasks
    assert results.num_auctions == expected_auctions
    assert results.num_resource_allocations == expected_allocations
def test_task_price_training():
    """Check that every task pricing RL agent can record auction observations and train."""
    print()
    setup_tensorboard('/tmp/results/', 'price_training')
    # List of agents
    agents: List[TaskPricingRLAgent] = [
        TaskPricingDqnAgent(0, create_lstm_dqn_network(9, 10), batch_size=4,
                            save_folder='tmp'),
        TaskPricingDdqnAgent(1, create_lstm_dqn_network(9, 10), batch_size=4,
                             save_folder='tmp'),
        TaskPricingDuelingDqnAgent(2, create_lstm_dueling_dqn_network(9, 10),
                                   batch_size=4, save_folder='tmp'),
        TaskPricingCategoricalDqnAgent(3, create_lstm_categorical_dqn_network(
            9, 10), batch_size=4, save_folder='tmp'),
        TaskPricingDdpgAgent(4, create_lstm_actor_network(9),
                             create_lstm_critic_network(9), batch_size=4,
                             save_folder='tmp'),
        TaskPricingTD3Agent(5, create_lstm_actor_network(9),
                            create_lstm_critic_network(9),
                            create_lstm_critic_network(9), batch_size=4,
                            save_folder='tmp')
    ]

    # Load the environment
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'training/settings/auction.env')

    # Servers
    server_1, server_2 = list(state.server_tasks.keys())
    # Actions
    actions = {server_1: 1.0, server_2: 2.0}

    # Environment step
    next_state, reward, done, info = env.step(actions)

    # Server states
    server_1_state = TaskPricingState(state.auction_task,
                                      state.server_tasks[server_1], server_1,
                                      state.time_step)
    server_2_state = TaskPricingState(state.auction_task,
                                      state.server_tasks[server_2], server_2,
                                      state.time_step)
    # Next server states
    next_server_1_state = TaskPricingState(next_state.auction_task,
                                           next_state.server_tasks[server_1],
                                           server_1, next_state.time_step)
    next_server_2_state = TaskPricingState(next_state.auction_task,
                                           next_state.server_tasks[server_2],
                                           server_2, next_state.time_step)

    # Finished auction task: the task server_1 just won, copied in a
    # COMPLETED and a FAILED variant for the two winning-bid observations
    finished_task = next(
        finished_task for finished_task in next_state.server_tasks[server_1]
        if finished_task == state.auction_task)
    finished_task = finished_task._replace(stage=TaskStage.COMPLETED)
    failed_task = finished_task._replace(stage=TaskStage.FAILED)

    # Loop over the agents, add the observations and try training
    for agent in agents:
        agent.winning_auction_bid(server_1_state, actions[server_1],
                                  finished_task, next_server_1_state)
        agent.winning_auction_bid(server_1_state, actions[server_1],
                                  failed_task, next_server_1_state)
        agent.failed_auction_bid(server_2_state, actions[server_2],
                                 next_server_2_state)
        agent.failed_auction_bid(server_2_state, 0, next_server_2_state)
        agent.train()

    print(
        f'Rewards: {[trajectory[3] for trajectory in agents[0].replay_buffer]}'
    )
def test_resource_allocation_training():
    """Check that every resource weighting RL agent can record observations and train."""
    print()
    setup_tensorboard('/tmp/results/', 'resource_allocation_training')
    # List of agents
    agents: List[ResourceWeightingRLAgent] = [
        ResourceWeightingDqnAgent(0, create_lstm_dqn_network(16, 10),
                                  batch_size=4, save_folder='tmp'),
        ResourceWeightingDdqnAgent(1, create_lstm_dqn_network(16, 10),
                                   batch_size=4, save_folder='tmp'),
        ResourceWeightingDuelingDqnAgent(2, create_lstm_dueling_dqn_network(
            16, 10), batch_size=4, save_folder='tmp'),
        ResourceWeightingCategoricalDqnAgent(
            3, create_lstm_categorical_dqn_network(16, 10), batch_size=2,
            save_folder='tmp'),
        ResourceWeightingDdpgAgent(4, create_lstm_actor_network(16),
                                   create_lstm_critic_network(16),
                                   batch_size=4, save_folder='tmp'),
        ResourceWeightingTD3Agent(5, create_lstm_actor_network(16),
                                  create_lstm_critic_network(16),
                                  create_lstm_critic_network(16), batch_size=4,
                                  save_folder='tmp'),
    ]

    # Load the environment
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'training/settings/resource_allocation.env')

    # Servers and tasks
    server = list(state.server_tasks.keys())[0]
    task_1, task_2, task_3, task_4 = list(state.server_tasks[server])

    # Actions
    actions = {server: {task_1: 1.0, task_2: 3.0, task_3: 0.0, task_4: 5.0}}

    # Environment step
    next_state, rewards, done, _ = env.step(actions)

    # Resource state
    resource_state = ResourceAllocationState(state.server_tasks[server],
                                             server, state.time_step)
    # Next server and resource state
    next_resource_state = ResourceAllocationState(
        next_state.server_tasks[server], server, next_state.time_step)

    # Each agent records the single transition then trains on it
    for agent in agents:
        agent.resource_allocation_obs(resource_state, actions[server],
                                      next_resource_state, rewards[server])
        agent.train()

    # The seq2seq agent records the observation twice to fill its batch (batch_size=2)
    agent = ResourceWeightingSeq2SeqAgent(6, create_seq2seq_actor_network(),
                                          create_seq2seq_critic_network(),
                                          create_seq2seq_critic_network(),
                                          batch_size=2, save_folder='tmp')
    agent.resource_allocation_obs(resource_state, actions[server],
                                  next_resource_state, rewards[server])
    agent.resource_allocation_obs(resource_state, actions[server],
                                  next_resource_state, rewards[server])
    agent.train()

    print(
        f'Rewards: {[trajectory[3] for trajectory in agents[0].replay_buffer]}'
    )
def test_env_resource_allocation_step():
    """Load a resource allocation environment and display its initial state."""
    print()
    env, initial_state = OnlineFlexibleResourceAllocationEnv.load_env(
        'env/settings/resource_allocation.env')
    print(initial_state)
def test_epsilon_policy():
    """Test that training-mode actions drive the agents' epsilon schedules to their final values."""
    print()
    # Tests the epsilon policy by getting agent actions that should update the agent epsilon over time
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'agent/settings/actions.env')

    # Number of epsilon steps for the agents
    epsilon_steps = 25

    # Agents that have a custom _get_action function
    pricing_agents = [
        TaskPricingDqnAgent(0, create_lstm_dqn_network(9, 5),
                            epsilon_steps=epsilon_steps, epsilon_update_freq=1,
                            epsilon_log_freq=1),
        TaskPricingCategoricalDqnAgent(1, create_lstm_categorical_dqn_network(
            9, 5), epsilon_steps=epsilon_steps, epsilon_update_freq=1,
                                       epsilon_log_freq=1),
        TaskPricingDdpgAgent(2, create_lstm_actor_network(9),
                             create_lstm_critic_network(9),
                             epsilon_steps=epsilon_steps,
                             epsilon_update_freq=1, epsilon_log_freq=1)
    ]
    weighting_agents = [
        ResourceWeightingDqnAgent(0, create_lstm_dqn_network(16, 5),
                                  epsilon_steps=epsilon_steps,
                                  epsilon_update_freq=1, epsilon_log_freq=1),
        ResourceWeightingCategoricalDqnAgent(
            1, create_lstm_categorical_dqn_network(16, 5),
            epsilon_steps=epsilon_steps, epsilon_update_freq=1,
            epsilon_log_freq=1),
        ResourceWeightingDdpgAgent(2, create_lstm_actor_network(16),
                                   create_lstm_critic_network(16),
                                   epsilon_steps=epsilon_steps,
                                   epsilon_update_freq=1, epsilon_log_freq=1)
    ]

    # Generate a tf writer and generate actions that will update the epsilon values for both agents
    writer = tf.summary.create_file_writer(f'agent/tmp/testing_epsilon')
    num_steps = 10
    with writer.as_default():
        for _ in range(num_steps):
            for agent in pricing_agents:
                actions = {
                    server: agent.bid(state.auction_task, tasks, server,
                                      state.time_step, training=True)
                    for server, tasks in state.server_tasks.items()
                }
            # env stepped once per outer iteration, with the last agent's actions
            state, rewards, done, _ = env.step(actions)
        for _ in range(num_steps):
            for agent in weighting_agents:
                actions = {
                    server: agent.weight(tasks, server, state.time_step,
                                         training=True)
                    for server, tasks in state.server_tasks.items()
                }
            state, rewards, done, _ = env.step(actions)

    # Check that the resulting total action are valid
    # (each of the num_steps iterations produces one action per server)
    for agent in pricing_agents:
        print(f'Agent: {agent.name}')
        assert agent.total_actions == num_steps * 3
    for agent in weighting_agents:
        print(f'Agent: {agent.name}')
        assert agent.total_actions == num_steps * 3

    # Check that the agent epsilon are correct
    assert pricing_agents[0].final_epsilon == pricing_agents[
        0].epsilon and pricing_agents[1].final_epsilon == pricing_agents[
            1].epsilon
    assert weighting_agents[0].final_epsilon == weighting_agents[
        0].epsilon and weighting_agents[1].final_epsilon == weighting_agents[
            1].epsilon
    assert pricing_agents[2].final_epsilon_std == pricing_agents[2].epsilon_std
    assert weighting_agents[2].final_epsilon_std == weighting_agents[
        2].epsilon_std
def test_ddpg_actions():
    """Check that DDPG pricing and weighting agents can produce positive actions.

    A freshly initialised actor may output no positive action, so a new agent is
    created on each attempt (up to max_repeat) until some action is positive in
    both greedy and training mode; otherwise the test raises.
    """
    print()
    # Check that DDPG actions are valid
    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'agent/settings/actions.env')

    repeat, max_repeat = 0, 10
    auction_actions = {}
    while repeat <= max_repeat:
        # Fresh agent per attempt (new random network weights)
        pricing_agent = TaskPricingDdpgAgent(3, create_lstm_actor_network(9),
                                             create_lstm_critic_network(9),
                                             initial_epsilon=0.5)
        auction_actions = {
            server: pricing_agent.bid(state.auction_task, tasks, server,
                                      state.time_step)
            for server, tasks in state.server_tasks.items()
        }
        print(f'Greedy actions: {list(auction_actions.values())}')
        if any(0 < action for server, action in auction_actions.items()):
            # Greedy actions look fine; also try epsilon-greedy (training) mode
            auction_actions = {
                server: pricing_agent.bid(state.auction_task, tasks, server,
                                          state.time_step, training=True)
                for server, tasks in state.server_tasks.items()
            }
            print(
                f'Epsilon Greedy actions: {list(auction_actions.values())}\n')
            if any(0 < action for server, action in auction_actions.items()):
                break
        elif repeat == max_repeat:
            raise Exception()
        else:
            repeat += 1

    # NOTE(review): result bound to 'states' but the weighting calls below still
    # read 'state' (the pre-auction state) — confirm this is intended
    states, rewards, dones, _ = env.step(auction_actions)

    repeat, max_repeat = 0, 10
    while repeat <= max_repeat:
        weighting_agent = ResourceWeightingDdpgAgent(
            3, create_lstm_actor_network(16), create_lstm_critic_network(16),
            initial_epsilon=0.5)
        weighting_actions = {
            server: weighting_agent.weight(tasks, server, state.time_step)
            for server, tasks in state.server_tasks.items()
        }
        print(
            f'Greedy actions: {[list(actions.values()) for actions in weighting_actions.values()]}'
        )
        if any(0 < action
               for server, task_actions in weighting_actions.items()
               for task, action in task_actions.items()):
            weighting_actions = {
                server: weighting_agent.weight(tasks, server, state.time_step,
                                               training=True)
                for server, tasks in state.server_tasks.items()
            }
            print(
                f'Greedy actions: {[list(actions.values()) for actions in weighting_actions.values()]}'
            )
            if any(0 < action
                   for server, task_actions in weighting_actions.items()
                   for task, action in task_actions.items()):
                break
        elif repeat == max_repeat:
            raise Exception()
        else:
            repeat += 1
def test_c51_actions():
    """Test that the categorical DQN (C51) agents produce valid actions.

    Also prints the pricing network's intermediate distributional outputs
    (softmax probabilities, per-atom values, Q values, argmax) for inspection.
    """
    print()
    # Test the C51 agent actions
    pricing_agent = TaskPricingCategoricalDqnAgent(
        3, create_lstm_categorical_dqn_network(9, 5), initial_epsilon=0.5)
    weighting_agent = ResourceWeightingCategoricalDqnAgent(
        3, create_lstm_categorical_dqn_network(16, 5), initial_epsilon=0.5)

    env, state = OnlineFlexibleResourceAllocationEnv.load_env(
        'agent/settings/actions.env')

    # Greedy auction bids must contain at least one positive action
    auction_actions = {
        server: pricing_agent.bid(state.auction_task, tasks, server,
                                  state.time_step)
        for server, tasks in state.server_tasks.items()
    }
    print(f'Greedy actions: {list(auction_actions.values())}')
    assert any(0 < action for server, action in auction_actions.items())

    # Inspect the distributional network output for a single server
    server, tasks = next(
        (server, tasks) for server, tasks in state.server_tasks.items())
    observation = tf.expand_dims(pricing_agent._network_obs(
        state.auction_task, tasks, server, state.time_step), axis=0)
    network_output = pricing_agent.model_network(observation)
    probabilities = tf.nn.softmax(network_output)
    probability_value = probabilities * pricing_agent.z_values
    q_values = tf.reduce_sum(probability_value, axis=2)
    argmax_q_values = tf.math.argmax(q_values, axis=1, output_type=tf.int32)
    print(
        f'Network output: {network_output}\nProbabilities: {probabilities}\nProbability value: {probability_value}\n'
        f'Q value: {q_values}\nArgmax Q value: {argmax_q_values}')

    # Epsilon-greedy auction bids
    auction_actions = {
        server: pricing_agent.bid(state.auction_task, tasks, server,
                                  state.time_step, training=True)
        for server, tasks in state.server_tasks.items()
    }
    print(f'Epsilon Greedy actions: {list(auction_actions.values())}\n')
    assert any(0 < action for server, action in auction_actions.items())

    # NOTE(review): result bound to 'states' but the weighting calls below read
    # 'state' (the pre-auction state) — confirm this is intended
    states, rewards, dones, _ = env.step(auction_actions)

    # Greedy resource weighting actions
    weighting_actions = {
        server: weighting_agent.weight(tasks, server, state.time_step)
        for server, tasks in state.server_tasks.items()
    }
    print(
        f'Greedy actions: {[list(actions.values()) for actions in weighting_actions.values()]}'
    )
    # Bug fix: previously this re-asserted auction_actions (copy-paste);
    # check the weighting actions that were just computed instead
    assert any(0 < action
               for server, task_actions in weighting_actions.items()
               for task, action in task_actions.items())

    # Epsilon-greedy resource weighting actions
    weighting_actions = {
        server: weighting_agent.weight(tasks, server, state.time_step,
                                       training=True)
        for server, tasks in state.server_tasks.items()
    }
    print(
        f'Greedy actions: {[list(actions.values()) for actions in weighting_actions.values()]}'
    )
    assert any(0 < action
               for server, task_actions in weighting_actions.items()
               for task, action in task_actions.items())