Example #1
import os
from typing import List


def generate_eval_envs(eval_env: OnlineFlexibleResourceAllocationEnv, num_evals: int, folder: str,
                       overwrite: bool = False) -> List[str]:
    """
    Generates and saves the evaluation environments used to evaluate the agents during training

    Args:
        eval_env: The evaluation environment used to generate the files
        num_evals: The number of environments to generate
        folder: The folder where the environments are saved
        overwrite: Whether to overwrite previously saved environments

    Returns: A list of environment file paths
    """
    if not os.path.exists(folder):
        os.makedirs(folder)

    eval_files = []
    for eval_num in range(num_evals):
        eval_file = f'{folder}/eval_{eval_num}.env'
        eval_files.append(eval_file)
        if overwrite or not os.path.exists(eval_file):
            eval_env.reset()
            eval_env.save_env(eval_file)

    return eval_files
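
A minimal usage sketch; the settings path, evaluation count and folder below are illustrative, and load_env is called the way Example #3 calls it:

eval_env = OnlineFlexibleResourceAllocationEnv('env/settings/basic.env')
eval_files = generate_eval_envs(eval_env, num_evals=5, folder='eval_envs')
# Each saved file can later be reloaded for a reproducible evaluation run
loaded_env, loaded_state = eval_env.load_env(eval_files[0])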
Example #2
def test_env_load_settings():
    env = OnlineFlexibleResourceAllocationEnv('env/settings/basic.env')
    env_state = env.reset()

    for server, tasks in env_state.server_tasks.items():
        server.assert_valid()
        for task in tasks:
            task.assert_valid()

    for task in env._unallocated_tasks:
        task.assert_valid()
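
The validation loops above recur in the other tests; a small helper, a sketch assuming the same env_state/env interface, keeps the pattern in one place:

def assert_env_valid(env_state, env):
    # Every server and each of its allocated tasks must be internally consistent
    for server, tasks in env_state.server_tasks.items():
        server.assert_valid()
        for task in tasks:
            task.assert_valid()
    # Tasks that have not yet been auctioned must also be valid
    for task in env._unallocated_tasks:
        task.assert_valid()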
Example #3
def test_env_save_load():
    # Create an environment from the basic settings and run it with random pricing and weighting agents
    env = OnlineFlexibleResourceAllocationEnv('env/settings/basic.env')
    state = env.reset()

    random_task_pricing = RandomTaskPricingAgent(0)
    random_resource_weighting = RandomResourceWeightingAgent(0)

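    # Step through the environment with random actions so the saved state is non-trivial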
    for _ in range(40):
        if state.auction_task is not None:
            actions = {
                server: random_task_pricing.bid(state.auction_task, tasks, server, state.time_step)
                for server, tasks in state.server_tasks.items()
            }
        else:
            actions = {
                server: random_resource_weighting.weight(tasks, server, state.time_step)
                for server, tasks in state.server_tasks.items()
            }
        state, rewards, done, info = env.step(actions)

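    # Save the environment, then reload it from the file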
    env.save_env('env/settings/tmp/save.env')
    loaded_env, loaded_env_state = env.load_env('env/settings/tmp/save.env')

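    # The loaded state must match the original: auction task, unallocated tasks, servers and their tasks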
    assert state.auction_task == loaded_env_state.auction_task
    assert len(env._unallocated_tasks) == len(loaded_env._unallocated_tasks)
    for task, loaded_task in zip(env._unallocated_tasks, loaded_env._unallocated_tasks):
        assert task == loaded_task
    for server, tasks in state.server_tasks.items():
        loaded_server, loaded_tasks = next(((loaded_server, loaded_tasks)
                                            for loaded_server, loaded_tasks in loaded_env_state.server_tasks.items()
                                            if loaded_server.name == server.name), (None, None))
        assert loaded_server is not None and loaded_tasks is not None
        assert server.name == loaded_server.name and server.storage_cap == loaded_server.storage_cap and \
            server.computational_cap == loaded_server.computational_cap and \
            server.bandwidth_cap == loaded_server.bandwidth_cap
        for task, loaded_task in zip(tasks, loaded_tasks):
            assert task.name == loaded_task.name and task.required_storage == loaded_task.required_storage and \
                task.required_computation == loaded_task.required_computation and \
                task.required_results_data == loaded_task.required_results_data and \
                task.auction_time == loaded_task.auction_time and task.deadline == loaded_task.deadline and \
                task.stage is loaded_task.stage and task.loading_progress == loaded_task.loading_progress and \
                task.compute_progress == loaded_task.compute_progress and \
                task.sending_progress == loaded_task.sending_progress and task.price == loaded_task.price
            task.assert_valid()

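    # Saving the loaded environment again must produce a byte-identical file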
    loaded_env.save_env('env/settings/tmp/loaded_save.env')
    with open('env/settings/tmp/save.env') as env_file:
        env_file_data = env_file.read()
    with open('env/settings/tmp/loaded_save.env') as loaded_env_file:
        loaded_env_file_data = loaded_env_file.read()
    assert env_file_data == loaded_env_file_data
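
The chained field-by-field assertion above is hard to read and hides which field differs; an equivalent helper, a sketch using only the task attributes this test already references, reports the failing field name:

TASK_FIELDS = ('name', 'required_storage', 'required_computation', 'required_results_data',
               'auction_time', 'deadline', 'stage', 'loading_progress',
               'compute_progress', 'sending_progress', 'price')

def assert_tasks_equal(task, loaded_task):
    for field in TASK_FIELDS:
        assert getattr(task, field) == getattr(loaded_task, field), field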
Example #4
import random as rnd  # assuming the standard library random module for rnd.choice
from typing import Dict, List, Optional, Tuple


def train_agent(training_env: OnlineFlexibleResourceAllocationEnv, pricing_agents: List[TaskPricingRLAgent],
                weighting_agents: List[ResourceWeightingRLAgent]):
    """
    Trains the reinforcement learning agents over a single episode of the provided environment

    Args:
        training_env: Training environment used
        pricing_agents: A list of reinforcement learning task pricing agents
        weighting_agents: A list of reinforcement learning resource weighting agents
    """
    # Reset the environment, getting a new training environment for this episode
    state = training_env.reset()

    # Randomly assign each server a task pricing agent and a resource weighting agent
    server_pricing_agents: Dict[Server, TaskPricingRLAgent] = {
        server: rnd.choice(pricing_agents) for server in state.server_tasks.keys()
    }
    server_weighting_agents: Dict[Server, ResourceWeightingRLAgent] = {
        server: rnd.choice(weighting_agents) for server in state.server_tasks.keys()
    }

    # Store each server's most recent auction observation (None before the first auction):
    #   the agent state (auction task, server tasks, server, time step), the action taken and whether the auction was won
    server_auction_states: Dict[Server, Optional[Tuple[TaskPricingState, float, bool]]] = {
        server: None for server in state.server_tasks.keys()
    }

    # For successful auctions, the winning bid's agent state, the action taken and the following agent state are
    #   stored so they can be added as an agent observation once the task finishes, when it is known whether the task completed
    successful_auction_states: List[Tuple[TaskPricingState, float, TaskPricingState]] = []

    # Loop until the environment is done (the current time step exceeds the environment's total time steps)
    done = False
    while not done:
        # If the state has a task to be auctioned, get each server's price as its action
        if state.auction_task:
            # Get the bids for each server
            auction_prices = {
                server: server_pricing_agents[server].bid(state.auction_task, tasks, server, state.time_step,
                                                          training=True)
                for server, tasks in state.server_tasks.items()
            }

            # Environment step using the pricing actions to get the next state, rewards, done and info
            next_state, rewards, done, info = training_env.step(auction_prices)

            # Update server_auction_states and successful_auction_states with the new next_state info
            for server, tasks in state.server_tasks.items():
                # Generate the current agent's state
                current_state = TaskPricingState(state.auction_task, tasks, server, state.time_step)

                if server_auction_states[server]:  # If a server auction observation exists
                    # Get the last time step's agent state, action and whether the server won the auction
                    previous_state, previous_action, is_previous_auction_win = server_auction_states[server]

                    # If the server won the auction in the last time step then add the info to the successful auction states
                    if is_previous_auction_win:
                        successful_auction_states.append((previous_state, previous_action, current_state))
                    else:
                        # Otherwise add the observation to the task pricing agent as a failed auction bid
                        server_pricing_agents[server].failed_auction_bid(previous_state, previous_action, current_state)

                # Update the server auction agent states with the current agent state
                server_auction_states[server] = (current_state, auction_prices[server], server in rewards)
        else:  # Else the environment is at resource allocation stage
            # For each server, calculate the relative weighting of each of its tasks
            weighting_actions: Dict[Server, Dict[Task, float]] = {
                server: server_weighting_agents[server].weight(tasks, server, state.time_step, training=True)
                for server, tasks in state.server_tasks.items()
            }

            # Environment step using the resource weighting actions to get the next state, rewards, done and info
            next_state, finished_server_tasks, done, info = training_env.step(weighting_actions)

            # For each server, tasks may have finished due to the resource allocation,
            #    so add the stored task pricing agent states for the finished tasks
            for server, finished_tasks in finished_server_tasks.items():
                for finished_task in finished_tasks:
                    # Get the successful auction agent state from the list of successful auction agent states
                    successful_auction = next((auction_agent_state
                                               for auction_agent_state in successful_auction_states
                                               if auction_agent_state[0].auction_task == finished_task), None)
                    if successful_auction is None:
                        print(f'Number of successful auction agent states: {len(successful_auction_states)}')
                        print(
                            f'Number of server tasks: {sum(len(tasks) for tasks in next_state.server_tasks.values())}')
                        print(f'Finished task: {str(finished_task)}\n\n')
                        print(f'State: {str(state)}\n')
                        print(f'Next state: {str(next_state)}')
                        break

                    # Remove the successful auction agent state
                    successful_auction_states.remove(successful_auction)

                    # Unwrap the successful auction agent state tuple
                    auction_state, action, next_auction_state = successful_auction

                    # Add the winning auction bid info to the agent
                    server_pricing_agents[server].winning_auction_bid(auction_state, action, finished_task,
                                                                      next_auction_state)

            # Add the agent states for resource allocation
            for server, tasks in state.server_tasks.items():
                agent_state = ResourceAllocationState(tasks, server, state.time_step)
                next_agent_state = ResourceAllocationState(next_state.server_tasks[server], server,
                                                           next_state.time_step)

                server_weighting_agents[server].resource_allocation_obs(agent_state, weighting_actions[server],
                                                                        next_agent_state, finished_server_tasks[server])
        assert all(task.auction_time <= next_state.time_step <= task.deadline
                   for _, tasks in next_state.server_tasks.items() for task in tasks)
        # Update the state with the next state
        state = next_state
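
train_agent covers a single episode, so a driver calls it in a loop; a sketch in which the settings path, episode count and agent construction are placeholders:

# Hypothetical driver loop: constructing the RL agents is repository-specific
training_env = OnlineFlexibleResourceAllocationEnv('env/settings/basic.env')
pricing_agents: List[TaskPricingRLAgent] = []           # fill with configured pricing agents
weighting_agents: List[ResourceWeightingRLAgent] = []   # fill with configured weighting agents
for episode in range(100):                              # illustrative episode count
    train_agent(training_env, pricing_agents, weighting_agents)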

Example #5
from typing import Dict

from tqdm import tqdm


def test_env_step_rnd_action():
    """
    Tests that the environment works with random actions
    """
    print()

    # Generate the environment
    env = OnlineFlexibleResourceAllocationEnv([
        '../src/training/settings/basic.env',
        '../src/training/settings/large_tasks_servers.env',
        '../src/training/settings/mixture_tasks_servers.env',
        '../src/training/settings/limited_resources.env',
    ])

    # Random action agents
    random_task_pricing, random_resource_weighting = RandomTaskPricingAgent(
        0), RandomResourceWeightingAgent(0)

    # Run the environment multiple times
    for _ in tqdm(range(200)):
        state = env.reset()

        # Number of auction opportunities
        num_auction_opportunities = len(
            env._unallocated_tasks) + (1 if state.auction_task else 0)
        # Number of auction and resource allocation steps taken
        num_auctions, num_resource_allocations = 0, 0
        # Number of environment servers
        num_servers = len(state.server_tasks)

        # Take random steps over the environment
        done = False
        while not done:
            # Check that the number of servers is constant
            assert len(state.server_tasks) == num_servers

            # Generate the actions
            if state.auction_task:
                actions: Dict[Server, float] = {
                    server: random_task_pricing.bid(state.auction_task,
                                                    allocated_tasks, server,
                                                    state.time_step)
                    for server, allocated_tasks in state.server_tasks.items()
                }
                num_auctions += 1
            else:
                actions: Dict[Server, Dict[Task, float]] = {
                    server:
                    random_resource_weighting.weight(tasks, server,
                                                     state.time_step)
                    for server, tasks in state.server_tasks.items()
                }
                num_resource_allocations += 1

            # Take the action on the environment
            state, reward, done, info = env.step(actions)
            assert all(task.auction_time <= state.time_step <= task.deadline
                       for _, tasks in state.server_tasks.items()
                       for task in tasks)
            for server, tasks in state.server_tasks.items():
                server.assert_valid()
                for task in tasks:
                    task.assert_valid()

        # Check that the number of auction and resource allocation steps are correct
        assert state.auction_task is None
        assert len(env._unallocated_tasks) == 0
        assert num_auctions == num_auction_opportunities
        assert num_resource_allocations == env._total_time_steps + 1
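
The branching above encodes the environment's step protocol: auction steps take Dict[Server, float] bids while resource allocation steps take Dict[Server, Dict[Task, float]] weights. A compact sketch of that dispatch, assuming the agent and environment interfaces used throughout these examples:

def random_step(env, env_state, pricing_agent, weighting_agent):
    # Auction steps price the auctioned task; other steps weight the allocated tasks
    if env_state.auction_task is not None:
        actions = {server: pricing_agent.bid(env_state.auction_task, tasks, server, env_state.time_step)
                   for server, tasks in env_state.server_tasks.items()}
    else:
        actions = {server: weighting_agent.weight(tasks, server, env_state.time_step)
                   for server, tasks in env_state.server_tasks.items()}
    return env.step(actions)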