Exemplo n.º 1
    def __init__(self, env, max_layer):
        """Set up the search state and seed the list of candidate modifications.

        A QAgent is trained on ``env`` for 600 episodes. The results of the
        wall-interference and cell-frequency heuristics computed from that
        agent are stored in ``self.modifications`` as ``(kind, a, b)`` tuples:
        kind 0 for entries of the wall heuristic, kind 1 for entries of the
        cell heuristic.

        Parameters
        ----------
        env: environment the initial agent is trained on
        max_layer: stored unchanged as ``self.max_layer``
        """
        self.env = env
        self.modifications = []
        self.counter = 0
        self.nodes = []

        trained = QAgent(env)
        trained.qlearn(600, show=False)
        wall_ranking = wall_interference(trained)
        cell_ranking = cell_frequency(trained)

        # Kind 0: one candidate per entry returned by the wall heuristic.
        self.modifications.extend(
            (0, entry[0][0], entry[0][1]) for entry in wall_ranking
        )

        # Kind 1: only the first 14 entries of the cell heuristic are kept.
        self.modifications.extend(
            (1, entry[0][0], entry[0][1]) for entry in cell_ranking[0:14]
        )

        self.num_nodes = 0
        self.root = None
        self.max_layer = max_layer
        self.threshold = 8

        # Best reward seen so far and the environment that produced it.
        self.max_reward = float("-inf")
        self.opt_env = None
Exemplo n.º 2
def path_based(env, num_mods):
    """Try every wall/cell split of ``num_mods`` modifications and keep the best.

    An agent is trained on the original environment and the cell-frequency
    and wall-interference heuristics are computed from it. For each
    ``k`` in ``range(num_mods)`` the first ``k`` walls of the wall heuristic
    are applied to a deep copy of ``env`` via ``transition``, and the first
    ``num_mods - k`` cells of the cell heuristic are recorded in the sequence.
    Note that in this function the cells are only recorded; they are not
    applied to the copied environment before it is evaluated.

    Parameters
    ----------
    env: the original environment (a deep copy is modified, not ``env``)
    num_mods: total number of modifications in each candidate sequence

    Returns
    -------
    ``(sequence, value)``: the recorded ``(kind, a, b)`` tuples of the best
    candidate and its utility. ``sequence`` is ``None`` when no candidate
    scored above ``-inf`` (e.g. ``num_mods == 0``).
    """
    # Heuristics come from an agent trained on the unmodified environment.
    base_agent = QAgent(env)
    base_agent.qlearn(600, render=False)
    cell_dict = cell_frequency(base_agent)
    wall_dict = wall_interference(base_agent)

    best_seq, best_val = None, float("-inf")

    for wall_count in range(num_mods):
        candidate = []
        modified_env = copy.deepcopy(env)

        # Apply the first `wall_count` walls, one transition per wall.
        chosen_walls = [wall_dict[idx][0] for idx in range(wall_count)]
        for wall in chosen_walls:
            modified_env = modified_env.transition([wall])
            candidate.append((0, wall[0], wall[1]))

        # The remaining slots are filled with cells (recorded only).
        cell_count = num_mods - wall_count
        chosen_cells = [cell_dict[idx][0] for idx in range(cell_count)]
        candidate.extend((1, cell[0], cell[1]) for cell in chosen_cells)

        trial_agent = QAgent(modified_env)
        print(colored("Iteration {} begins!".format(wall_count), "red"))
        print(modified_env.walls, modified_env.special)
        trial_agent.qlearn(600, render=False)

        score = utility(trial_agent)
        if score > best_val:
            best_val, best_seq = score, candidate

    return best_seq, best_val
Exemplo n.º 3
def greedy(env, num_mods):
    """Greedily choose ``num_mods`` modifications using the wall and cell heuristics.

    In every round the heuristics are computed from the current agent, all
    entries tied at the maximum heuristic value (second item of each entry)
    are tried, and the option whose re-trained agent has the highest utility
    is kept.

    Parameters
    ----------
    env: the original environment (a deep copy is modified, not ``env``)
    num_mods: the number of modifications

    Returns
    -------
    ``(greedy_seq, result)``: the chosen modifications as
    ``(category, a, b)`` tuples (category 0 = wall, 1 = cell) in the order
    they were picked, and the utility of a fresh agent trained on the final
    reference environment.

    Raises
    ------
    ValueError: if neither heuristic returns any candidate in some round.
    """
    greedy_seq = []
    ref = copy.deepcopy(env)
    agent = None

    for i in range(num_mods):
        # For each iteration, find out the wall that most interferes and the
        # cell that is crossed the most. Try out all options.
        if i == 0:
            agent = QAgent(ref)
            agent.qlearn(600, render=False)
        else:
            # Keep learning on the environment as modified by the previous
            # round; otherwise the heuristics below would be computed from a
            # stale agent and the same modification would be picked again.
            agent = connected_qlearn(agent, ref, 300)

        # Take out the lists from the heuristics.
        wall_dict = wall_interference(agent)
        cell_dict = cell_frequency(agent)

        # Take out the max values, and the options to try out. `default`
        # lets one heuristic be empty as long as the other has candidates.
        max_wall = max((elem[1] for elem in wall_dict), default=None)
        max_cell = max((elem[1] for elem in cell_dict), default=None)

        wall_options = [elem[0] for elem in wall_dict if elem[1] == max_wall]
        cell_options = [elem[0] for elem in cell_dict if elem[1] == max_cell]

        if not wall_options and not cell_options:
            raise ValueError(
                "greedy: neither heuristic returned a candidate modification"
            )

        # Test out all the options, get optimal modification
        opt_value = float("-inf")
        opt_choice = None
        category = -1

        for wall in wall_options:
            print(colored("Testing environment", "red"))
            e = ref.transition([wall])
            new_agent = connected_qlearn(agent, e, 300)

            # Get utility
            val = utility(new_agent)
            if val > opt_value:
                opt_value = val
                opt_choice = wall
                category = 0

        for cell in cell_options:
            print(colored("Testing environment", "red"))
            # NOTE(review): the candidate cell is not applied to the copy
            # here, so every cell option is evaluated on an environment
            # identical to `ref` — confirm whether the cell should be marked
            # on `e` before training.
            e = copy.deepcopy(ref)
            new_agent = connected_qlearn(agent, e, 300)

            # Get utility
            val = utility(new_agent)
            if val > opt_value:
                opt_value = val
                opt_choice = cell
                category = 1

        assert category != -1, "no option produced a utility above -inf"

        # Store found modification and change the reference environment
        greedy_seq.append((category, opt_choice[0], opt_choice[1]))
        if category == 0:
            ref = ref.transition([opt_choice])
        # NOTE(review): a chosen cell (category 1) is recorded but `ref` is
        # left unchanged — verify against how cells are applied elsewhere.

    # Evaluate utility
    total_agent = QAgent(ref)
    total_agent.qlearn(600, render=False)
    result = utility(total_agent)

    return greedy_seq, result
Exemplo n.º 4
    num_processes = 10
    processes = []
    manager = Manager()
    agents = manager.list()
    for i in range(rounds * num_processes):
        agents.append(0)  # keeper

    categories = []
    num_mods = 1

    map_to_numpy = np.asarray(map, dtype="c")
    env = TaxiEnv(map_to_numpy)  # reference environment

    orig_agent = QAgent(env)
    orig_agent.qlearn(600, show=False)
    cell_dict = cell_frequency(orig_agent)
    wall_dict = wall_interference(orig_agent)
    modifications = []

    for element in wall_dict:
        modifications.append((0, element[0]))
    for element in cell_dict[0:14]:
        row, col = element[0]
        modifications.append((1, (row, col)))

    for iter in range(rounds):
        print(colored("Data addition round {} begins!".format(iter), "red"))
        for i in range(num_processes):
            results = simulate_env(env, num_mods)
            modified = results[0]
Exemplo n.º 5
            index = random.randint(index + 1, N - k + i)


    return res

# Trial budget: 2e6 trials when there are 6 modifications, 1e6 otherwise.
# (Without the `else`, the second assignment overwrote the first and
# `num_trials` was left unassigned for every other value of `num_mods`.)
if num_mods == 6:
    num_trials = int(2e+6)
else:
    num_trials = int(1e+6)

# Train an agent on the environment and compute both heuristics from it.
agent = QAgent(env)
agent.qlearn(600, show=False)
cell_dict = cell_frequency(agent)
wall_dict = wall_interference(agent)
modifications = []

# Candidate modifications as (kind, a, b): kind 0 for every entry of the
# wall heuristic, kind 1 for the first 14 entries of the cell heuristic.
for element in wall_dict:
    modifications.append((0, element[0][0], element[0][1]))
for element in cell_dict[0:14]:
    row, col = element[0]
    modifications.append((1, row, col))

# Initialize and build heap
# Heap size is 12 entries per modification, capped at len(x_test).
sz = min(12 * num_mods, len(x_test))
h = Heap(model, x_test[0:sz], sz)

ls = []