Example #1
def standard_global():
    """
    Train and evaluate rover policies in the rover domain using the global reward (G)
    """

    # Parameters
    stat_runs = p["stat_runs"]
    generations = p["generations"]
    population_size = p["pop_size"]
    n_rovers = p["n_rovers"]
    domain_type = p["g_type"]
    rover_steps = p["steps"]

    rd = RoverDomain(new_world=False)

    # Create dictionary for each instance of rover and corresponding NN and EA population
    rovers = {}
    for rover_id in range(n_rovers):
        rovers["Rover{0}".format(rover_id)] = Rover(rover_id)
        rovers["EA{0}".format(rover_id)] = Ccea(population_size)

    final_rover_path = np.zeros((stat_runs, n_rovers, rover_steps + 1, 3))
    for srun in range(stat_runs):  # Perform statistical runs
        print("Run: %i" % srun)

        # World Configuration Setup
        rd.inital_world_setup(srun)
        for rover_id in range(n_rovers):  # Randomly initialize CCEA populations
            rovers["Rover{0}".format(rover_id)].initialize_rover(srun)
            rovers["EA{0}".format(rover_id)].create_new_population()  # Create new CCEA population
        reward_history = []

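        # Training loop: each generation, every rover's CCEA selects policy teams,
        # each team is rolled out for rover_steps steps, and a policy's fitness is
        # the best global reward observed during its episode.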
        for gen in range(generations):
            for rover_id in range(n_rovers):
                rovers["EA{0}".format(rover_id)].select_policy_teams()
            for team_number in range(population_size):  # Each policy in CCEA is tested in teams
                for rover_id in range(n_rovers):
                    rovers["Rover{0}".format(rover_id)].reset_rover()
                    policy_id = int(rovers["EA{0}".format(rover_id)].team_selection[team_number])
                    weights = rovers["EA{0}".format(rover_id)].population["pol{0}".format(policy_id)]
                    rovers["Rover{0}".format(rover_id)].get_weights(weights)
                for rover_id in range(n_rovers):  # Rover runs initial scan of environment
                    rovers["Rover{0}".format(rover_id)].scan_environment(
                        rovers, rd.pois, n_rovers)

                g_rewards = np.zeros(rover_steps)
                for step_id in range(rover_steps):
                    for rover_id in range(n_rovers):  # Rover processes scan information and acts
                        rovers["Rover{0}".format(rover_id)].step(
                            rd.world_x, rd.world_y)
                    for rover_id in range(n_rovers):  # Rovers scan environment
                        rovers["Rover{0}".format(rover_id)].scan_environment(
                            rovers, rd.pois, n_rovers)
                        rd.update_observer_distances(
                            rover_id,
                            rovers["Rover{0}".format(rover_id)].poi_distances)
                    if domain_type == "Loose":
                        g_rewards[step_id] = rd.calc_global_loose()
                    else:
                        g_rewards[step_id] = rd.calc_global_tight()

                # Update fitness of policies using reward information
                for rover_id in range(n_rovers):
                    policy_id = int(rovers["EA{0}".format(rover_id)].team_selection[team_number])
                    rovers["EA{0}".format(rover_id)].fitness[policy_id] = max(g_rewards)

            # Testing Phase (test best policies found so far)
            if gen % 10 == 0 or gen == generations - 1:
                for rover_id in range(n_rovers):
                    rovers["Rover{0}".format(rover_id)].reset_rover()
                    policy_id = np.argmax(rovers["EA{0}".format(rover_id)].fitness)
                    weights = rovers["EA{0}".format(rover_id)].population["pol{0}".format(policy_id)]
                    rovers["Rover{0}".format(rover_id)].get_weights(weights)

                    if gen == generations - 1:
                        # Record Initial Rover Position
                        rover_pos = rovers["Rover{0}".format(rover_id)].pos
                        final_rover_path[srun, rover_id, 0, 0] = rover_pos[0]
                        final_rover_path[srun, rover_id, 0, 1] = rover_pos[1]
                        final_rover_path[srun, rover_id, 0, 2] = rover_pos[2]
                for rover_id in range(n_rovers):  # Rover runs initial scan of environment
                    rovers["Rover{0}".format(rover_id)].scan_environment(
                        rovers, rd.pois, n_rovers)

                g_rewards = np.zeros(rover_steps)
                for step_id in range(rover_steps):
                    for rover_id in range(n_rovers):  # Rover processes information from scan and acts
                        rovers["Rover{0}".format(rover_id)].step(
                            rd.world_x, rd.world_y)
                        if gen == generations - 1:
                            # Record Position of Each Rover
                            rover_pos = rovers["Rover{0}".format(rover_id)].pos
                            final_rover_path[srun, rover_id, step_id + 1, 0] = rover_pos[0]
                            final_rover_path[srun, rover_id, step_id + 1, 1] = rover_pos[1]
                            final_rover_path[srun, rover_id, step_id + 1, 2] = rover_pos[2]
                    for rover_id in range(n_rovers):  # Rover scans environment
                        rovers["Rover{0}".format(rover_id)].scan_environment(
                            rovers, rd.pois, n_rovers)
                        rd.update_observer_distances(
                            rover_id,
                            rovers["Rover{0}".format(rover_id)].poi_distances)
                    if domain_type == "Loose":
                        g_rewards[step_id] = rd.calc_global_loose()
                    else:
                        g_rewards[step_id] = rd.calc_global_tight()

                reward_history.append(max(g_rewards))

            for rover_id in range(n_rovers):
                rovers["EA{0}".format(rover_id)].down_select(
                )  # Choose new parents and create new offspring population

        save_reward_history(reward_history, "Global_Reward.csv")
        for rover_id in range(n_rovers):
            policy_id = np.argmax(rovers["EA{0}".format(rover_id)].fitness)
            weights = rovers["EA{0}".format(rover_id)].population["pol{0}".format(policy_id)]
            save_best_policies(weights, srun,
                               "RoverWeights{0}".format(rover_id), rover_id)

    save_rover_path(final_rover_path, "Rover_Paths")
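
For context, here is a minimal driver sketch. In the original project, `p` appears to be a shared parameter dictionary imported by this module; the sketch below only lists the keys that `standard_global()` reads, and every value is a placeholder rather than a project default.

# Illustrative only: keys match the lookups in standard_global(); values are placeholders.
p = {
    "stat_runs": 1,       # number of statistical runs
    "generations": 500,   # CCEA generations per run
    "pop_size": 30,       # policies in each rover's CCEA population
    "n_rovers": 4,        # rovers in the team (one CCEA population per rover)
    "g_type": "Loose",    # "Loose" or "Tight" global reward coupling
    "steps": 25,          # rover steps per episode
}

if __name__ == "__main__":
    standard_global()
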
Example #2
def test_suggestions(sgst):
    """
    Test trained behavior selection policies
    """

    # Parameters
    stat_runs = p["stat_runs"]
    n_rovers = p["n_rovers"]
    domain_type = p["g_type"]
    rover_steps = p["steps"]
    n_suggestions = p["n_suggestions"]

    rd = RoverDomain(new_world=False)

    # Create dictionary for each instance of rover and corresponding NN and EA population
    rovers = {}
    for rover_id in range(n_rovers):
        rovers["Rover{0}".format(rover_id)] = Rover(rover_id)
        rovers["SN{0}".format(rover_id)] = SuggestionNetwork(
            n_suggestions + 8, n_suggestions, 10)

    final_rover_path = np.zeros((stat_runs, n_rovers, rover_steps + 1, 3))
    reward_history = []
    for srun in range(stat_runs):
        print("Run: %i" % srun)

        # Load World Configuration
        rd.inital_world_setup(srun)
        for rover_id in range(n_rovers):
            rovers["Rover{0}".format(rover_id)].initialize_rover(srun)

        # Load Pre-Trained Policies
        policy_bank = {}
        for rover_id in range(n_rovers):
            for pol_id in range(n_suggestions):
                policy_bank["Rover{0}Policy{1}".format(rover_id, pol_id)] = load_saved_policies(
                    "GoTowardsPOI{}".format(pol_id), rover_id, srun)
            weights = load_saved_policies(
                "SelectionWeights{0}".format(rover_id), rover_id, srun)
            rovers["SN{0}".format(rover_id)].get_weights(weights)

        for rover_id in range(n_rovers):
            suggestion = np.ones(n_suggestions) * -1
            if domain_type == "Loose":
                suggestion[rover_id] = 1
            else:
                if rover_id % 2 == 0:
                    suggestion[0] = 1
                else:
                    suggestion[1] = 1
            rovers["Rover{0}".format(rover_id)].reset_rover()
            rovers["Rover{0}".format(rover_id)].scan_environment(
                rovers, rd.pois, n_rovers)
            sensor_input = np.concatenate(
                (suggestion,
                 rovers["Rover{0}".format(rover_id)].sensor_readings),
                axis=0)
            rovers["SN{0}".format(rover_id)].get_inputs(sensor_input)
            policy_outputs = rovers["SN{0}".format(rover_id)].get_outputs()
            rovers["Rover{0}".format(rover_id)].update_policy_belief(
                policy_outputs)
            chosen_pol = np.argmax(
                rovers["Rover{0}".format(rover_id)].policy_belief)
            weights = policy_bank["Rover{0}Policy{1}".format(
                rover_id, chosen_pol)]
            rovers["Rover{0}".format(rover_id)].get_weights(weights)

            # Record Initial Rover Position
            rover_pos = rovers["Rover{0}".format(rover_id)].pos
            final_rover_path[srun, rover_id, 0, 0] = rover_pos[0]
            final_rover_path[srun, rover_id, 0, 1] = rover_pos[1]
            final_rover_path[srun, rover_id, 0, 2] = rover_pos[2]

        global_reward = []
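        # Roll out one episode: at every step each rover re-queries its suggestion
        # network, loads the selected pre-trained policy, moves, and the team's
        # global reward for that step is recorded.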
        for step_id in range(rover_steps):
            for rover_id in range(n_rovers):  # Rover interprets suggestion and chooses policy
                suggestion = np.ones(n_suggestions) * -1
                if domain_type == "Loose":
                    suggestion[rover_id] = 1
                else:
                    if rover_id % 2 == 0:
                        suggestion[0] = 1
                    else:
                        suggestion[1] = 1
                sensor_input = np.concatenate(
                    (suggestion,
                     rovers["Rover{0}".format(rover_id)].sensor_readings),
                    axis=0)
                rovers["SN{0}".format(rover_id)].get_inputs(sensor_input)
                policy_outputs = rovers["SN{0}".format(rover_id)].get_outputs()
                rovers["Rover{0}".format(rover_id)].update_policy_belief(
                    policy_outputs)
                chosen_pol = np.argmax(
                    rovers["Rover{0}".format(rover_id)].policy_belief)
                weights = policy_bank["Rover{0}Policy{1}".format(
                    rover_id, chosen_pol)]
                rovers["Rover{0}".format(rover_id)].get_weights(weights)
            for rover_id in range(n_rovers):  # Rover processes information from scan and acts
                rovers["Rover{0}".format(rover_id)].step(
                    rd.world_x, rd.world_y)
                # Record Position of Each Rover
                rover_pos = rovers["Rover{0}".format(rover_id)].pos
                final_rover_path[srun, rover_id, step_id + 1, 0] = rover_pos[0]
                final_rover_path[srun, rover_id, step_id + 1, 1] = rover_pos[1]
                final_rover_path[srun, rover_id, step_id + 1, 2] = rover_pos[2]
            for rover_id in range(n_rovers):
                rovers["Rover{0}".format(rover_id)].scan_environment(
                    rovers, rd.pois, n_rovers)
                rd.update_observer_distances(
                    rover_id,
                    rovers["Rover{0}".format(rover_id)].poi_distances)

            if domain_type == "Loose":
                global_reward.append(rd.calc_global_loose())
            elif domain_type == "Tight":
                global_reward.append(rd.calc_global_tight())
        reward_history.append(max(global_reward))

    save_reward_history(reward_history, "DBSS_Rewards.csv")
    save_rover_path(final_rover_path, "Rover_Paths")
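
As a companion to the test above, the sketch below isolates how the suggestion network's input vector is assembled. `build_suggestion_input` is a hypothetical helper, not part of the original codebase; it assumes the rover exposes exactly eight sensor readings, which is consistent with the `n_suggestions + 8` input size passed to `SuggestionNetwork` above.

import numpy as np


def build_suggestion_input(rover_id, sensor_readings, n_suggestions, g_type="Tight"):
    """Hypothetical helper mirroring the input construction in test_suggestions()."""
    suggestion = np.ones(n_suggestions) * -1  # start with all suggestions "off"
    if g_type == "Loose":
        suggestion[rover_id] = 1              # loose coupling: point each rover at its own POI
    elif rover_id % 2 == 0:
        suggestion[0] = 1                     # tight coupling: even rovers get suggestion 0
    else:
        suggestion[1] = 1                     # tight coupling: odd rovers get suggestion 1
    # The network input is the suggestion vector followed by the sensor readings
    return np.concatenate((suggestion, sensor_readings), axis=0)

With four suggestions, for example, build_suggestion_input(0, np.zeros(8), 4) returns a 12-element vector whose first entry is 1 and whose remaining suggestion entries are -1.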