Example #1
def train_obstacle_avoid_policy(obs_id):
    """
    Train a NN control policy that avoids obstacles
    :return:
    """

    # Initialize the rover domain, a single rover, and its CCEA population
    rd = RoverDomain()
    rd.use_saved_poi_configuration()
    rd.use_saved_obstacle_config()
    rd.use_saved_rover_training_configs()
    rov = Rover(0)
    ea = Ccea(rov.n_inputs, rov.n_hnodes, rov.n_outputs)
    ea.create_new_population()

    for gen in range(generations):
        ea.reset_fitness()
        if gen == 0:
            policy_id = 0
        else:
            policy_id = ea.n_elites  # Elites carried over from the previous generation are not re-evaluated
        while policy_id < ea.pop_size:
            for config_id in range(n_configs):
                rov.reset_rover(rd.rover_configs[config_id])  # Reset rover to initial conditions
                rov.get_network_weights(ea.population["pop{0}".format(policy_id)])  # Apply network weights from CCEA
                rd.update_rover_path(rov, -1)  # Record starting position of each rover

                for step_id in range(n_steps):
                    rov.rover_sensor_scan(rd.pois, rd.obstacles, n_poi, n_obstacles)
                    rov.step(rd.world_x, rd.world_y)
                    rd.update_rover_path(rov, step_id)

                    # Update fitness of policies using reward information
                    ea.fitness[policy_id] += avoid_obstacle_reward(obs_id, rd.obstacles, rov, rd.world_x, rd.world_y)
            ea.fitness[policy_id] /= n_configs
            policy_id += 1

        # Choose new parents and create new offspring population
        ea.down_select()

    best_pol_id = np.argmax(ea.fitness)
    best_policy = ea.population["pop{0}".format(best_pol_id)]

    return best_policy
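
This snippet assumes module-level parameters (generations, n_configs, n_steps, n_poi, n_obstacles), the RoverDomain, Rover, and Ccea classes, numpy as np, and an avoid_obstacle_reward() helper, all defined elsewhere in the project. As a rough sketch of how the function might be driven, here is a hypothetical caller that trains one avoidance policy per obstacle and pickles each result under the same names Example #2 later loads; the parameter values and the save_best_policy helper are illustrative assumptions, not the project's actual code.

import pickle

# Hypothetical module-level parameters assumed by train_obstacle_avoid_policy()
generations = 500   # CCEA generations per policy (illustrative value)
n_configs = 10      # rover starting configurations averaged per evaluation
n_steps = 25        # time steps per rollout
n_poi = 4           # number of POI in the saved configuration
n_obstacles = 3     # number of obstacles in the saved configuration

def save_best_policy(policy, file_name):
    """Hypothetical helper: pickle the best weight vector to disk."""
    with open(file_name, "wb") as f:
        pickle.dump(policy, f)

if __name__ == "__main__":
    # Train one avoidance policy per obstacle so the brain can choose among them later
    for obs_id in range(n_obstacles):
        best_policy = train_obstacle_avoid_policy(obs_id)
        save_best_policy(best_policy, "AvoidObstacle{0}".format(obs_id))
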
Example #2
def multi_reward_learning_single():
    """
    Trains the brain to choose between trained rover control policies
    :return:
    """

    policies = {}
    if train_new_policies == 1:
        policies = train_policies()
    else:
        pol_id = 0
        for poi_id in range(n_poi):
            policies['Policy{0}'.format(pol_id)] = use_saved_policy('TowardsPOI{0}'.format(poi_id))
            pol_id += 1
            policies['Policy{0}'.format(pol_id)] = use_saved_policy('AwayFromPOI{0}'.format(poi_id))
            pol_id += 1
        for obs_id in range(n_obstacles):
            policies['Policy{0}'.format(pol_id)] = use_saved_policy("AvoidObstacle{0}".format(obs_id))
            pol_id += 1

    print("Training The Brain")
    visualizer_rover_path = np.zeros((s_runs, (n_steps + 1), 3))
    for srun in range(s_runs):  # Perform statistical runs
        print("Run: %i" % srun)

        # Initialize the rover domain, the rover, the brain network, and the brain's CCEA population
        rd = RoverDomain()
        rd.use_saved_poi_configuration()
        rd.use_saved_obstacle_config()
        rd.use_saved_rover_training_configs()
        rd.use_saved_rover_test_config()
        br = Brain()
        rov = Rover(0)
        ea = Ccea(br.n_inputs, br.n_hnodes, br.n_outputs)
        ea.create_new_population()

        # Train the Brain
        reward_history = []
        for gen in range(brain_gen):
            ea.reset_fitness()
            if gen == 0:
                policy_id = 0
            else:
                policy_id = ea.n_elites
            while policy_id < ea.pop_size:  # Evaluate each policy in the brain's CCEA population
                for config_id in range(n_configs):
                    rov.reset_rover(rd.rover_configs[config_id])  # Reset rover to initial conditions
                    br.get_weights(ea.population["pop{0}".format(policy_id)])  # Apply network weights from CCEA
                    rd.update_rover_path(rov, -1)  # Record starting position of each rover

                    for step_id in range(n_steps):
                        rov.rover_sensor_scan(rd.pois, rd.obstacles, n_poi, n_obstacles)
                        state_vector = [rov.sensor_readings[bracket] for bracket in range(8)]

                        # Pick policy using brain's decision
                        br.get_inputs(state_vector)
                        br.get_outputs()
                        rov_policy = pick_policy(br.output_layer[0, 0], policies)
                        rov.get_network_weights(rov_policy)
                        rov.step(rd.world_x, rd.world_y)
                        rd.update_rover_path(rov, step_id)

                        # Update fitness of policies using reward information
                        global_reward = rd.step_based_global_reward(rov)
                        ea.fitness[policy_id] += global_reward

                ea.fitness[policy_id] /= n_configs
                policy_id += 1

            # Testing Phase (test best policies found so far) ---------------------------------------------------------
            rov.reset_rover(rd.rover_test_config)  # Reset rover to initial conditions
            policy_id = np.argmax(ea.fitness)
            br.get_weights(ea.population["pop{0}".format(policy_id)])  # Apply best set of weights to network
            rd.update_rover_path(rov, -1)

            for step_id in range(n_steps):
                rov.rover_sensor_scan(rd.pois, rd.obstacles, n_poi, n_obstacles)
                state_vector = [rov.sensor_readings[bracket] for bracket in range(8)]

                # Pick policy using brain's decision
                br.get_inputs(state_vector)
                br.get_outputs()
                rov_policy = pick_policy(br.output_layer[0, 0], policies)
                rov.get_network_weights(rov_policy)
                rov.step(rd.world_x, rd.world_y)
                rd.update_rover_path(rov, step_id)

            global_reward = calc_global_reward(rd.rover_path, rd.pois, rd.obstacles)
            reward_history.append(global_reward)

            if gen == (brain_gen - 1):  # Save path at end of final generation
                visualizer_rover_path[srun] = rd.rover_path.copy()
                save_rover_path(visualizer_rover_path)

            # Choose new parents and create new offspring population
            ea.down_select()

        save_reward_history(reward_history, "Multi_Reward.csv")
    run_visualizer()
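
pick_policy() above maps the brain's single output node to one of the stored control policies; it is defined elsewhere in the project. A minimal sketch under the assumption that the output lies in [0, 1] and is binned evenly across the policy dictionary (both the range and the binning are assumptions):

def pick_policy(brain_output, policies):
    """Hypothetical sketch of the policy selector used by the brain.

    Assumes brain_output lies in [0, 1]; it is binned evenly across the
    'Policy0', 'Policy1', ... entries created when the policies were loaded.
    """
    n_policies = len(policies)
    clipped = min(max(float(brain_output), 0.0), 1.0)
    index = min(int(clipped * n_policies), n_policies - 1)  # keep the top edge in range
    return policies["Policy{0}".format(index)]
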
Example #3
def rovers_dpp_rewards(reward_type):
    """
    Train rovers using the D++ reward
    :param reward_type:
    :return:
    """
    p = get_parameters()
    rd = RoverDomain(p)

    # Create dictionary for each instance of rover and corresponding NN and EA population
    rv = {}
    for rv_id in range(p["n_rovers"]):
        rv["AG{0}".format(rv_id)] = Rover(p, rv_id, rd.rover_positions[rv_id])
        rv["EA{0}".format(rv_id)] = Ccea(p)

    print("Reward Type: ", reward_type)
    print("Coupling Requirement: ", p["c_req"])

    for srun in range(p["s_runs"]):  # Perform statistical runs
        print("Run: %i" % srun)

        # Reset CCEA and NN new stat run
        for rv_id in range(p["n_rovers"]):  # Randomly initialize CCEA populations
            rv["EA{0}".format(rv_id)].create_new_population()
        reward_history = []

        for gen in range(p["generations"]):
            for rv_id in range(p["n_rovers"]):
                rv["EA{0}".format(rv_id)].select_policy_teams()
            for team_number in range(p["pop_size"]):  # Each policy in CCEA is tested in teams
                for rv_id in range(p["n_rovers"]):
                    rv["AG{0}".format(rv_id)].reset_rover()  # Reset rover to initial conditions
                    policy_id = int(rv["EA{0}".format(rv_id)].team_selection[team_number])
                    weights = rv["EA{0}".format(rv_id)].population["pop{0}".format(policy_id)]
                    rv["AG{0}".format(rv_id)].get_network_weights(weights)  # Apply network weights from CCEA
                    rd.update_rover_path(rv["AG{0}".format(rv_id)], rv_id, -1)  # Record starting position of each rover

                for step_id in range(p["n_steps"]):
                    # Rover scans environment and constructs state vector
                    for rv_id in range(p["n_rovers"]):
                        rv["AG{0}".format(rv_id)].rover_sensor_scan(
                            rv, rd.pois, p["n_rovers"], p["n_poi"])

                    # Rover processes scan information and acts
                    for rv_id in range(p["n_rovers"]):
                        rv["AG{0}".format(rv_id)].step(p["x_dim"], p["y_dim"])
                        rd.update_rover_path(rv["AG{0}".format(rv_id)], rv_id,
                                             step_id)

                # Update fitness of policies using reward information
                global_reward = calc_global_reward(p, rd.rover_path, rd.pois)
                dpp_rewards = calc_dpp_reward(p, rd.rover_path, rd.pois,
                                              global_reward)
                for rv_id in range(p["n_rovers"]):
                    policy_id = int(rv["EA{0}".format(rv_id)].team_selection[team_number])
                    rv["EA{0}".format(rv_id)].fitness[policy_id] = dpp_rewards[rv_id]

            # Testing Phase (test best policies found so far) ---------------------------------------------------------
            for rv_id in range(p["n_rovers"]):
                rv["AG{0}".format(rv_id)].reset_rover()  # Reset rover to initial conditions
                policy_id = np.argmax(rv["EA{0}".format(rv_id)].fitness)
                weights = rv["EA{0}".format(rv_id)].population["pop{0}".format(policy_id)]
                rv["AG{0}".format(rv_id)].get_network_weights(weights)  # Apply best set of weights to network
                rd.update_rover_path(rv["AG{0}".format(rv_id)], rv_id, -1)

            for step_id in range(p["n_steps"]):
                # Rover scans environment and constructs state vector
                for rv_id in range(p["n_rovers"]):
                    rv["AG{0}".format(rv_id)].rover_sensor_scan(
                        rv, rd.pois, p["n_rovers"], p["n_poi"])

                # Rover processes information from scan and acts
                for rv_id in range(p["n_rovers"]):
                    rv["AG{0}".format(rv_id)].step(p["x_dim"], p["y_dim"])
                    rd.update_rover_path(rv["AG{0}".format(rv_id)], rv_id,
                                         step_id)

            global_reward = calc_global_reward(p, rd.rover_path, rd.pois)
            reward_history.append(global_reward)

            if gen == (p["generations"] - 1):  # Save path at end of final generation
                save_rover_path(p, rd.rover_path)

            # Choose new parents and create new offspring population
            for rv_id in range(p["n_rovers"]):
                rv["EA{0}".format(rv_id)].down_select()

        save_reward_history(reward_history, "DPP_Reward.csv")
    run_visualizer(p)
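
get_parameters() returns the parameter dictionary that drives this example; only the keys below are referenced here. A stand-in with illustrative (assumed) values could look like this:

def get_parameters():
    """Hypothetical stand-in for the project's parameter loader.

    Only the keys referenced in rovers_dpp_rewards() are listed; the values
    are illustrative assumptions, not the project's defaults.
    """
    return {
        "n_rovers": 6,        # number of rovers (and CCEA populations)
        "n_poi": 4,           # number of points of interest
        "c_req": 3,           # coupling requirement for observing a POI
        "s_runs": 15,         # statistical runs
        "generations": 1000,  # CCEA generations per run
        "pop_size": 40,       # policies per CCEA population
        "n_steps": 25,        # time steps per rollout
        "x_dim": 30.0,        # world width
        "y_dim": 30.0,        # world height
    }
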
Example #4
def main():
    """
    Train a neural network with an external memory block, using a CCEA, to classify
    sequences from the sequenceClassifier training and test sets
    """
    sc = sequenceClassifier()
    ag = Agent()
    cc = Ccea()
    nn = NeuralNetwork()

    if p["create_new_sets"] == 1:
        sc.create_training_set()
        sc.save_training_set()
        sc.create_test_set()
        sc.save_test_set()
    else:
        sc.load_training_set()
        sc.load_test_set()

    for s in range(p["s_runs"]):
        print("Stat Run: ", s)
        # Training
        training_reward_history = []
        test_reward_history = []
        state_vec = np.ones(p["n_inputs"])

        cc.create_new_population()

        for gen in range(p["generations"]):
            print("Gen: ", gen)
            pop_id = cc.n_elites  # Elites carried over from the previous generation are not re-evaluated
            while pop_id < p["pop_size"]:  # Test each remaining set of weights in the EA population
                nn.reset_nn()
                nn.get_weights(cc.population["pop{0}".format(pop_id)])
                fitness_score = 0.0

                for seq in range(p["train_set_size"]):
                    ag.reset_mem_block()
                    nn.clear_outputs()
                    seq_len = len(sc.training_set["set{0}".format(seq)])
                    current_sequence = sc.training_set["set{0}".format(seq)].copy()

                    for num in range(seq_len):
                        state_vec[0] = current_sequence[num]
                        nn.run_neural_network(state_vec, ag.mem_block)
                        ag.update_memory(nn.wgate_outputs, nn.encoded_memory)

                    # Score 1 when the network's output matches the sequence's labeled class
                    if nn.out_layer[0] < 0.5 and sc.training_set_answers[seq, 2] == -1:
                        fitness_score += 1
                    elif nn.out_layer[0] >= 0.5 and sc.training_set_answers[seq, 2] == 1:
                        fitness_score += 1

                cc.fitness[pop_id] = fitness_score / p["train_set_size"]
                pop_id += 1

            # Testing: evaluate the current best policy on the held-out test set
            nn.reset_nn()
            state_vec = np.ones(p["n_inputs"])
            best_pol_id = np.argmax(cc.fitness)  # Find the best policy currently in the population
            nn.get_weights(cc.population["pop{0}".format(best_pol_id)])
            test_reward = 0.0

            for seq in range(p["test_set_size"]):
                ag.reset_mem_block()
                nn.clear_outputs()
                seq_len = len(sc.test_set["set{0}".format(seq)])
                current_sequence = sc.test_set["set{0}".format(seq)].copy()

                for num in range(seq_len):
                    state_vec[0] = current_sequence[num]
                    nn.run_neural_network(state_vec, ag.mem_block)
                    ag.update_memory(nn.wgate_outputs, nn.encoded_memory)  # Same memory update as in the training rollout

                # Score against the test-set labels (assumes sc.test_set_answers mirrors sc.training_set_answers)
                if nn.out_layer[0] < 0.5 and sc.test_set_answers[seq, 2] == -1:
                    test_reward += 1
                elif nn.out_layer[0] >= 0.5 and sc.test_set_answers[seq, 2] == 1:
                    test_reward += 1

            test_reward_history.append(test_reward / p["test_set_size"])
            training_reward_history.append(max(cc.fitness))
            cc.down_select()

        save_reward_history(training_reward_history, "Training_Fitness.csv")
        save_reward_history(test_reward_history, "Test_Reward.csv")
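
save_reward_history() and save_rover_path() are project utilities used throughout these examples. As a sketch of what save_reward_history() might look like, assuming each call appends one statistical run's reward curve as a CSV row (the Output_Data directory name and layout are assumptions):

import csv
import os

def save_reward_history(reward_history, file_name):
    """Hypothetical sketch: append one statistical run's reward curve to a CSV file.

    Each call writes a single row whose columns are the rewards recorded at each
    generation. The Output_Data directory name is an assumption for illustration.
    """
    out_dir = "Output_Data"
    os.makedirs(out_dir, exist_ok=True)
    with open(os.path.join(out_dir, file_name), "a", newline="") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(reward_history)
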