示例#1
0
                policy.updateOffset(0.2)

            if td_lambda.name == "true online" or td_lambda.name == "trad online":
                td_lambda.episodeReset()
            if td_lambda.name == "nac":
                f_gradient_vec_diff = open(
                    "{0}{1}".format(
                        results_dir,
                        "/GradientVecDiff{0}.fso".format(episode_number)), "w",
                    1)
                if episode_number > 0:
                    td_lambda.decayStatistics(decay=0.0)

            # traces.reset()
            nessie.disable_behaviours(True)
            nessie.nav_reset()
            random_yaw = float(randrange(-314, 314, 1)) / 100.0
            possible_starting_states = [-1.57, 1.57]
            # random_yaw = possible_starting_states[episode_number % 2]
            print("YAWING to: {0}".format(random_yaw))
            nessie.pilot([0, 0, 0, 0, 0, 1.57])
            rospy.sleep(2.0)
            nessie.disable_behaviours(False)

            if episode_number % CONFIG["log_actions"] == 0:
                nessie.logNav(log_nav=True,
                              dir_path=results_dir,
                              file_name_descriptor=str(episode_number))
            if episode_number > 5 and exploration_sigma > 0.1:
                exploration_sigma *= CONFIG["exploration_decay"]
            cum_step_time = 0.0
            angle_dt = 0.0
            prev_action = None
            critic_value_func_params = None

            if episode_number == CONFIG["episode_fault_injected"]:
                thrusters.inject_surge_thruster_fault(17, 1)

            td_lambda.episodeReset()

            # Reset the last action value
            # last_yaw_mean = yaw_ros_action("gaussian_variance", 0.7, False, 1.0)

            # Enable behaviours
            ros_behaviour_interface.disable_behaviours(disable=True)
            ros_behaviour_interface.nav_reset()
            random_yaw = 1.57 + 0.785#float(random.randrange(-314, 314, 5)) / 100
            print("Moving to Random Starting Yaw: {0}".format(random_yaw))
            hover_completed = ros_behaviour_interface.pilot([0, 0, 0, 0, 0,random_yaw])
            rospy.sleep(5)
            ros_behaviour_interface.disable_behaviours(disable=False)
            print(hover_completed)
            rospy.sleep(0.5)    # give the nav topic callback to be called before the next episode is started.
                                # otherwise the next episode may start in a episode termination condition

            logNav(log_nav=True, dir_path=results_dir, file_name_descriptor=str(episode_number))

            # Loop number of steps
            for step_number in range(CONFIG["max_num_steps"]):
                # --------- STATE AND TRACES ---------
                # get current state
示例#3
0
class RepeatPilotRequest(object):
    def __init__(self):
        args = sys.argv
        if "-r" in args:
            self.results_dir_name = args[args.index("-r") + 1]
        else:
            self.results_dir_name = "repeat_pilot_request"

        self.position_normaliser = DynamicNormalizer([-2.4, 2.4], [-1.0, 1.0])
        self.position_deriv_normaliser = DynamicNormalizer([-1.75, 1.75],
                                                           [-1.0, 1.0])
        self.angle_normaliser = DynamicNormalizer([-3.14, 3.14], [-1.0, 1.0])
        self.angle_deriv_normaliser = DynamicNormalizer([-0.02, 0.02],
                                                        [-1.0, 1.0])

        self.angle_dt_moving_window = SlidingWindow(5)
        self.last_150_episode_returns = SlidingWindow(150)

        self.thrusters = Thrusters()
        self.env = ROSBehaviourInterface()
        self.environment_info = EnvironmentInfo()

        sub_pilot_position_controller_output = rospy.Subscriber(
            "/pilot/position_pid_output", FloatArray,
            self.positionControllerCallback)

        self.prev_action = 0.0
        self.pos_pid_output = np.zeros(6)

    def positionControllerCallback(self, msg):
        self.pos_pid_output = msg.values

    def update_state_t(self):
        raw_angle = deepcopy(self.environment_info.raw_angle_to_goal)
        # print("raw angle:")
        # raw_angle_dt = raw_angle - self.prev_angle_dt_t
        # print("raw angle dt: {0}".format(raw_angle_dt))
        self.state_t = {
            "angle": self.angle_normaliser.scale_value(raw_angle),
            "angle_deriv": self.prev_angle_dt_t
        }
        self.prev_angle_dt_t = deepcopy(raw_angle)

    def update_state_t_p1(self):
        raw_angle = deepcopy(self.environment_info.raw_angle_to_goal)
        angle_tp1 = self.angle_normaliser.scale_value(raw_angle)
        angle_t = self.state_t["angle"]

        abs_angle_tp1 = np.abs(angle_tp1)
        abs_angle_t = np.abs(angle_t)
        if abs_angle_tp1 > abs_angle_t:
            sign = -1
        else:
            sign = 1
        angle_change = sign * abs(abs_angle_tp1 - abs_angle_t)

        # print("angle t: {0}".format(abs_angle_t))
        # print("angle tp1: {0}".format(abs_angle_tp1))
        # print("angle change: {0}".format(angle_change))

        tmp_angle_change = sum(
            self.angle_dt_moving_window.getWindow(angle_change)) / 5.0
        self.state_t_plus_1 = {
            "angle":
            self.angle_normaliser.scale_value(raw_angle),
            "angle_deriv":
            self.angle_deriv_normaliser.scale_value(tmp_angle_change)
        }
        self.prev_angle_dt_t = self.angle_deriv_normaliser.scale_value(
            tmp_angle_change)

    def run(self):
        results_dir = "/home/gordon/data/tmp/{0}{1}".format(
            self.results_dir_name, 0)

        for run in range(CONFIG["num_runs"]):

            # Create logging directory and files
            if not os.path.exists(results_dir):
                os.makedirs(results_dir)
            filename = os.path.basename(sys.argv[0])
            os.system("cp {0} {1}".format(filename, results_dir))
            os.system(
                "cp /home/gordon/rosbuild_ws/ros_simple_rl/src/utilities/repeat_pilot_request.py {0}"
                .format(results_dir))

            # reset stuff for the run
            self.env.nav_reset()
            self.env.reset()
            self.angle_dt_moving_window.reset()
            self.prev_angle_dt_t = 0.0
            self.prev_angle_dt_tp1 = 0.0

            # create log file
            f_actions = open(
                "{0}{1}".format(results_dir, "/actions{0}.csv".format(run)),
                "w", 1)

            start_time = time.time()
            end_time = start_time + CONFIG["run_time"]

            first_step = True

            while time.time() < end_time and not rospy.is_shutdown():

                # send pilot request
                self.env.pilotPublishPositionRequest([0, 0, 0, 0, 0, 0])

                # perform a 'step'
                self.update_state_t()
                rospy.sleep(0.1)
                self.update_state_t_p1()

                # log the current state information
                if first_step:
                    first_step = False
                    state_keys = self.state_t.keys()
                    state_keys.append("action")
                    label_logging_format = "#{" + "}\t{".join(
                        [str(state_keys.index(el))
                         for el in state_keys]) + "}\n"
                    f_actions.write(label_logging_format.format(*state_keys))

                logging_list = self.state_t.values()
                logging_list.append(self.pos_pid_output[5])
                action_logging_format = "{" + "}\t{".join(
                    [str(logging_list.index(el))
                     for el in logging_list]) + "}\n"
                f_actions.write(action_logging_format.format(*logging_list))
示例#4
0
class OptimizePilotPid(object):
    def __init__(self):
        args = sys.argv
        if "-r" in args:
            results_dir_name = args[args.index("-r") + 1]
        else:
            results_dir_name = "ga_pid_tuning"
        self.results_dir = "/home/gordon/data/tmp/{0}{1}".format(
            results_dir_name, 0)

        if not os.path.exists(self.results_dir):
            os.makedirs(self.results_dir)
        filename = os.path.basename(sys.argv[0])
        os.system("cp {0} {1}".format(filename, self.results_dir))
        os.system(
            "cp /home/gordon/rosbuild_ws/ros_simple_rl/src/ga_optimization/ga.py {0}"
            .format(self.results_dir))

        # TODO - add DataFrame for evolution history and generation
        self.df_evolution_history = pd.DataFrame()
        self.f_evolution_history = open(
            "{0}{1}".format(self.results_dir, "/evolution_history.csv"), "w",
            1)

        self.position_normaliser = DynamicNormalizer([-2.4, 2.4], [-1.0, 1.0])
        self.position_deriv_normaliser = DynamicNormalizer([-1.75, 1.75],
                                                           [-1.0, 1.0])
        self.angle_normaliser = DynamicNormalizer([-3.14, 3.14], [-1.0, 1.0])
        self.angle_deriv_normaliser = DynamicNormalizer([-0.02, 0.02],
                                                        [-1.0, 1.0])

        self.angle_dt_moving_window = SlidingWindow(5)
        self.last_150_episode_returns = SlidingWindow(150)

        self.thrusters = Thrusters()
        self.env = ROSBehaviourInterface()
        self.environment_info = EnvironmentInfo()
        self.baseline_response = optimal_control_response()

        sub_pilot_position_controller_output = rospy.Subscriber(
            "/pilot/position_pid_output", FloatArray,
            self.positionControllerCallback)

        self.prev_action = 0.0
        self.pos_pid_output = np.zeros(6)

    def positionControllerCallback(self, msg):
        self.pos_pid_output = msg.values

    def update_state_t(self):
        raw_angle = deepcopy(self.environment_info.raw_angle_to_goal)
        # print("raw angle:")
        # raw_angle_dt = raw_angle - self.prev_angle_dt_t
        # print("raw angle dt: {0}".format(raw_angle_dt))
        self.state_t = {
            "angle": self.angle_normaliser.scale_value(raw_angle),
            "angle_deriv": self.prev_angle_dt_t
        }
        self.prev_angle_dt_t = deepcopy(raw_angle)

    def update_state_t_p1(self):
        raw_angle = deepcopy(self.environment_info.raw_angle_to_goal)
        angle_tp1 = self.angle_normaliser.scale_value(raw_angle)
        angle_t = self.state_t["angle"]

        abs_angle_tp1 = np.abs(angle_tp1)
        abs_angle_t = np.abs(angle_t)
        if abs_angle_tp1 > abs_angle_t:
            sign = -1
        else:
            sign = 1
        angle_change = sign * abs(abs_angle_tp1 - abs_angle_t)

        # print("angle t: {0}".format(abs_angle_t))
        # print("angle tp1: {0}".format(abs_angle_tp1))
        # print("angle change: {0}".format(angle_change))

        tmp_angle_change = sum(
            self.angle_dt_moving_window.getWindow(angle_change)) / 5.0
        self.state_t_plus_1 = {
            "angle":
            self.angle_normaliser.scale_value(raw_angle),
            "angle_deriv":
            self.angle_deriv_normaliser.scale_value(tmp_angle_change)
        }
        self.prev_angle_dt_t = self.angle_deriv_normaliser.scale_value(
            tmp_angle_change)

    def setPidGains(self, posP, posI, posD, velP, velI, velD):
        self.env.enable_pilot(False)
        rospy.set_param("/pilot/controller/pos_n/kp", float(posP))
        rospy.set_param("/pilot/controller/pos_n/ki", float(posI))
        rospy.set_param("/pilot/controller/pos_n/kd", float(posD))
        # rospy.set_param("/pilot/controller/vel_r/kp", str(velP))
        # rospy.set_param("/pilot/controller/vel_r/ki", str(velI))
        # rospy.set_param("/pilot/controller/vel_r/kd", str(velD))
        self.env.enable_pilot(True)

    def run(self):
        # Inputs of the equation.
        ga_state = [0.5, 0.0]

        # Preparing the population
        # Number of the weights we are looking to optimize.
        ga_num_weights = len(ga_state)

        pop_size = (CONFIG["sol_per_pop"], ga_num_weights)

        population = np.random.uniform(low=0.0, high=0.1, size=pop_size)
        f_generation_max = open(
            "{0}{1}".format(self.results_dir, "/generation_history.csv"), "w",
            1)
        global_individual_id = 0
        # TODO - loop generations
        for generation_id in range(CONFIG["num_generations"]):
            generation_df = pd.DataFrame()
            local_individual_id = 0
            print("------- New Generation ----------")
            # TODO - loop over population
            for individual in population:
                print("----Individual----")
                # TODO - Check for similar individual in history and if exists, just use it's fitness
                # individual_df = pd.DataFrame([tuple(individual)])
                # fudge the column names to be consistent with self.df_evolution_history
                # individual_df.rename(columns={0: 1, 1: 2}, inplace=True)

                run_response = True
                match_found = False
                if not self.df_evolution_history.empty:
                    match_found, first_occurance_idx, fitness = self.check_for_existing_individual(
                        individual, ga_num_weights)
                    if match_found:
                        # print("Match found in history for individual, using it's previous fitness: {0}".format(fitness))
                        run_response = False

                if run_response:
                    response = self.get_response(global_individual_id,
                                                 individual)
                    # print("response:")
                    # print_friendly_rounded = np.around(response[0:50, :], decimals=2)
                    # print(print_friendly_rounded)
                    individual_fitness = self.calculate_fitness(response)
                else:
                    individual_fitness = fitness

                individual_copy = deepcopy(individual)
                # gen_row2 = np.array([global_individual_id, individual_copy[0], individual_copy[1], individual_fitness])
                gen_row = pd.DataFrame()

                gen_row[0] = [global_individual_id
                              ] if not match_found else [first_occurance_idx]
                gen_row[1] = [individual_copy[0]]
                gen_row[2] = [individual_copy[1]]
                gen_row[3] = [individual_fitness]
                print("Row being added to generation_df: ")
                print(gen_row)
                generation_df = generation_df.append(gen_row,
                                                     ignore_index=True)
                # if local_individual_id == 0:
                #     gen_df_last_index = generation_df.columns.array[-1]
                #     generation_df.rename(columns={gen_df_last_index: 'fitness'}, inplace=True)

                evolution_row = pd.DataFrame()
                evolution_row[0] = [global_individual_id]
                evolution_row[1] = [individual_copy[0]]
                evolution_row[2] = [individual_copy[1]]
                evolution_row[3] = [individual_fitness]
                self.df_evolution_history = self.df_evolution_history.append(
                    evolution_row, ignore_index=True)
                # print("DataFrame Evolution History: {0}".format(global_individual_id))
                # print(self.df_evolution_history)
                # if global_individual_id == 0:
                #     self.df_evolution_history.rename(
                #                                 columns={self.df_evolution_history.columns.array[-1]: 'fitness'},
                #                                 inplace=True
                #     )
                self.log_entry(self.f_evolution_history, global_individual_id,
                               individual, individual_fitness)

                global_individual_id += 1
                local_individual_id += 1

            # fitness now calculated for population
            # print("----------")
            # print(generation_df.head(8))
            # print("-----=======")
            # print(self.df_evolution_history)
            # print("============")
            # Log the max fitness for the current generation
            best_fitness_idx = generation_df[
                generation_df.columns[-1]].idxmin()
            print("generation row idx of min fitness: {0}".format(
                best_fitness_idx))
            generation_max_fitness = generation_df.iloc[
                best_fitness_idx, generation_df.columns[-1]]
            generation_max_global_id = generation_df.iloc[best_fitness_idx][0]
            generation_max_fitness_individual = generation_df.iloc[
                best_fitness_idx][1:ga_num_weights + 1]
            self.log_entry(f_generation_max, generation_max_global_id,
                           generation_max_fitness_individual,
                           generation_max_fitness)

            fitness = deepcopy(generation_df.iloc[:, -1].array)

            print("Fitness array:")
            print(fitness)
            # Selecting the best parents in the population for mating.
            parents = select_mating_pool(population, fitness,
                                         CONFIG["num_parents_mating"])
            print("parents (best {0}): ".format(CONFIG["num_parents_mating"]))
            print(parents)

            offspring_crossover = crossover(
                parents,
                offspring_size=(pop_size[0] - parents.shape[0],
                                ga_num_weights))

            print("offspring_crossover: {0}".format(offspring_crossover))
            offspring_mutation = mutation(offspring_crossover, num_mutations=2)

            # Creating the new population based on the parents and offspring.
            population[0:parents.shape[0], :] = parents
            population[parents.shape[0]:, :] = offspring_mutation

    def check_for_existing_individual(self, individual, ga_num_weights):
        # print("check_for_existing_individual-> individual: {0}".format(individual))
        evolution_minus_fitness = self.df_evolution_history.iloc[:, 1:
                                                                 ga_num_weights
                                                                 + 1]
        # print("check_for_existing_individual-> evolution_minus_fitness: {0}".format(evolution_minus_fitness))

        match_found = False
        fitness = 9999999
        # TODO - remove loop and use more efficient numpy or pandas routine
        # individual_exists = evolution_minus_fitness[(evolution_minus_fitness == individual_df).all(axis=1)]
        for idx, row in evolution_minus_fitness.iterrows():
            is_close = np.allclose(individual, row.array, atol=0.01)
            if is_close:
                match_found = True
                history_entry = self.df_evolution_history.iloc[idx, :]
                print("match found: ")
                print(history_entry)
                fitness = history_entry.array[-1]
                break
        return match_found, idx, fitness

    def log_entry(self, file, id, individual, fitness):
        delimiter = "\t"
        ind_str = '\t'.join(map(str, individual))
        entry = "{0}{1}{2}{3}{4}\n".format(id, delimiter, ind_str, delimiter,
                                           fitness)
        file.write(entry)

    def calculate_fitness(self, response):
        diff = abs(response) - abs(self.baseline_response)
        max_idx = 100  # response.shape[0]
        step_errors = []

        for idx in range(max_idx):
            r = response[idx, 1]
            b = self.baseline_response[idx, 1]
            step_error = 0.0
            if r > b:
                step_error = r - b
            else:
                step_error = b - r
            step_errors.append(step_error)

        fitness = sum(step_errors)  # / len(step_errors)
        return fitness

    def get_response(self, id, individual):
        # reset stuff for the run
        self.env.nav_reset()
        # Set usable gains for DoHover action to get to initial position again
        # position sim gains: { "kp": 0.35, "ki": 0.0, "kd": 0.0 }
        # velocity sim gains: { "kp": 35.0, "ki": 0.0, "kd": 0.0 }
        self.setPidGains(0.35, 0, 0, 0, 0, 0)
        self.env.reset(disable_behaviours=False)
        self.angle_dt_moving_window.reset()
        self.prev_angle_dt_t = 0.0
        self.prev_angle_dt_tp1 = 0.0

        # Set the gains to those of the individual/solution
        self.setPidGains(individual[0], 0, individual[1], 0, 0, 0)

        # create log file
        f_actions = open(
            "{0}{1}".format(self.results_dir, "/actions{0}.csv".format(id)),
            "w", 1)

        start_time = time.time()
        end_time = start_time + CONFIG["run_time"]

        first_step = True
        response = np.zeros([350, 2])
        timestep = 0
        while time.time() < end_time and not rospy.is_shutdown():

            # send pilot request
            self.env.pilotPublishPositionRequest([0, 0, 0, 0, 0, 0])

            # perform a 'step'
            self.update_state_t()
            rospy.sleep(0.1)
            self.update_state_t_p1()

            # log the current state information
            if first_step:
                first_step = False
                state_keys = self.state_t.keys()
                state_keys.append("baseline_angle")
                state_keys.append("action")
                label_logging_format = "#{" + "}\t{".join(
                    [str(state_keys.index(el)) for el in state_keys]) + "}\n"
                f_actions.write(label_logging_format.format(*state_keys))

            logging_list = self.state_t.values()
            logging_list.append(self.baseline_response[timestep, 1])
            logging_list.append(self.pos_pid_output[5])
            action_logging_format = "{" + "}\t{".join(
                [str(logging_list.index(el)) for el in logging_list]) + "}\n"
            response[timestep, :] = [timestep, logging_list[0]]
            timestep += 1
            f_actions.write(action_logging_format.format(*logging_list))
        return response