예제 #1
0
    def generate_dataset(self):
        """Solve every instance in the input folder in parallel and post-process the results.

        Every entry of the folder ``get_project_path() + self.input_path`` is submitted to
        ``self.solve_instance`` in a process pool, together with the values stored in
        ``self.hyperparameters`` (keys ``n_iters``, ``max_temp``, ``min_temp``, ``eq_iter`` and
        ``temp_change``), the heuristics returned by ``self.set_heuristics()`` and the path of a
        timestamped CSV file. Once all jobs have finished, ``self.process_data`` is called with
        that CSV path.

        Any exception raised inside a worker is re-raised here when its result is collected.
        """
        # This string represents the folder where the instances to be solved are located. The folder containing these
        # instances has to have the raw text file for each instance. They should not be grouped within more sub-folders.
        base_path_instances = "".join([get_project_path(), self.input_path])

        # Create dictionary with heuristics
        heuristics = self.set_heuristics()

        # Creates array with the filenames of the instances to be solved
        file_names = [''.join([base_path_instances, file]) for file in os.listdir(base_path_instances)]

        # Output file name: <output_path><YYYYmmdd>_<HHMMSS><suffix>.csv
        # (the date part previously used the hour, '%H', where the month, '%m', belongs).
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        csv_file = "".join([get_project_path(), self.output_path, timestamp, self.output_suffix_name, '.csv'])

        # Called with headers=True before any instance is solved (presumably writes the header row — confirm).
        self.save_to_output(csv_file, headers=True)

        params = self.hyperparameters

        # The context manager shuts the pool down even when a worker raises. No Manager is started
        # because no shared (proxy) objects are handed to the workers.
        with ProcessPoolExecutor() as executor:
            futures = [executor.submit(self.solve_instance, params['n_iters'],
                                       params['max_temp'], params['min_temp'],
                                       params['eq_iter'], params['temp_change'],
                                       heuristics, file, csv_file) for file in file_names]

            # Wait for every job; result() propagates worker exceptions.
            for future in futures:
                future.result()

        # Reading Data to process it into final dataset
        self.process_data(csv_file)
예제 #2
0
def get_data(relative_path="/Experiments/multiple_datasets/time_window_overlap_features/182_instances/dataset_oversampled_smote.csv"):
    """Load a dataset CSV and split it into a feature frame and an encoded label frame.

    :param relative_path: path of the CSV file; it is appended verbatim to the string returned by
           ``get_project_path()``.
    :return: tuple ``(X, y)``. ``X`` is a DataFrame with every column except ``'heuristic'``; ``y`` is a
             single-column DataFrame holding the ``'heuristic'`` column after its values have been replaced
             through the ``class2idx`` mapping. Both carry a fresh 0..n-1 index.
    """
    absolute_path = "".join([get_project_path(), relative_path])

    xy = pd.read_csv(absolute_path)

    # Encode labels. The result is assigned back instead of calling replace(..., inplace=True) on the
    # column selection: that chained in-place form is deprecated and, with pandas Copy-on-Write,
    # would leave ``xy`` unmodified.
    xy['heuristic'] = xy['heuristic'].replace(class2idx)

    # Separate features and labels.
    # reset_index(drop=True) discards the old index directly, so it also works when the CSV
    # itself contains a column named 'index'.
    # Features
    X = xy.drop(columns=['heuristic']).reset_index(drop=True)
    # Labels
    y = xy[['heuristic']].reset_index(drop=True)

    return X, y
    def optimization_function_sa(
            self, max_temp, min_temp, eq_iter, temp_change,
            interroute_relocate_select, cross_exchange_select,
            geni_exchange_select, interroute_2opt_select,
            interroute_exchange_select, interroute_relocate2_select,
            interroute_relocate3_select, intraroute_2opt_select,
            intraroute_exchange_select, intraroute_oropt_select,
            intraroute_relocate_select):
        """ Objective function or blackbox function used for tuning the hyperparameters of SA via Bayesian Optimization

        This function receives all SA hyperparameters and solves all instances in the base_path_instances folder. For the
        heuristics used as neighborhood operator, it creates a heuristics dictionary. This dictionary contains the name of
        the heuristic as key and as value an array where the first element is the heuristic function itself and the second
        element is an integer with value either 1 or 0. Initially, a float value is received for each heuristic. This value
        is between 0 and 1. When the heuristic dictionary is initialized, the values are rounded and for each heuristic, if
        the received float value is greater than .5, then it will be rounded up to 1, otherwise when it is less than or
        equal to .5, it will be rounded down to 0. When a heuristic has a value of 1, this means that it will be used by
        the SA metaheuristic in the neighborhood operator. When it has a value of 0 it means it won't be used.

        For details on how the multiprocessing works, see the documentation for

        Manager: https://docs.python.org/3/library/multiprocessing.html#multiprocessing.sharedctypes.multiprocessing.Manager
        ProcessPoolExecutor:https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ProcessPoolExecutor


        :param max_temp: float value representing the initial temperature.
        :param min_temp: float value representing the final temperature to be reached, which is when the algorithm stops.
        :param eq_iter: value representing the iterations that the algorithm performs at each temperature step. That
               means that, every time there is a change in temperature, the algorithm runs eq_iter times. It is rounded
               to the nearest integer before use.
        :param temp_change: float value between 0 and 1 representing the proportion by which the temperature is decreased at
               each iteration. For example, if the current temperature is 100 and the temp_change is .95, then one change in
               temperature is defined as 100 * 0.95 = 95, and 95 will be the updated temperature.
        :param interroute_relocate_select: float value between 0 and 1. The value is rounded and >.5 goes to 1 and <=.5 goes
               to 0. When the value is 1, it means the heuristic will be used by the metaheuristic when solving instances. A
               value of 0 means it will not be used.
        :param cross_exchange_select: selection flag, same meaning as interroute_relocate_select.
        :param geni_exchange_select: selection flag, same meaning as interroute_relocate_select.
        :param interroute_2opt_select: selection flag, same meaning as interroute_relocate_select.
        :param interroute_exchange_select: selection flag, same meaning as interroute_relocate_select.
        :param interroute_relocate2_select: selection flag, same meaning as interroute_relocate_select.
        :param interroute_relocate3_select: selection flag, same meaning as interroute_relocate_select.
        :param intraroute_2opt_select: selection flag, same meaning as interroute_relocate_select.
        :param intraroute_exchange_select: selection flag, same meaning as interroute_relocate_select.
        :param intraroute_oropt_select: selection flag, same meaning as interroute_relocate_select.
        :param intraroute_relocate_select: selection flag, same meaning as interroute_relocate_select.
        :return: double value representing the negative average best cost over all solved instances. It has to be negative
                 because, since it is used with bayesian optimization, it tries to maximize.
        :raises ZeroDivisionError: if the instances folder contains no entries.
        """

        # Make sure hyperparameters that should be integers, are indeed integers
        eq_iter = round(eq_iter)

        # This is an integer value representing the number of times that an instance is to be solved. This is used for when
        # trying to get a more accurate and robust value of the best cost when solving a particular instance. This is
        # because the average will be taken as the final value. So, if n_iters=10, then a particular instance will be solved
        # 10 times and the final best cost will be taken as the average of the 10 runs.
        n_iters = 10

        # This string represents the folder where the instances to be solved are located. The folder containing these
        # instances has to have the raw text file for each instance. They should not be grouped within more sub-folders.
        base_path_instances = "".join(
            [get_project_path(), self.relative_path_to_instances])

        # (name, heuristic function, raw selection value) for every neighborhood operator
        selections = (
            ("interroute_relocate", interroute_relocate, interroute_relocate_select),
            ("cross_exchange", cross_exchange, cross_exchange_select),
            ("geni_exchange", geni_exchange, geni_exchange_select),
            ("interroute_2opt", interroute_2opt, interroute_2opt_select),
            ("interroute_exchange", interroute_exchange, interroute_exchange_select),
            ("interroute_relocate2", interroute_relocate2, interroute_relocate2_select),
            ("interroute_relocate3", interroute_relocate3, interroute_relocate3_select),
            ("intraroute_2opt", intraroute_2opt, intraroute_2opt_select),
            ("intraroute_exchange", intraroute_exchange, intraroute_exchange_select),
            ("intraroute_oropt", intraroute_oropt, intraroute_oropt_select),
            ("intraroute_relocate", intraroute_relocate, intraroute_relocate_select),
        )

        # Create dictionary with heuristics: name -> [function, rounded 0/1 flag]
        heuristics = {
            name: [operator, round(raw_flag)]
            for name, operator, raw_flag in selections
        }

        # Creates array with the filenames of the instances to be solved
        file_names = [
            ''.join([base_path_instances, file])
            for file in os.listdir(base_path_instances)
        ]

        # Both context managers clean up even when a worker raises; the executor (entered last) is shut
        # down first, while the manager that owns the shared objects is still alive.
        with Manager() as manager, ProcessPoolExecutor() as executor:
            lock = manager.Lock()
            # Holds the cumulative sum of the best cost gotten from solving all instances
            total_cost = manager.Value('i', 0)

            futures = [
                executor.submit(self.solve_instance, total_cost, lock, n_iters,
                                max_temp, min_temp, eq_iter, temp_change,
                                heuristics, file) for file in file_names
            ]

            # Wait for every job; result() propagates worker exceptions.
            for future in futures:
                future.result()

            # Average best cost over all solved instances. The divisor is the number of submitted jobs,
            # so it cannot drift if the folder content changes while the instances are being solved.
            avg_best_cost = total_cost.value / len(file_names)

        return -avg_best_cost
def main():
    """Solve every instance with each heuristic separately and export statistics and instance features.

    For each entry of the instances folder and each heuristic in ``heuristics_dict``, a fresh
    ``SimulatedAnnealing`` object configured with only that heuristic solves the instance ``N_ITERS``
    times. Every run yields one row made of the heuristic statistics, the best cost, descriptive
    features of the instance (demand, capacity, time windows) and the time-window overlap features
    returned by ``compute_overlap``. All rows are written to
    ``statistics_and_tw_overlap_features.csv`` in the current working directory.
    """
    # Alternative instance folders: "/vrptw/instances/all_instances/", "/vrptw/instances/temp/"
    input_path = "/vrptw/instances/initial_instances/"

    # This string represents the folder where the instances to be solved are located. The folder containing these
    # instances has to have the raw text file for each instance. They should not be grouped within more sub-folders.
    base_path_instances = "".join([get_project_path(), input_path])

    # Creates array with the filenames of the instances to be solved
    file_names = [
        ''.join([base_path_instances, file])
        for file in os.listdir(base_path_instances)
    ]

    # Heuristics to evaluate, keyed by name
    heuristics_dict = {
        "interroute_relocate": interroute_relocate,
        "cross_exchange": cross_exchange,
        "geni_exchange": geni_exchange,
        "interroute_2opt": interroute_2opt,
        "interroute_exchange": interroute_exchange,
        "interroute_relocate2": interroute_relocate2,
        "interroute_relocate3": interroute_relocate3,
        "intraroute_2opt": intraroute_2opt,
        "intraroute_exchange": intraroute_exchange,
        "intraroute_oropt": intraroute_oropt,
        "intraroute_relocate": intraroute_relocate,
    }

    # Columns that always lead the output file, in this order
    column_names = [
        'used', 'feasible', 'improved', 'improved_value', 'best', 'best_value',
        'best_cost', 'problem', 'heuristic', 'avg_overlap', 'total_overlap',
        'std_overlap', 'skewness_overlap', 'kurtosis_overlap'
    ]

    N_ITERS = 10

    # One dict per run. The frame is built once at the end: DataFrame.append was removed in
    # pandas 2.0 and copied the whole frame on every call.
    rows = []

    # Compute values for each instance
    for instance_counter, instance in enumerate(file_names, start=1):
        for heuristic_counter, (heuristic_name, heuristic) in enumerate(
                heuristics_dict.items(), start=1):
            print('****** INSTANCE: {}/{} ****** HEURISTIC: {}/{}\n'.format(
                instance_counter, len(file_names), heuristic_counter,
                len(heuristics_dict)),
                  flush=True)

            # Solve current instance n times with the same heuristic
            for iteration_counter in range(1, N_ITERS + 1):
                print('ITERATION: {}/{}\n'.format(iteration_counter, N_ITERS),
                      flush=True)

                method = SimulatedAnnealing(max_temp=35,
                                            min_temp=0.1,
                                            eq_iter=50,
                                            temp_change=0.95,
                                            trace=False,
                                            histograms=False)

                method.add_heuristic(
                    PerturbativeHeuristic(heuristic_name, heuristic))

                method.solve(instance)

                problem = method.problem
                # The customer count used in the features leaves out one entry of problem.customers
                # (row 0 is skipped everywhere below — presumably the depot; confirm).
                n_customers = problem.ncustomers - 1

                # Get customers as data frame
                df_customers = pd.DataFrame(list(map(vars, problem.customers)))

                # Series reused by several features; [1:] skips the first row
                demand = df_customers['demand'][1:]
                tw_width = df_customers['due_date'][1:] - df_customers['ready_time'][1:]
                depot = df_customers[df_customers['customer'] == 0]

                # Process stats into dictionary
                data_dict = ast.literal_eval(method.heuristics[0].statistics())
                data_dict['best_cost'] = method.best_cost

                # Add data about problem to dict
                data_dict['problem'] = 'S' + str(n_customers) + "_" + str(problem.name)

                # Change key names
                data_dict['heuristic'] = data_dict.pop('name')

                # Size, capacity and demand features
                data_dict['nvehicles'] = problem.nvehicles
                data_dict['vehicle_capacity'] = problem.capacity
                data_dict['ncustomers'] = n_customers
                data_dict['avg_demand'] = round(demand.mean(), 4)
                data_dict['total_demand'] = demand.sum()
                data_dict['x_depot'] = depot['xcoord'].values[0]
                data_dict['y_depot'] = depot['ycoord'].values[0]
                data_dict['std_demand'] = round(demand.std(), 2)
                data_dict['skewness_demand'] = round(demand.skew(), 4)
                data_dict['kurtosis_demand'] = round(demand.kurtosis(), 4)
                data_dict['std_prop_demand_capacity'] = round(
                    np.std(demand / problem.capacity), 2)
                data_dict['prop_demand_capacity'] = demand.sum() / (
                    problem.capacity * problem.nvehicles)
                # Uses the full demand column (first row included), as in the original computation
                data_dict['prop_largest_custmr_demand_capacity'] = \
                    df_customers['demand'].max() / problem.capacity
                data_dict['avg_ncustmr_vehicle'] = n_customers / problem.nvehicles
                data_dict['min_nvehicles'] = round(
                    demand.sum() / problem.capacity, 4)

                # Service time and time-window features
                data_dict['avg_service_time'] = round(
                    df_customers['service_duration'][1:].mean(), 4)
                data_dict['sum_ready_time'] = df_customers['ready_time'][1:].sum()
                data_dict['avg_tw'] = np.sum(tw_width) / n_customers
                # NOTE(review): the standard deviation is divided by the customer count, which looks
                # copied from avg_tw. Kept as is so the generated dataset does not change — confirm.
                data_dict['std_tw'] = np.std(tw_width) / n_customers
                data_dict['skewness_tw'] = round(tw_width.skew(), 4)
                data_dict['kurtosis_tw'] = round(tw_width.kurtosis(), 4)

                # TW overlap features
                (data_dict['avg_overlap'], data_dict['total_overlap'],
                 data_dict['std_overlap'], data_dict['skewness_overlap'],
                 data_dict['kurtosis_overlap']) = compute_overlap(df_customers)

                rows.append(data_dict)

    # Build the results frame: the declared columns first, then every additional feature column
    # in the order it was added to the row dictionaries.
    df_results = pd.DataFrame(rows)
    extra_columns = [
        column for column in df_results.columns if column not in column_names
    ]
    df_results = df_results.reindex(columns=column_names + extra_columns)

    df_results.to_csv("statistics_and_tw_overlap_features.csv", index=False)