def generate_dataset(self):
    """Solve every instance in the input folder in parallel and post-process the output.

    The instance folder is ``get_project_path() + self.input_path``. It must
    contain the raw text file of each instance directly; instances should not
    be grouped within sub-folders.

    A timestamped CSV path is built under ``self.output_path``, handed to
    ``self.save_to_output(..., headers=True)``, then passed to every
    ``self.solve_instance`` call and finally to ``self.process_data``.
    """
    base_path_instances = "".join([get_project_path(), self.input_path])

    # Create dictionary with heuristics
    heuristics = self.set_heuristics()

    # Full path of every instance to be solved. os.path.join yields the same
    # result as plain concatenation when the folder ends with a separator and
    # still builds a valid path when it does not.
    file_names = [
        os.path.join(base_path_instances, file)
        for file in os.listdir(base_path_instances)
    ]

    # Output file name: <YYYYmmdd>_<HHMMSS><suffix>.csv
    # (the month slot previously used '%H', which repeated the hour instead
    # of writing the month).
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    csv_file = "".join([
        get_project_path(),
        self.output_path,
        timestamp,
        self.output_suffix_name,
        '.csv',
    ])
    self.save_to_output(csv_file, headers=True)

    # The context manager guarantees the pool is shut down even when one of
    # the submitted jobs raises.
    with ProcessPoolExecutor() as executor:
        futures = [
            executor.submit(
                self.solve_instance,
                self.hyperparameters['n_iters'],
                self.hyperparameters['max_temp'],
                self.hyperparameters['min_temp'],
                self.hyperparameters['eq_iter'],
                self.hyperparameters['temp_change'],
                heuristics,
                file,
                csv_file,
            )
            for file in file_names
        ]
        # Wait for every job; result() re-raises any exception from a worker.
        for future in futures:
            future.result()

    # Reading Data to process it into final dataset
    self.process_data(csv_file)
def get_data(relative_path="/Experiments/multiple_datasets/time_window_overlap_features/182_instances/dataset_oversampled_smote.csv"):
    """Load a dataset CSV and split it into features and encoded labels.

    :param relative_path: path of the CSV file, appended as-is to the value
        returned by ``get_project_path()``.
    :return: tuple ``(X, y)`` where ``X`` is a DataFrame with every column
        except ``'heuristic'`` and ``y`` is a single-column DataFrame holding
        the ``'heuristic'`` column after its values were replaced through the
        module-level ``class2idx`` mapping. Both use a fresh 0..n-1 index.
    """
    absolute_path = "".join([get_project_path(), relative_path])
    xy = pd.read_csv(absolute_path)

    # Encode labels. Assign the result back instead of calling
    # replace(..., inplace=True) on the selected column: the chained in-place
    # form operates on a temporary under pandas Copy-on-Write and would leave
    # the DataFrame unchanged.
    xy['heuristic'] = xy['heuristic'].replace(class2idx)

    # Separate features and labels
    X = xy.drop(columns=['heuristic']).reset_index(drop=True)
    y = xy[['heuristic']].reset_index(drop=True)

    return X, y
def optimization_function_sa(
        self, max_temp, min_temp, eq_iter, temp_change,
        interroute_relocate_select, cross_exchange_select,
        geni_exchange_select, interroute_2opt_select,
        interroute_exchange_select, interroute_relocate2_select,
        interroute_relocate3_select, intraroute_2opt_select,
        intraroute_exchange_select, intraroute_oropt_select,
        intraroute_relocate_select):
    """
    Objective function or blackbox function used for tuning the hyperparameters of SA via Bayesian Optimization.

    This function receives all SA hyperparameters and solves all instances in the base_path_instances folder.

    For the heuristics used as neighborhood operator, it creates a heuristics dictionary. This dictionary contains
    the name of the heuristic as key and as value an array where the first element is the heuristic function
    itself and the second element is an integer with value either 1 or 0. Initially, a float value is received
    for each heuristic. This value is between 0 and 1. When the heuristic dictionary is initialized, the values
    are rounded: if the received float value is greater than .5 it is rounded up to 1, otherwise, when it is
    less than or equal to .5, it is rounded down to 0. When a heuristic has a value of 1, it will be used by
    the SA metaheuristic in the neighborhood operator. When it has a value of 0 it won't be used.

    For details on how the multiprocessing works, see the documentation for
    Manager: https://docs.python.org/3/library/multiprocessing.html#multiprocessing.sharedctypes.multiprocessing.Manager
    ProcessPoolExecutor: https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ProcessPoolExecutor

    :param max_temp: float value representing the initial temperature.
    :param min_temp: float value representing the final temperature to be reached, which is when the algorithm
        stops.
    :param eq_iter: integer value representing the iterations that the algorithm performs at each temperature
        step. That means that, every time there is a change in temperature, the algorithm runs eq_iter times.
        It is rounded to the nearest integer before use.
    :param temp_change: float value between 0 and 1 representing the proportion by which the temperature is
        decreased at each iteration. For example, if the current temperature is 100 and the temp_change is .95,
        then one change in temperature is defined as 100 * 0.95 = 95, and 95 will be the updated temperature.
    :param interroute_relocate_select: selection flag for the interroute_relocate heuristic (see note below).
    :param cross_exchange_select: selection flag for the cross_exchange heuristic (see note below).
    :param geni_exchange_select: selection flag for the geni_exchange heuristic (see note below).
    :param interroute_2opt_select: selection flag for the interroute_2opt heuristic (see note below).
    :param interroute_exchange_select: selection flag for the interroute_exchange heuristic (see note below).
    :param interroute_relocate2_select: selection flag for the interroute_relocate2 heuristic (see note below).
    :param interroute_relocate3_select: selection flag for the interroute_relocate3 heuristic (see note below).
    :param intraroute_2opt_select: selection flag for the intraroute_2opt heuristic (see note below).
    :param intraroute_exchange_select: selection flag for the intraroute_exchange heuristic (see note below).
    :param intraroute_oropt_select: selection flag for the intraroute_oropt heuristic (see note below).
    :param intraroute_relocate_select: selection flag for the intraroute_relocate heuristic (see note below).

    Note on every ``*_select`` parameter: float value between 0 and 1. The value is rounded and >.5 goes to 1
    and <=.5 goes to 0. When the value is 1, it means the heuristic will be used by the metaheuristic when
    solving instances. A value of 0 means it will not be used.

    :return: double value representing the negative average best cost over all solved instances. It has to be
        negative because, since it is used with bayesian optimization, it tries to maximize.
    :raises ZeroDivisionError: if the instances folder contains no files.
    """
    # Make sure hyperparameters that should be integers, are indeed integers
    eq_iter = round(eq_iter)

    # Number of times that an instance is to be solved. This is used to get a more accurate and robust value
    # of the best cost when solving a particular instance, because the average will be taken as the final
    # value. So, if n_iters=10, a particular instance will be solved 10 times and the final best cost will be
    # taken as the average of the 10 runs.
    n_iters = 10

    # Folder where the instances to be solved are located. The folder has to contain the raw text file for
    # each instance. They should not be grouped within more sub-folders.
    base_path_instances = "".join(
        [get_project_path(), self.relative_path_to_instances])

    # Create dictionary with heuristics: name -> [function, 0/1 selection flag]
    heuristics = {
        "interroute_relocate": [interroute_relocate, round(interroute_relocate_select)],
        "cross_exchange": [cross_exchange, round(cross_exchange_select)],
        "geni_exchange": [geni_exchange, round(geni_exchange_select)],
        "interroute_2opt": [interroute_2opt, round(interroute_2opt_select)],
        "interroute_exchange": [interroute_exchange, round(interroute_exchange_select)],
        "interroute_relocate2": [interroute_relocate2, round(interroute_relocate2_select)],
        "interroute_relocate3": [interroute_relocate3, round(interroute_relocate3_select)],
        "intraroute_2opt": [intraroute_2opt, round(intraroute_2opt_select)],
        "intraroute_exchange": [intraroute_exchange, round(intraroute_exchange_select)],
        "intraroute_oropt": [intraroute_oropt, round(intraroute_oropt_select)],
        "intraroute_relocate": [intraroute_relocate, round(intraroute_relocate_select)],
    }

    # Full path of every instance to be solved. os.path.join matches plain concatenation when the folder ends
    # with a separator and still builds a valid path when it does not.
    file_names = [
        os.path.join(base_path_instances, file)
        for file in os.listdir(base_path_instances)
    ]

    # Both context managers guarantee cleanup (pool shutdown, manager stop) even when a worker raises.
    with Manager() as manager, ProcessPoolExecutor() as executor:
        lock = manager.Lock()
        # Holds the cumulative sum of the best cost gotten from solving all instances
        total_cost = manager.Value('i', 0)

        futures = [
            executor.submit(self.solve_instance, total_cost, lock, n_iters,
                            max_temp, min_temp, eq_iter, temp_change,
                            heuristics, file)
            for file in file_names
        ]
        # Wait for every job; result() re-raises any exception from a worker.
        for future in futures:
            future.result()

        # Average best cost over all solved instances. The divisor is the number of submitted instances,
        # so it cannot drift if the folder contents change while the jobs run.
        avg_best_cost = total_cost.value / len(file_names)

    return -avg_best_cost
def _build_result_row(method):
    """Build one result row (heuristic statistics + instance features) from a solved run.

    :param method: solver object after ``solve`` was called; it must expose
        ``problem``, ``heuristics`` and ``best_cost``.
    :return: dict mapping column name to value for the results table.
    """
    problem = method.problem

    # Get customers as data frame
    df_customers = pd.DataFrame(list(map(vars, problem.customers)))

    # Row 0 is skipped in every customer statistic below (presumably the
    # depot: depot coordinates are read from the row whose customer id is 0).
    demand = df_customers['demand'][1:]
    time_windows = df_customers['due_date'][1:] - df_customers['ready_time'][1:]
    n_customers = problem.ncustomers - 1
    depot_rows = df_customers[df_customers['customer'] == 0]

    # Process stats into dictionary
    data_dict = ast.literal_eval(method.heuristics[0].statistics())
    data_dict['best_cost'] = method.best_cost

    # Add data about problem to dict
    data_dict['problem'] = 'S' + str(n_customers) + "_" + str(problem.name)

    # Change key names
    data_dict['heuristic'] = data_dict.pop('name')

    data_dict['nvehicles'] = problem.nvehicles
    data_dict['vehicle_capacity'] = problem.capacity
    data_dict['ncustomers'] = n_customers
    data_dict['avg_demand'] = round(demand.mean(), 4)
    data_dict['total_demand'] = demand.sum()
    data_dict['x_depot'] = depot_rows['xcoord'].values[0]
    data_dict['y_depot'] = depot_rows['ycoord'].values[0]
    data_dict['std_demand'] = round(demand.std(), 2)
    data_dict['skewness_demand'] = round(demand.skew(), 4)
    data_dict['kurtosis_demand'] = round(demand.kurtosis(), 4)
    data_dict['std_prop_demand_capacity'] = round(
        np.std(demand / problem.capacity), 2)
    data_dict['prop_demand_capacity'] = demand.sum() / (
        problem.capacity * problem.nvehicles)
    # Uses the full demand column (row 0 included), as the original did.
    data_dict['prop_largest_custmr_demand_capacity'] = (
        df_customers['demand'].max() / problem.capacity)
    data_dict['avg_ncustmr_vehicle'] = n_customers / problem.nvehicles
    data_dict['min_nvehicles'] = round(demand.sum() / problem.capacity, 4)
    data_dict['avg_service_time'] = round(
        df_customers['service_duration'][1:].mean(), 4)
    data_dict['sum_ready_time'] = df_customers['ready_time'][1:].sum()
    data_dict['avg_tw'] = np.sum(time_windows) / n_customers
    # NOTE(review): the standard deviation is divided by the number of
    # customers, mirroring avg_tw. This looks unintended but is kept as-is so
    # the generated feature values do not change - confirm before altering.
    data_dict['std_tw'] = np.std(time_windows) / n_customers
    data_dict['skewness_tw'] = round(time_windows.skew(), 4)
    data_dict['kurtosis_tw'] = round(time_windows.kurtosis(), 4)

    # TW overlap features
    (data_dict['avg_overlap'], data_dict['total_overlap'],
     data_dict['std_overlap'], data_dict['skewness_overlap'],
     data_dict['kurtosis_overlap']) = compute_overlap(df_customers)

    return data_dict


def main():
    """Solve every instance with each heuristic on its own and save the statistics.

    Each instance file in the input folder is solved ``n_iters`` times per
    heuristic with a ``SimulatedAnnealing`` run that uses only that heuristic.
    One row per run is collected and the table is written to
    ``statistics_and_tw_overlap_features.csv`` in the working directory.
    """
    # Alternative input folders:
    # input_path = "/vrptw/instances/all_instances/"
    # input_path = "/vrptw/instances/temp/"
    input_path = "/vrptw/instances/initial_instances/"

    # Folder where the instances to be solved are located. The folder has to
    # contain the raw text file for each instance. They should not be grouped
    # within more sub-folders.
    base_path_instances = "".join([get_project_path(), input_path])

    # Full path of every instance to be solved
    file_names = [
        os.path.join(base_path_instances, file)
        for file in os.listdir(base_path_instances)
    ]

    # Heuristics to evaluate, by name
    heuristics_dict = {
        "interroute_relocate": interroute_relocate,
        "cross_exchange": cross_exchange,
        "geni_exchange": geni_exchange,
        "interroute_2opt": interroute_2opt,
        "interroute_exchange": interroute_exchange,
        "interroute_relocate2": interroute_relocate2,
        "interroute_relocate3": interroute_relocate3,
        "intraroute_2opt": intraroute_2opt,
        "intraroute_exchange": intraroute_exchange,
        "intraroute_oropt": intraroute_oropt,
        "intraroute_relocate": intraroute_relocate,
    }

    # Columns that lead the output table; any other key found in the rows
    # follows them in first-seen order.
    column_names = [
        'used', 'feasible', 'improved', 'improved_value', 'best',
        'best_value', 'best_cost', 'problem', 'heuristic', 'avg_overlap',
        'total_overlap', 'std_overlap', 'skewness_overlap',
        'kurtosis_overlap'
    ]

    n_iters = 10

    # Rows are collected in a list and turned into a DataFrame once at the
    # end: DataFrame.append was removed in pandas 2.0 and copied the whole
    # table on every call.
    rows = []

    # Compute values for each instance
    for instance_number, instance in enumerate(file_names, start=1):
        for heuristic_number, (heuristic_name, heuristic) in enumerate(
                heuristics_dict.items(), start=1):
            print('****** INSTANCE: {}/{} ****** HEURISTIC: {}/{}\n'.format(
                instance_number, len(file_names), heuristic_number,
                len(heuristics_dict)),
                  flush=True)

            # Solve current instance n times with the same heuristic
            for iteration_number in range(1, n_iters + 1):
                print('ITERATION: {}/{}\n'.format(iteration_number, n_iters),
                      flush=True)

                method = SimulatedAnnealing(max_temp=35,
                                            min_temp=0.1,
                                            eq_iter=50,
                                            temp_change=0.95,
                                            trace=False,
                                            histograms=False)
                method.add_heuristic(
                    PerturbativeHeuristic(heuristic_name, heuristic))
                method.solve(instance)

                rows.append(_build_result_row(method))

    df_results = pd.DataFrame(rows)
    extra_columns = [
        column for column in df_results.columns
        if column not in column_names
    ]
    # reindex also creates the leading columns when no row was produced.
    df_results = df_results.reindex(columns=column_names + extra_columns)

    df_results.to_csv("statistics_and_tw_overlap_features.csv", index=False)