def archive_analysis(input_paths, output_path, lower_bound, upper_bound, functions, instances, dimensions):
    """Records all solutions from the archives found in input_paths where any decision space value is lower
       than the lower_bound or higher than the upper_bound. Archives of dimensions > 5, which don't include
       decision space values, are skipped. The output consists of lines with the following format:
       [evaluation_number] [objective space values] [decision space values]
       Each offending solution is recorded once, no matter how many of its decision space values are out of
       bounds. The lines are appended to the file
       [suite_name]_f[function]_i[instance]_d[dimension]_analysis.txt in the output_path (one file per
       archive); remove_empty_file is called on that file afterwards.
       Only archives whose function, instance and dimension are contained in functions, instances and
       dimensions are analyzed.
       Assumes one file contains one archive.

       Raises PreprocessingException if no .adat file is found in input_paths.
    """

    # Check whether input path exists
    input_files = get_file_name_list(input_paths, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_paths))

    # Read the input files one by one and save the result in the output_path
    create_path(output_path)
    for input_file in input_files:
        try:
            (suite_name, function, instance, dimension) = parse_archive_file_name(input_file)
            if (function not in functions) or (dimension not in dimensions) or (dimension > 5):
                continue
            if not instance:
                raise PreprocessingWarning('Analysis does not work on files with multiple archives, use archive_split')
            if instance not in instances:
                continue
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            continue

        print(input_file)

        # Columns 0-2 hold the evaluation number and the two objective values, the decision space values follow
        column_start = 3
        column_end = 3 + dimension
        output_file = os.path.join(output_path, '{}_f{:02d}_i{:02d}_d{:02d}_analysis.txt'.format(suite_name,
                                                                                                 function,
                                                                                                 instance,
                                                                                                 dimension))

        # Both files are handled by context managers so that they get closed even if a line fails to parse
        with open(output_file, 'a') as f_out:
            with open(input_file, 'r') as f_in:
                for line in f_in:
                    if line.startswith('%'):
                        continue
                    values = line.split()
                    if len(values) < 4:
                        continue
                    # Convert all decision space values first, so that a malformed value is always reported
                    decision_values = [float(value) for value in values[column_start:column_end]]
                    if any((value > upper_bound) or (value < lower_bound) for value in decision_values):
                        f_out.write('{}\n'.format('\t'.join(values[:column_end])))

        remove_empty_file(output_file)
def log_reconstruct(input_path, output_path, algorithm_name, algorithm_info, functions, instances, dimensions):
    """Reconstructs the .info, .dat and .tdat files produced by the logger from the .adat files in the
       input_path.

       Takes into account only the given functions, instances and dimensions. If any .info, .dat and .tdat
       files of the same names already exist in the output_path, the new data is appended to them.

       The output_path, algorithm_name and algorithm_info are passed on to the observer through its option
       string (result_folder, algorithm_name and algorithm_info, respectively).

       Raises PreprocessingWarning if a new instance starts in a file before the line `% evaluations = ` of
       the previous problem has been found.
    """
    suite_name = 'bbob-biobj'

    print('Reading archive information...')
    archive_info = ArchiveInfo(input_path, functions, instances, dimensions, False)
    function_string = archive_info.get_function_string()
    instance_string = archive_info.get_instance_string()
    dimension_string = archive_info.get_dimension_string()
    file_name_set = archive_info.get_file_name_set()

    print('Initializing the suite and observer...')
    suite_instance = 'instances: {}'.format(instance_string)
    suite_options = 'dimensions: {} function_indices: {}'.format(dimension_string, function_string)
    suite = Suite(suite_name, suite_instance, suite_options)
    observer_options = 'result_folder: {} algorithm_name: {} algorithm_info: "{}" log_nondominated: read'.format(
        output_path, algorithm_name, algorithm_info)
    observer = Observer(suite_name, observer_options)

    print('Reconstructing...')
    for input_file in file_name_set:

        # Only the function and dimension are taken from the file name, the instances are read from the
        # `% instance = ` lines inside the file
        (_suite_name, function, _instance, dimension) = parse_archive_file_name(input_file)

        with open(input_file, 'r') as f_in:
            print(input_file)

            problem = None
            objective_vector = None
            evaluation_found = False
            instance = None
            count_not_updated = 0
            evaluation = 0

            for line in f_in:

                if len(line.split()) < 3:
                    continue

                elif line[0] == '%' and 'instance' in line:
                    # A new archive starts in the file
                    instance = int(get_key_value(line[1:], 'instance'))
                    if instance in instances:
                        if problem is not None:
                            # Wrap up the previous problem before moving to the next one
                            if not evaluation_found:
                                raise PreprocessingWarning(
                                    'Missing the line `% evaluations = ` in the previous '
                                    'problem. This problem is file = {}, instance = {}'.format(input_file, instance))
                            if count_not_updated > 0:
                                print('{} solutions did not update the archive'.format(count_not_updated))
                            problem.free()
                        problem = suite.get_problem_by_function_dimension_instance(function, dimension, instance,
                                                                                   observer)
                        evaluation_found = False
                        # Start counting anew, otherwise the report for this problem would also include the
                        # solutions that did not update the archive of the previous problems in this file
                        count_not_updated = 0

                elif line[0] != '%' and instance in instances:
                    # A solution line: [evaluation_number] [objective space values] ...
                    try:
                        split = line.split()
                        evaluation = int(split[0])
                        objective_vector = np.array(split[1:3])
                        updated = problem.logger_biobj_feed_solution(evaluation, objective_vector)
                        if updated == 0:
                            count_not_updated += 1
                    except ValueError as error:
                        print('Problem in file {}, line {}, skipping line\n{}'.format(input_file, line, error))
                        continue

                elif line[0] == '%' and 'evaluations' in line:
                    # NOTE(review): this branch is not guarded by `instance in instances` and the
                    # objective_vector is not reset between problems - confirm whether this is intended
                    old_evaluation = evaluation
                    evaluation = int(get_key_value(line[1:], 'evaluations'))
                    evaluation_found = True
                    # Feed the last solution again with the total number of evaluations (presumably so that
                    # the logger registers the final evaluation count - TODO confirm)
                    if (evaluation > old_evaluation) and problem is not None and objective_vector is not None:
                        problem.logger_biobj_feed_solution(evaluation, objective_vector)

            if problem is not None:
                if not evaluation_found:
                    print('Missing the line `% evaluations = ` in this or the previous problem. This is file = {}, '
                          'instance = {}'.format(input_file, instance))
                if count_not_updated > 0:
                    print('{} solutions did not update the archive'.format(count_not_updated))
                problem.free()
def _select_single_archive_files(path, functions, instances, dimensions):
    """Returns the list of .adat files found in path that contain a single archive and whose function,
       instance and dimension are contained in functions, instances and dimensions. The selected file names
       are printed; files with multiple archives are reported and skipped.

       Raises PreprocessingException if no .adat file is found in path.
    """
    file_names = get_file_name_list(path, ".adat")
    if len(file_names) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(path))

    selected_files = []
    for file_name in file_names:
        try:
            (_suite_name, function, instance, dimension) = parse_archive_file_name(file_name)
            if not instance:
                raise PreprocessingWarning('Checking for differences does not work on files with multiple archives, '
                                           'use archive_split')
            if (function not in functions) or (instance not in instances) or (dimension not in dimensions):
                continue
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(file_name, warning))
            continue
        print(file_name)
        selected_files.append(file_name)
    return selected_files


def archive_difference(first_path, second_path, differences, functions, instances, dimensions):
    """Outputs the differences between the matching archive files found in the first and second path.

       Two files match if they have the same base name. Only single-archive files of the given functions,
       instances and dimensions are compared. For every matching pair, the file name followed by the unified
       diff of the two files is appended to the file named differences (the file name is also printed).

       Raises PreprocessingException if no .adat file is found in the first or in the second path.
    """
    # Check whether the paths exist and select the files to compare (files that do not pass the function,
    # instance and dimension filters are left out of the comparison)
    first_files = _select_single_archive_files(first_path, functions, instances, dimensions)
    second_files = _select_single_archive_files(second_path, functions, instances, dimensions)

    # Index the second files by base name once instead of rebuilding the list of names for every first file.
    # The listed path itself is stored, so the second file is opened exactly as the first file is (through
    # the name returned by get_file_name_list).
    second_file_by_name = {}
    for second_file in second_files:
        second_file_by_name.setdefault(os.path.basename(second_file), second_file)

    with open(differences, 'a') as f_out:
        for first_file in first_files:
            file_name = os.path.basename(first_file)
            second_file = second_file_by_name.get(file_name)
            if second_file is None:
                # No file of the same name in the second path, nothing to compare
                continue

            with open(first_file, 'r') as f1:
                first_lines = f1.readlines()
            with open(second_file, 'r') as f2:
                second_lines = f2.readlines()

            # Find and output the differences
            diff = difflib.unified_diff(first_lines, second_lines, fromfile='f1', tofile='f2')
            f_out.write('{}\n'.format(file_name))
            print(file_name)
            for line in diff:
                f_out.write(line)
def archive_thinning(input_path, output_path, thinning_precision, currently_nondominated, functions, instances,
                     dimensions):
    """Performs thinning of all the archives in the input path and stores the thinned archives in the output
       path. Assumes one file contains one archive.

       For each archive, all input solutions are rounded according to the thinning precision (in the
       normalized objective space) and added to the thinned archive. If currently_nondominated is True, all
       solutions that are currently nondominated within the thinned archive are output. The two extreme
       solutions are not output. If currently_nondominated is False, only the solutions that are contained in
       the final archive are output. In this case, the two extreme solutions are also output.

       Only archives of the given functions, instances and dimensions are thinned. Comment lines (starting
       with '%') are copied to the output unchanged. The name of each output file is obtained by replacing
       input_path with output_path in the name of the input file. The log level is set to 'warning' during
       the thinning and restored afterwards (also if the thinning fails).

       Raises PreprocessingException if no .adat file is found in input_path.
    """
    # Check whether input path exists
    input_files = get_file_name_list(input_path, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_path))

    old_level = log_level('warning')

    try:
        for input_file in input_files:
            try:
                (suite_name, function, instance, dimension) = parse_archive_file_name(input_file)
                if not instance:
                    raise PreprocessingWarning('Thinning does not work on files with multiple archives, use '
                                               'archive_split')
                if (function not in functions) or (instance not in instances) or (dimension not in dimensions):
                    continue
            except PreprocessingWarning as warning:
                print('Skipping file {}\n{}'.format(input_file, warning))
                continue

            print(input_file)

            output_file = input_file.replace(input_path, output_path)
            create_path(os.path.dirname(output_file))

            # The output file is handled by a context manager so that it gets closed even if the thinning fails
            with open(output_file, 'w') as f_out:
                thinned_archive = Archive(suite_name, function, instance, dimension)
                thinned_solutions = 0
                all_solutions = 0

                # The first two solutions returned by the new archive are taken as the two extreme solutions,
                # they define the ideal and nadir points used for normalization
                extreme1_text = thinned_archive.get_next_solution_text()
                extreme2_text = thinned_archive.get_next_solution_text()
                extreme1 = [float(x) for x in extreme1_text.split()[1:3]]
                extreme2 = [float(x) for x in extreme2_text.split()[1:3]]
                ideal = [min(x, y) for x, y in zip(extreme1, extreme2)]
                nadir = [max(x, y) for x, y in zip(extreme1, extreme2)]
                normalization = [x - y for x, y in zip(nadir, ideal)]

                with open(input_file, 'r') as f_in:
                    for line in f_in:

                        if line.startswith('%'):
                            # Comment lines are copied as they are
                            f_out.write(line)
                            continue

                        values = line.split()
                        if len(values) < 3:
                            continue

                        # From here on, every line counts as a solution (even if it is skipped below)
                        all_solutions += 1

                        if values[0] == '0':
                            # The line contains an extreme solution, do nothing
                            continue

                        # The line contains a 'regular' solution
                        try:
                            # Fill the archive with the solution values rounded wrt the thinning precision
                            f_original = [float(x) for x in values[1:3]]
                            f_normalized = [(f_original[i] - ideal[i]) / normalization[i] for i in range(2)]
                            f_normalized = [round(f_normalized[i] / thinning_precision) for i in range(2)]
                            f_normalized = [ideal[i] + f_normalized[i] * thinning_precision for i in range(2)]
                            updated = thinned_archive.add_solution(f_normalized[0], f_normalized[1], line)
                        except IndexError:
                            print('Problem in file {}, line {}, skipping line'.format(input_file, line))
                            continue

                        if currently_nondominated and (updated == 1):
                            thinned_solutions += 1
                            f_out.write(line)

                if not currently_nondominated and (thinned_archive.number_of_solutions == 2):
                    # Output the two extreme solutions if they are the only two in the archive
                    f_out.write(extreme1_text)
                    f_out.write(extreme2_text)
                    thinned_solutions = 2

                while not currently_nondominated:
                    text = thinned_archive.get_next_solution_text()
                    if text is None:
                        break
                    thinned_solutions += 1
                    f_out.write(text)

                # An archive file without any solution lines must not crash the report
                if all_solutions > 0:
                    percentage = 100 * thinned_solutions / all_solutions
                else:
                    percentage = 0.0
                print('original: {} thinned: {} ({:.2f}%)'.format(all_solutions, thinned_solutions, percentage))
    finally:
        # Restore the log level also when the thinning is interrupted by an exception
        log_level(old_level)