Exemplo n.º 1
0
def archive_analysis(input_path, output_path, lower_bound, upper_bound):
    """
    Records all solutions from the archives found in input_path where any decision space value is lower than the
    lower_bound or higher than the upper_bound. Archives of dimensions > 5, which don't include decision space values
    are skipped. The output is saved in one file per problem instance (opened in append mode) and consists of lines
    with the following format:
    [evaluation_number] [objective space values] [decision space values]
    Each offending solution is written once, even if several of its decision values are out of bounds.
    remove_empty_file is called on every output file after it has been closed.

    :param input_path: location searched for '.adat' archive files
    :param output_path: folder the analysis files are written to (passed to create_path first)
    :param lower_bound: lower bound on the decision space values (converted with float)
    :param upper_bound: upper bound on the decision space values (converted with float)
    :raises PreprocessingException: if no archive file is found in input_path
    """

    # Check whether input path exists
    input_files = get_file_name_list(input_path, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_path))

    lb = float(lower_bound)
    ub = float(upper_bound)

    # Read the input files one by one and save the result in the output_path
    create_path(output_path)
    for input_file in input_files:

        try:
            (suite_name, function, dimension) = parse_archive_file_name(input_file)
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            # Without this the loop would go on with undefined (or stale) suite_name/function/dimension
            continue

        if dimension > 5:
            continue

        print(input_file)
        # Columns 0-2 precede the decision values; the latter occupy the next `dimension` columns
        column_start = 3
        column_end = 3 + dimension

        f_out = None
        f_name = ""

        try:
            with open(input_file, 'r') as f_in:
                for line in f_in:
                    if line[0] == '%':
                        if 'instance' not in line:
                            # Ordinary comment line
                            continue
                        # A new instance starts here: finish the previous output file first
                        if f_out and not f_out.closed:
                            f_out.close()
                            remove_empty_file(f_name)
                        instance = int(get_key_value(line[1:], 'instance').strip(' \t\n\r'))
                        f_name = os.path.join(output_path,
                                              '{}_f{:02d}_i{:02d}_d{:02d}_analysis.txt'.format(suite_name,
                                                                                               function,
                                                                                               instance,
                                                                                               dimension))
                        f_out = open(f_name, 'a')
                        continue

                    values = line.split()
                    if len(values) < 4:
                        # Blank or incomplete line
                        continue

                    # NOTE(review): assumes an '% instance' header precedes the first data line; otherwise f_out
                    # is still None when the write below is reached.
                    if any((float(number) > ub) or (float(number) < lb)
                           for number in values[column_start:column_end]):
                        f_out.write("{}\n".format('\t'.join(values[:column_end])))
        finally:
            # Close the last output file even if reading or parsing failed half-way
            if f_out and not f_out.closed:
                f_out.close()
                remove_empty_file(f_name)
Exemplo n.º 2
0
def archive_split(input_paths, output_path, functions, instances, dimensions):
    """Iterates through all files in input_paths and splits those that contain multiple instances to one file per
       instance. The check for multiple instances is done only through file names.

       Only files whose function and dimension are contained in functions and dimensions are processed, and an
       output file is created only for those instances that are contained in instances. Each output file receives
       all non-empty lines from the '% instance = ...' header of its instance up to the next such header.

       :param input_paths: location(s) searched for '.adat' archive files
       :param output_path: folder the per-instance files are written to (passed to create_path first)
       :param functions: function numbers to be included
       :param instances: instance numbers to be included
       :param dimensions: dimensions to be included
       :raises PreprocessingException: if no archive file is found in input_paths
    """

    # Check whether input path exists
    input_files = get_file_name_list(input_paths, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_paths))

    # Read the input files one by one and save the result in the output_path
    create_path(output_path)
    for input_file in input_files:

        try:
            (suite_name, function, instance, dimension) = parse_archive_file_name(input_file)
            # A file whose name already carries an instance number holds a single archive: nothing to split
            if (function not in functions) or instance or (dimension not in dimensions):
                continue

        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            continue

        print(input_file)
        # Output file of the instance currently being copied; None while lines are being skipped
        f_out = None

        try:
            with open(input_file, 'r') as f_in:
                for line in f_in:
                    if not line.strip():
                        # Ignore empty lines
                        continue

                    if line[0] == '%' and 'instance' in line:
                        # A new instance starts here: finish the previous output file first
                        if f_out is not None:
                            f_out.close()
                            f_out = None
                        instance = int(get_key_value(line[1:], 'instance'))
                        if instance in instances:
                            # Only requested instances get an output file, so files of the other instances are
                            # neither created nor truncated
                            output_file = os.path.join(output_path,
                                                       '{}_f{:02d}_i{:02d}_d{:02d}_nondominated.adat'.format(
                                                           suite_name, function, instance, dimension))
                            f_out = open(output_file, 'w')

                    if f_out is not None:
                        # Comment and data lines alike belong to the instance currently being copied
                        f_out.write(line)
        finally:
            # Close the last output file even if reading or parsing failed half-way
            if f_out is not None:
                f_out.close()
Exemplo n.º 3
0
def archive_analysis(input_paths, output_path, lower_bound, upper_bound, functions, instances, dimensions):
    """Records all solutions from the archives found in input_paths where any decision space value is lower than the
       lower_bound or higher than the upper_bound. Archives of dimensions > 5, which don't include decision space values
       are skipped. The output consists of lines with the following format:
       [evaluation_number] [objective space values] [decision space values]
       Each offending solution is written once, even if several of its decision values are out of bounds.
       Assumes one file contains one archive.

       The output files are opened in append mode and remove_empty_file is called on each of them after it has
       been closed.

       :param input_paths: location(s) searched for '.adat' archive files
       :param output_path: folder the analysis files are written to (passed to create_path first)
       :param lower_bound: lower bound on the decision space values
       :param upper_bound: upper bound on the decision space values
       :param functions: function numbers to be included
       :param instances: instance numbers to be included
       :param dimensions: dimensions to be included
       :raises PreprocessingException: if no archive file is found in input_paths
    """

    # Check whether input path exists
    input_files = get_file_name_list(input_paths, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_paths))

    # Read the input files one by one and save the result in the output_path
    create_path(output_path)
    for input_file in input_files:

        try:
            (suite_name, function, instance, dimension) = parse_archive_file_name(input_file)
            if (function not in functions) or (dimension not in dimensions) or (dimension > 5):
                continue
            if not instance:
                raise PreprocessingWarning('Analysis does not work on files with multiple archives, use archive_split')
            if instance not in instances:
                continue
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            continue

        print(input_file)

        # Columns 0-2 precede the decision values; the latter occupy the next `dimension` columns
        column_start = 3
        column_end = 3 + dimension
        output_file = os.path.join(output_path, '{}_f{:02d}_i{:02d}_d{:02d}_analysis.txt'.format(suite_name,
                                                                                                 function,
                                                                                                 instance,
                                                                                                 dimension))

        # Context managers make sure both handles are closed even if a line cannot be parsed
        with open(output_file, 'a') as f_out:
            with open(input_file, 'r') as f_in:
                for line in f_in:
                    if line[0] == '%':
                        continue
                    values = line.split()
                    if len(values) < 4:
                        # Blank or incomplete line
                        continue
                    if any((float(number) > upper_bound) or (float(number) < lower_bound)
                           for number in values[column_start:column_end]):
                        f_out.write('{}\n'.format('\t'.join(values[:column_end])))

        remove_empty_file(output_file)
Exemplo n.º 4
0
def archive_thinning(input_path, output_path, thinning_precision, currently_nondominated, functions, instances,
                     dimensions):
    """Performs thinning of all the archives in the input path and stores the thinned archives in the output path.
       Assumes one file contains one archive.

       For each archive, all input solutions are rounded according to the thinning precision (in the normalized
       objective space) and added to the thinned archive. If currently_nondominated is True, all solutions that
       are currently nondominated within the thinned archive are output. The two extreme solutions are not output.
       If currently_nondominated is False, only the solutions that are contained in the final archive are output.
       In this case, the two extreme solutions are also output.

       The log level is set to 'warning' while the archives are processed and restored afterwards, also when an
       exception is raised.

       :param input_path: folder with the input archives; it is replaced by output_path in every input file name
       to obtain the name of the corresponding output file
       :param output_path: folder the thinned archives are stored to
       :param thinning_precision: rounding step used in the normalized objective space
       :param currently_nondominated: selects which solutions are output (see above)
       :param functions: function numbers to be included
       :param instances: instance numbers to be included
       :param dimensions: dimensions to be included
       :raises PreprocessingException: if no archive file is found in input_path
    """
    # Check whether input path exists
    input_files = get_file_name_list(input_path, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_path))

    old_level = log_level('warning')

    try:
        for input_file in input_files:
            try:
                (suite_name, function, instance, dimension) = parse_archive_file_name(input_file)
                if (function not in functions) or (dimension not in dimensions):
                    continue
                if not instance:
                    raise PreprocessingWarning(
                        'Thinning does not work on files with multiple archives, use archive_split')
                if instance not in instances:
                    continue
            except PreprocessingWarning as warning:
                print('Skipping file {}\n{}'.format(input_file, warning))
                continue

            print(input_file)

            output_file = input_file.replace(input_path, output_path)
            create_path(os.path.dirname(output_file))
            thinned_solutions = 0
            all_solutions = 0

            # The context manager closes the output file even if the archive cannot be processed
            with open(output_file, 'w') as f_out:
                thinned_archive = Archive(suite_name, function, instance, dimension)

                # The first two solutions returned by the fresh archive are treated as the extreme solutions;
                # their objective values (columns 1 and 2) define the normalization
                extreme1_text = thinned_archive.get_next_solution_text()
                extreme2_text = thinned_archive.get_next_solution_text()
                extreme1 = [float(x) for x in extreme1_text.split()[1:3]]
                extreme2 = [float(x) for x in extreme2_text.split()[1:3]]
                ideal = [min(x, y) for x, y in zip(extreme1, extreme2)]
                nadir = [max(x, y) for x, y in zip(extreme1, extreme2)]
                normalization = [x - y for x, y in zip(nadir, ideal)]

                with open(input_file, 'r') as f_in:
                    for line in f_in:

                        if line[0] == '%':
                            # Comment lines are copied verbatim
                            f_out.write(line)
                            continue

                        values = line.split()
                        if len(values) < 3:
                            continue

                        all_solutions += 1

                        if values[0] == '0':
                            # The line contains an extreme solution, do nothing
                            continue

                        # The line contains a 'regular' solution
                        try:
                            # Fill the archive with the solution rounded wrt the thinning precision
                            # NOTE(review): the rounded value is in normalized units but is added to `ideal`
                            # without being rescaled by `normalization` - confirm this is intended.
                            f_original = [float(x) for x in values[1:3]]
                            f_normalized = [(f_original[i] - ideal[i]) / normalization[i] for i in range(2)]
                            f_normalized = [round(f_normalized[i] / thinning_precision) for i in range(2)]
                            f_normalized = [ideal[i] + f_normalized[i] * thinning_precision for i in range(2)]
                            updated = thinned_archive.add_solution(f_normalized[0], f_normalized[1], line)
                        except IndexError:
                            print('Problem in file {}, line {}, skipping line'.format(input_file, line))
                            continue

                        if currently_nondominated and (updated == 1):
                            thinned_solutions += 1
                            f_out.write(line)

                if not currently_nondominated:
                    if thinned_archive.number_of_solutions == 2:
                        # Output the two extreme solutions if they are the only two in the archive
                        f_out.write(extreme1_text)
                        f_out.write(extreme2_text)
                        thinned_solutions = 2

                    # Output the solutions contained in the final archive
                    while True:
                        text = thinned_archive.get_next_solution_text()
                        if text is None:
                            break
                        thinned_solutions += 1
                        f_out.write(text)

            # An archive without any solution line must not abort the run with a division by zero; the float
            # factor keeps the percentage exact under Python 2 integer division as well
            percentage = 100.0 * thinned_solutions / all_solutions if all_solutions else 0.0
            print('original: {} thinned: {} ({:.2f}%)'.format(all_solutions, thinned_solutions, percentage))
    finally:
        # Restore the log level no matter how the processing ended
        log_level(old_level)
Exemplo n.º 5
0
def reformat_archives(input_path, output_path, functions, instances, dimensions):
    """
    Reformats archive files from the old file format to the '.adat' archive format.

    The names of the files in the input_path have the following notation:
    f[f1]-[f2]_i[i1]-[i2]_[d]D.txt
    where f1 and f2 are function numbers used for the first and second objective, i1 and i2 are instance numbers of the
    two functions and d is the dimension (one among 2, 3, 5, 10 and 20). Each such file starts with a few lines of
    comments that start with '#', after which each line corresponds to one solution. In files with d <= 5 the solution
    is represented by its decision and objective vector values, while files with d > 5 contain only objective vector
    values of each solution. Blank lines are ignored.

    The output files to be written to output_path have the following notation:
    [suite_name]_f[F]_i[I]_d[D]_nondominated.adat
    where F is the function number in the suite, I is the instance number and D is the dimension. One file contains
    only one instance and starts with a line '% instance = I', where I is the instance number and is followed by a
    commented line (starting with '%'). In the subsequent lines, the solutions are written in the following format:
    num obj1 obj2 dec1 ... decn
    where num is the evaluation number of the solution (0 for extreme solutions and 1 for solutions read from the old
    file format), obj1 and obj2 are its objective values, and dec1, ... are its decision values (if they are given).

    Note this implementation is concerned only with the 'bbob-biobj' suite and applies reformatting only on the archive
    files that correspond to the problems contained in this suite.

    :param input_path: path to the folder with input archives
    :param output_path: path to the folder where output archives are stored to, if any files already exist there, they
    get appended to
    :param functions: list of function numbers to be included in the reformatting
    :param instances: list of instance numbers to be included in the reformatting
    :param dimensions: list of dimensions to be included in the reformatting
    :raises PreprocessingException: if no '.txt' file is found in input_path
    """
    suite_name = 'bbob-biobj'
    print('Reformatting archive files for the {} suite...'.format(suite_name))

    # Check whether input path exists
    input_files = get_file_name_list(input_path, ".txt")
    if len(input_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_path))

    # Create output folder if it does not exist yet
    create_path(output_path)

    # Read the input files one by one
    for input_file in input_files:

        try:
            (function, instance, dimension) = parse_old_arhive_file_name(input_file)
            if (function not in functions) or (instance not in instances) or (dimension not in dimensions):
                continue
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            continue

        # Name of the output file
        output_file = os.path.join(output_path, '{}_f{:02d}_i{:02d}_d{:02d}_nondominated.adat'.format(suite_name,
                                                                                                      function,
                                                                                                      instance,
                                                                                                      dimension))

        # Both files are closed by their context managers, no explicit close() is needed
        with open(input_file, 'r') as f_in:
            with open(output_file, 'a') as f_out:
                # Perform reformatting

                print(input_file)
                f_out.write('% instance = {}\n%\n'.format(instance))

                for line in f_in:
                    if line[0] == '#':
                        continue

                    values = line.split()
                    if not values:
                        # A blank line holds no solution; writing it would produce a malformed row
                        continue

                    if dimension <= 5:
                        # The old format lists the decision values first, the new one the objective values
                        objectives = ' \t'.join(values[dimension:dimension + 2])
                        decisions = ' \t'.join(values[0:dimension])
                        f_out.write('1 \t{} \t{}\n'.format(objectives, decisions))
                    else:
                        f_out.write('1 \t{}\n'.format(' \t'.join(values[0:2])))

    print('Done!')
Exemplo n.º 6
0
def reformat_archives(input_path, output_path, functions, instances,
                      dimensions):
    """
    Converts archive files from the old file format to the '.adat' archive format.

    The names of the files in the input_path have the following notation:
    f[f1]-[f2]_i[i1]-[i2]_[d]D.txt
    where f1 and f2 are function numbers used for the first and second objective, i1 and i2 are instance numbers of the
    two functions and d is the dimension (one among 2, 3, 5, 10 and 20). Each such file starts with a few lines of
    comments that start with '#', after which each line corresponds to one solution. In files with d <= 5 the solution
    is represented by its decision and objective vector values, while files with d > 5 contain only objective vector
    values of each solution. Lines consisting of whitespace only are skipped.

    The output files to be written to output_path have the following notation:
    [suite_name]_f[F]_i[I]_d[D]_nondominated.adat
    where F is the function number in the suite, I is the instance number and D is the dimension. One file contains
    only one instance and starts with a line '% instance = I', where I is the instance number and is followed by a
    commented line (starting with '%'). In the subsequent lines, the solutions are written in the following format:
    num obj1 obj2 dec1 ... decn
    where num is the evaluation number of the solution (0 for extreme solutions and 1 for solutions read from the old
    file format), obj1 and obj2 are its objective values, and dec1, ... are its decision values (if they are given).

    Note this implementation is concerned only with the 'bbob-biobj' suite and applies reformatting only on the archive
    files that correspond to the problems contained in this suite.

    :param input_path: path to the folder with input archives
    :param output_path: path to the folder where output archives are stored to, if any files already exist there, they
    get appended to
    :param functions: list of function numbers to be included in the reformatting
    :param instances: list of instance numbers to be included in the reformatting
    :param dimensions: list of dimensions to be included in the reformatting
    :raises PreprocessingException: if no '.txt' file is found in input_path
    """
    suite_name = 'bbob-biobj'
    print('Reformatting archive files for the {} suite...'.format(suite_name))

    # Check whether input path exists
    input_files = get_file_name_list(input_path, ".txt")
    if len(input_files) == 0:
        raise PreprocessingException(
            'Folder {} does not exist or is empty'.format(input_path))

    # Create output folder if it does not exist yet
    create_path(output_path)

    # Read the input files one by one
    for input_file in input_files:

        try:
            (function, instance,
             dimension) = parse_old_arhive_file_name(input_file)
            if (function not in functions) or (instance not in instances) or (
                    dimension not in dimensions):
                continue
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            continue

        # Name of the output file
        output_file = os.path.join(
            output_path, '{}_f{:02d}_i{:02d}_d{:02d}_nondominated.adat'.format(
                suite_name, function, instance, dimension))

        # The context managers close both files; explicit close() calls
        # after the blocks would be no-ops
        with open(input_file, 'r') as f_in:
            with open(output_file, 'a') as f_out:
                # Perform reformatting

                print(input_file)
                f_out.write('% instance = {}\n%\n'.format(instance))

                for line in f_in:
                    if line[0] == '#':
                        continue

                    columns = line.split()
                    if len(columns) == 0:
                        # Whitespace-only line: there is no solution to
                        # convert, and writing it would yield a broken row
                        continue

                    if dimension <= 5:
                        # Objective values move to the front, decision
                        # values follow them
                        f_out.write('1 \t{} \t{}\n'.format(
                            ' \t'.join(columns[dimension:dimension + 2]),
                            ' \t'.join(columns[0:dimension])))
                    else:
                        f_out.write('1 \t{}\n'.format(' \t'.join(
                            columns[0:2])))

    print('Done!')
Exemplo n.º 7
0
def archive_split(input_paths, output_path, functions, instances, dimensions):
    """Splits multi-instance archive files found in input_paths into one file per instance.

       A file is regarded as containing multiple instances when no instance number is parsed from its name; the
       file contents are not inspected for this decision. Only instances contained in instances get an output
       file, which is written to output_path.
    """

    # Collect the archives; an empty result means there is nothing to do
    archive_files = get_file_name_list(input_paths, ".adat")
    if len(archive_files) == 0:
        raise PreprocessingException(
            'Folder {} does not exist or is empty'.format(input_paths))

    create_path(output_path)
    name_template = '{}_f{:02d}_i{:02d}_d{:02d}_nondominated.adat'

    for archive_file in archive_files:

        try:
            suite_name, function, instance, dimension = parse_archive_file_name(
                archive_file)
            if (function not in functions) or instance or (
                    dimension not in dimensions):
                continue
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(archive_file, warning))
            continue

        print(archive_file)
        target = None  # output file of the most recently selected instance
        current = None  # instance being copied; None while lines are skipped
        pending = []  # comment lines not yet written to the output file

        with open(archive_file, 'r') as source:
            for text in source:
                if not text.strip():
                    # Empty lines are dropped
                    continue

                is_comment = text[0] == '%'

                if is_comment and 'instance' in text:
                    # A new instance begins: finish the previous output file
                    if target and not target.closed:
                        if pending:
                            target.write(''.join(pending))
                            pending = []
                        target.close()
                    current = int(get_key_value(text[1:], 'instance'))
                    if current in instances:
                        target = open(
                            os.path.join(
                                output_path,
                                name_template.format(suite_name, function,
                                                     current, dimension)),
                            'w')
                    else:
                        current = None

                if not current:
                    # No selected instance is active, skip the line
                    continue

                if is_comment:
                    pending.append(text)
                    continue

                # Data line: comments collected so far go first
                if pending:
                    target.write(''.join(pending))
                    pending = []
                target.write(text)

        # Finish the output file of the last selected instance
        if target and not target.closed:
            if pending:
                target.write(''.join(pending))
            target.close()
Exemplo n.º 8
0
def archive_thinning(input_path, output_path, thinning_precision,
                     currently_nondominated, functions, instances, dimensions):
    """Performs thinning of all the archives in the input path and stores the thinned archives in the output path.
       Assumes one file contains one archive.

       For each archive, all input solutions are rounded according to the thinning precision (in the normalized
       objective space) and added to the thinned archive. If currently_nondominated is True, all solutions that
       are currently nondominated within the thinned archive are output. The two extreme solutions are not output.
       If currently_nondominated is False, only the solutions that are contained in the final archive are output.
       In this case, the two extreme solutions are also output.

       While the archives are processed the log level is set to 'warning'; the previous level is restored at the
       end, also when an exception is raised.

       :param input_path: folder with the input archives; it is replaced by output_path in every input file name
       to obtain the name of the corresponding output file
       :param output_path: folder the thinned archives are stored to
       :param thinning_precision: rounding step used in the normalized objective space
       :param currently_nondominated: selects which solutions are output (see above)
       :param functions: function numbers to be included
       :param instances: instance numbers to be included
       :param dimensions: dimensions to be included
       :raises PreprocessingException: if no archive file is found in input_path
    """
    # Check whether input path exists
    input_files = get_file_name_list(input_path, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException(
            'Folder {} does not exist or is empty'.format(input_path))

    old_level = log_level('warning')

    try:
        for input_file in input_files:
            try:
                (suite_name, function, instance,
                 dimension) = parse_archive_file_name(input_file)
                if not instance:
                    raise PreprocessingWarning(
                        'Thinning does not work on files with multiple archives, use archive_split'
                    )
                if (function not in functions) or (
                        instance not in instances) or (dimension
                                                       not in dimensions):
                    continue
            except PreprocessingWarning as warning:
                print('Skipping file {}\n{}'.format(input_file, warning))
                continue

            print(input_file)

            output_file = input_file.replace(input_path, output_path)
            create_path(os.path.dirname(output_file))
            thinned_solutions = 0
            all_solutions = 0

            # The context manager closes the output file even if the
            # archive cannot be processed
            with open(output_file, 'w') as f_out:
                thinned_archive = Archive(suite_name, function, instance,
                                          dimension)

                # The first two solutions returned by the fresh archive are
                # treated as the extreme solutions; their objective values
                # (columns 1 and 2) define the normalization
                extreme1_text = thinned_archive.get_next_solution_text()
                extreme2_text = thinned_archive.get_next_solution_text()
                extreme1 = [float(x) for x in extreme1_text.split()[1:3]]
                extreme2 = [float(x) for x in extreme2_text.split()[1:3]]
                ideal = [min(x, y) for x, y in zip(extreme1, extreme2)]
                nadir = [max(x, y) for x, y in zip(extreme1, extreme2)]
                normalization = [x - y for x, y in zip(nadir, ideal)]

                with open(input_file, 'r') as f_in:
                    for line in f_in:

                        if line[0] == '%':
                            # Comment lines are copied verbatim
                            f_out.write(line)
                            continue

                        fields = line.split()
                        if len(fields) < 3:
                            continue

                        all_solutions += 1

                        if fields[0] == '0':
                            # The line contains an extreme solution, do
                            # nothing
                            continue

                        # The line contains a 'regular' solution
                        try:
                            # Fill the archive with the solution rounded wrt
                            # the thinning precision
                            # NOTE(review): the rounded value is in
                            # normalized units but is added to `ideal`
                            # without being rescaled by `normalization` -
                            # confirm this is intended.
                            f_original = [float(x) for x in fields[1:3]]
                            f_normalized = [
                                (f_original[i] - ideal[i]) / normalization[i]
                                for i in range(2)
                            ]
                            f_normalized = [
                                round(f_normalized[i] / thinning_precision)
                                for i in range(2)
                            ]
                            f_normalized = [
                                ideal[i] + f_normalized[i] * thinning_precision
                                for i in range(2)
                            ]
                            updated = thinned_archive.add_solution(
                                f_normalized[0], f_normalized[1], line)
                        except IndexError:
                            print('Problem in file {}, line {}, skipping line'.
                                  format(input_file, line))
                            continue

                        if currently_nondominated and (updated == 1):
                            thinned_solutions += 1
                            f_out.write(line)

                if not currently_nondominated:
                    if thinned_archive.number_of_solutions == 2:
                        # Output the two extreme solutions if they are the
                        # only two in the archive
                        f_out.write(extreme1_text)
                        f_out.write(extreme2_text)
                        thinned_solutions = 2

                    # Output the solutions contained in the final archive
                    while True:
                        text = thinned_archive.get_next_solution_text()
                        if text is None:
                            break
                        thinned_solutions += 1
                        f_out.write(text)

            # An archive without any solution line must not abort the run
            # with a division by zero; the float factor keeps the percentage
            # exact under Python 2 integer division as well
            if all_solutions:
                percentage = 100.0 * thinned_solutions / all_solutions
            else:
                percentage = 0.0
            print('original: {} thinned: {} ({:.2f}%)'.format(
                all_solutions, thinned_solutions, percentage))
    finally:
        # Restore the log level no matter how the processing ended
        log_level(old_level)