def execute(argv):
    """Generate random sequences for each benchmark"""

    del argv

    FLAGS = flags.FLAGS

    # The benchmarks
    benchmarks = IO.load_yaml(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.fatal('There are no benchmarks to process')

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Process each benchmark
    for benchmark in tqdm(benchmarks, desc='Processing'):
        index = benchmark.find('.')
        bench_dir = benchmark[:index]
        bench_name = benchmark[index + 1:]

        bench_in_dir = os.path.join(FLAGS.benchmarks_directory, bench_dir,
                                    bench_name)

        if not os.path.isdir(bench_in_dir):
            continue

        bench_out_dir = os.path.join(FLAGS.results_directory, bench_dir)

        # Create the results directory for the suite
        try:
            os.makedirs(bench_out_dir)
        except FileExistsError:
            pass

        filename = '{}/{}.yaml'.format(bench_out_dir, bench_name)
        if FLAGS.verify_report and os.path.isfile(filename):
            continue

        results = {}
        for compiler in ['opt', 'llvm']:
            for level in FLAGS.levels:
                goal_value = Engine.evaluate(
                    Goals.prepare_goals(FLAGS.goals,
                                        FLAGS.weights), '-{}'.format(level),
                    compiler, bench_in_dir, FLAGS.working_set, FLAGS.times,
                    FLAGS.tool, FLAGS.verify_output)
                compiler_name = 'clang' if compiler == 'llvm' else 'opt'
                if compiler_name not in results:
                    results[compiler_name] = {}
                results[compiler_name][level] = {
                    'goal': goal_value,
                    'seq': ['-{}'.format(level)]
                }

        IO.dump_yaml(results, filename)
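
# Hedged usage sketch (an assumption, not part of the example above): commands
# shaped like execute(argv) follow the absl-py pattern, so a driver module
# would define the flags read above (benchmarks_filename, results_directory,
# levels, goals, ...) and hand control to app.run.
from absl import app

if __name__ == '__main__':
    app.run(execute)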
Example 2
    def __populate_data(training_benchmarks, training_directory,
                        test_benchmarks, test_directory):
        """Create test and training data

        Parameters
        ----------
        training_benchmarks : list

        training_directory : str

        test_benchmarks : list

        test_directory : str

        Returns
        -------
        training_data : pandas.DataFrame

        test_data : pandas.DataFrame
        """
        training_data = {}
        for training_benchmark in training_benchmarks:
            index = training_benchmark.find('.')
            suite_name = training_benchmark[:index]
            bench_name = training_benchmark[index + 1:]

            benchmark_dir = os.path.join(training_directory, suite_name)

            data = IO.load_yaml_or_fail('{}/{}.yaml'.format(
                benchmark_dir, bench_name))
            if data:
                training_data[training_benchmark] = data

        if not training_data:
            log.error('Training features do not exist.')
            exit(1)

        test_data = {}
        for test_benchmark in test_benchmarks:
            index = test_benchmark.find('.')
            suite_name = test_benchmark[:index]
            bench_name = test_benchmark[index + 1:]

            benchmark_dir = os.path.join(test_directory, suite_name)

            data = IO.load_yaml_or_fail('{}/{}.yaml'.format(
                benchmark_dir, bench_name))
            if data:
                test_data[test_benchmark] = data

        if not test_data:
            log.error('Test features do not exist.')
            exit(1)

        training_data = pd.DataFrame.from_dict(training_data, orient='index')
        test_data = pd.DataFrame.from_dict(test_data, orient='index')

        return training_data, test_data
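
# Minimal illustration (made-up feature values, not project data) of the
# DataFrame shape __populate_data returns: one row per 'suite.benchmark' key
# and one column per feature read from the YAML files.
import pandas as pd

_sample = {
    'MiBench.susan': {'ft01': 12, 'ft02': 3},
    'MiBench.dijkstra': {'ft01': 7, 'ft02': 5},
}
print(pd.DataFrame.from_dict(_sample, orient='index'))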
Example 3
def execute(argv):
    """Generate random sequences for each benchmark"""

    del argv

    FLAGS = flags.FLAGS

    # The benchmarks
    benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.error('There are no benchmarks to process')
        sys.exit(1)

    # Verify benchmark directory
    if not os.path.isdir(FLAGS.benchmarks_directory):
        logging.error('Benchmarks directory {} does not exist.'.format(
            FLAGS.benchmarks_directory))
        sys.exit(1)

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Process each benchmark
    for benchmark in tqdm(benchmarks, desc='Processing'):
        index = benchmark.find('.')
        suite_name = benchmark[:index]
        bench_name = benchmark[index + 1:]

        benchmark_dir = os.path.join(FLAGS.benchmarks_directory, suite_name,
                                     bench_name)

        if not os.path.isdir(benchmark_dir):
            continue

        results_dir = os.path.join(FLAGS.results_directory, suite_name)

        # Create the results directory for the suite
        try:
            os.makedirs(results_dir)
        except FileExistsError:
            pass

        filename = '{}/{}.yaml'.format(results_dir, bench_name)

        if FLAGS.verify_report and os.path.isfile(filename):
            continue

        Engine.compilee(benchmark_dir, 'opt', '-{}'.format(FLAGS.baseline))

        features = Milepost.extract(benchmark_dir)

        # Engine.cleanup(benchmark_dir, 'opt')

        IO.dump_yaml(features, filename)
def execute(argv):
    """Generate random sequences for each benchmark"""

    del argv

    FLAGS = flags.FLAGS

    # The benchmarks
    benchmarks = IO.load_yaml(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.error('There are no benchmarks to process')
        sys.exit(1)

    # Verify benchmark directory
    if not os.path.isdir(FLAGS.benchmarks_directory):
        logging.error('Benchmarks directory {} does not exist.'.format(
            FLAGS.benchmarks_directory))
        sys.exit(1)

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Initialize a Random object
    rnd = Random(FLAGS.nof_sequences, FLAGS.minimum, FLAGS.maximum,
                 FLAGS.factor, FLAGS.ssa, FLAGS.shuffle, FLAGS.update,
                 FLAGS.repetition, FLAGS.original, FLAGS.passes_filename,
                 Goals.prepare_goals(FLAGS.goals, FLAGS.weights), 'opt',
                 FLAGS.benchmarks_directory, FLAGS.working_set, FLAGS.times,
                 FLAGS.tool, FLAGS.verify_output)

    # Process each benchmark
    for benchmark in tqdm(benchmarks, desc='Processing'):
        index = benchmark.find('.')
        bench_dir = benchmark[:index]
        bench_name = benchmark[index + 1:]

        bench_dir = os.path.join(FLAGS.results_directory, bench_dir)

        # Create the results directory for the suite
        try:
            os.makedirs(bench_dir)
        except FileExistsError:
            pass

        filename = '{}/{}.yaml'.format(bench_dir, bench_name)
        if FLAGS.verify_report and os.path.isfile(filename):
            continue

        rnd.run(benchmark)
        if rnd.results:
            IO.dump_yaml(rnd.results, filename, FLAGS.report_only_the_best)
Example 5
def execute(argv):
    """Generate the report"""

    del argv

    FLAGS = flags.FLAGS

    results_files = glob.glob('{}/*.yaml'.format(FLAGS.data_directory))

    report = {}
    strategies = []
    for results in tqdm(results_files, desc='Processing'):
        index = results.rfind('_')
        benchmark = results[:index]
        benchmark = benchmark.replace('{}/'.format(FLAGS.data_directory), '')
        strategy = results[index+1:]
        strategy = strategy.replace('.yaml', '')
        if strategy not in strategies:
            strategies.append(strategy)

        if benchmark not in report:
            report[benchmark] = {}

        data = IO.load_yaml(results)
        values = [str_data['goal'] for _, str_data in data.items()]
        report[benchmark][strategy] = sum(values)

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    filename = '{}/{}'.format(FLAGS.results_directory, FLAGS.report_filename)
    IO.dump_yaml(report, filename)

    strategies.sort()
    csv_filename = filename.replace('.yaml', '.csv')

    with open(csv_filename, 'w') as f:
        w = csv.DictWriter(f, ['bench']+strategies)
        w.writeheader()
        for bench_name, data in report.items():
            line = {'bench': bench_name}
            for strategy_name, strategy_value in data.items():
                line[strategy_name] = strategy_value
            w.writerow(line)
Example 6
def execute(argv):
    """Create a small sequence"""

    del argv

    FLAGS = flags.FLAGS

    # The benchmarks
    benchmarks = IO.load_yaml(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.fatal('There are no benchmarks to process')

    # Verify directory
    if not os.path.isdir(FLAGS.benchmarks_directory):
        logging.error('Benchmarks directory {} does not exist.'.format(
            FLAGS.benchmarks_directory))
        sys.exit(1)

    if not os.path.isdir(FLAGS.training_directory):
        logging.error('Training directory {} does not exist.'.format(
            FLAGS.training_directory))
        sys.exit(1)

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Initialize a BenchmarkReduction object
    bred = BenchmarkReduction(FLAGS.baseline, FLAGS.benchmarks_directory,
                              FLAGS.results_directory)

    # Reduce
    for benchmark in tqdm(benchmarks, desc='Processing'):
        index = benchmark.find('.')
        bench_dir = benchmark[:index]
        bench_name = benchmark[index + 1:]

        bench_dir = os.path.join(FLAGS.training_directory, bench_dir)

        sequences = IO.load_yaml_or_fail('{}/{}.yaml'.format(
            bench_dir, bench_name))
        sequence = Sequence.get_the_best(sequences)
        for _, seq_data in sequence.items():
            sequence = seq_data['seq']

        bred.run(benchmark, sequence)
def find_sequences(test_benchmark, training_benchmarks, distance_directory,
                   training_data_directory, nof_sequences):
    """Get N sequences from the most similar benchmark"""

    # Test suite and benchmark
    index = test_benchmark.find('.')
    test_suite_name = test_benchmark[:index]
    test_bench_name = test_benchmark[index + 1:]

    # Find the training suites
    training_suites = []
    for training_benchmark in training_benchmarks:
        index = training_benchmark.find('.')
        training_suite_name = training_benchmark[:index]
        if training_suite_name not in training_suites:
            training_suites.append(training_suite_name)

    # Find the closest benchmark
    closer = []
    for training_suite in training_suites:
        d_directory = os.path.join(distance_directory, test_suite_name,
                                   training_suite)
        filename = '{}/{}.yaml'.format(d_directory, test_bench_name)
        distance_data = IO.load_yaml(filename)
        closer += [(distance, training_suite, training_bench)
                   for training_bench, distance in distance_data.items()]

    closer.sort()
    closer_suite_name = closer[0][1]
    closer_bench_name = closer[0][2]

    # Load the closest benchmark data
    d_directory = os.path.join(training_data_directory, closer_suite_name)
    filename = '{}/{}.yaml'.format(d_directory, closer_bench_name)
    training_data = IO.load_yaml_or_fail(filename)

    # Rank sequences
    rank = [(seq_data['goal'], seq_key)
            for seq_key, seq_data in training_data.items()]
    rank.sort()

    # Extract N sequences
    best = {}
    for i, (_, seq_key) in enumerate(rank):
        best[seq_key] = training_data[seq_key].copy()
        if i + 1 == nof_sequences:
            break
    return closer_suite_name, best
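
# Self-contained illustration (made-up distances) of the selection step inside
# find_sequences: the (distance, suite, benchmark) tuples are sorted in
# ascending order, so closer[0] is the most similar training benchmark.
_closer = [(0.42, 'cBench', 'bzip2'), (0.17, 'cBench', 'gsm')]
_closer.sort()
assert _closer[0][1:] == ('cBench', 'gsm')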
def execute(argv):
    """Find the best K sequences, from training data."""

    del argv

    FLAGS = flags.FLAGS

    # The benchmarks
    benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.fatal('There are no training benchmarks to process.')

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Verify directories
    if not os.path.isdir(FLAGS.training_directory):
        logging.error('Training directory {} does not exist.'.format(
            FLAGS.training_directory))
        sys.exit(1)

    if not os.path.isdir(FLAGS.baseline_directory):
        logging.error('Baseline directory {} does not exist.'.format(
            FLAGS.baseline_directory))
        sys.exit(1)

    # Initialize a BestK object
    bestk = BestK(FLAGS.training_directory, FLAGS.baseline_directory)

    # Execute
    for k in tqdm(FLAGS.k, desc='Best-k'):
        filename = '{}/best_{}.yaml'.format(FLAGS.results_directory, k)
        if FLAGS.verify_report and os.path.isfile(filename):
            continue

        bestk.run(benchmarks, FLAGS.compiler, FLAGS.baseline, int(k))

        # Store the results
        IO.dump_yaml(bestk.results, filename)

        # Store the number of programs covered by each sequence
        filename = '{}/covering_{}.yaml'.format(FLAGS.results_directory, k)
        IO.dump_yaml(bestk.covering, filename)
Example 9
    def __init__(self,
                 dimension,
                 population,
                 passes_filename,
                 goals,
                 compiler,
                 benchmarks_directory,
                 working_set,
                 times,
                 tool,
                 verify_output):
        """Initialize the arguments.

        Arguments
        ----------
        dimension : int
            The length of a sequence.

        population : int

        passes_filename : str
            The file that describes the passes to use.

        goals : dict

        compiler : str

        benchmarks_directory : str

        working_set : int
            The dataset to execute the benchmark.

        times: int
            Execution times

        tool: str
            Execution tool

        verify_output: bool
            The goal is valid only if the execution status is OK.
        """
        first_key, last_key, passes_dict = IO.load_passes(passes_filename)

        # When the goal is obtained during compile time
        # and the dataset is not defined during compilation,
        # we do not need the dataset.
        self.__flags = self.PygmoFlags(first_key,
                                       last_key,
                                       passes_dict,
                                       dimension,
                                       population,
                                       goals,
                                       compiler,
                                       benchmarks_directory,
                                       working_set,
                                       times,
                                       tool,
                                       verify_output)
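
# Illustrative definition (an assumption, not the project's actual code) of a
# PygmoFlags-style container: a simple record that bundles the search settings
# handed to the evaluation engine.
from collections import namedtuple

PygmoFlags = namedtuple('PygmoFlags', [
    'first_key', 'last_key', 'passes_dict', 'dimension', 'population',
    'goals', 'compiler', 'benchmarks_directory', 'working_set', 'times',
    'tool', 'verify_output'
])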
Example 10
def execute(argv):
    """Create a small sequence"""

    del argv

    FLAGS = flags.FLAGS

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # The benchmarks
    benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.fatal('There are no benchmarks to process')

    # Create the sequences file
    results = {}
    counter = 0
    for benchmark in tqdm(benchmarks, desc='Processing'):
        index = benchmark.find('.')
        bench_dir = benchmark[:index]
        bench_name = benchmark[index + 1:]

        bench_dir = os.path.join(FLAGS.training_directory, bench_dir)

        filename = '{}/{}.yaml'.format(bench_dir, bench_name)

        sequences = IO.load_yaml_or_fail(filename)
        sequences = Sequence.get_the_best(sequences)

        for _, data in sequences.items():
            sequence = data['seq']
            if Sequence.exist(sequence, results):
                continue
            results[counter] = {'seq': sequence}
            counter += 1

    filename = '{}/{}'.format(FLAGS.results_directory,
                              FLAGS.sequences_filename)
    IO.dump_yaml(results, filename)
Example 11
    def execute(benchmark_directory, working_set, times, tool, verify_output,
                warmup_cache, runtime):
        """Execute the benchmark.

        Arguments
        ---------
        benchmark_directory : str

        working_set : int

        times : int

        tool: str
            The user can use the flag to invoke different
            types of execution: hyperfine, pin, perf, ...

        verify_output: bool
            The goal value is valid if and only if
            the output (execution) is correct.

        warmup_cache: bool

        runtime: int
            The runtime execution (timeout)

        Return
        ------
        status : bool
            True when the output is not verified; otherwise the execution
            status loaded from the benchmark directory.
        """
        cmdline = 'curr_dir=$PWD ; cd {0} ; \
        ./execute.sh {1} {2} {3} {4} {5} {6}; cd $curr_dir'.format(
            benchmark_directory, working_set, times, tool,
            1 if verify_output else 0, 1 if warmup_cache else 0, runtime)
        try:
            subprocess.run(cmdline,
                           shell=True,
                           check=True,
                           capture_output=False)
        except subprocess.CalledProcessError:
            lg.error('Execute {}'.format(benchmark_directory))
            exit(1)

        if verify_output:
            return IO.load_execution_status(benchmark_directory)
        return True
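
# Illustration (made-up values) of the command line the method above builds:
# execute.sh receives the working set, the repetition count, the tool name,
# two 0/1 flags (verify output, warm up cache) and the timeout, in that order.
_cmdline = ('curr_dir=$PWD ; cd {0} ; ./execute.sh {1} {2} {3} {4} {5} {6}; '
            'cd $curr_dir').format('benchmarks/MiBench/susan', 0, 3,
                                   'hyperfine', 1, 0, 600)
print(_cmdline)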
Example 12
def execute(argv):
    """Generate the report"""

    del argv

    FLAGS = flags.FLAGS

    results_files = glob.glob('{}/*.yaml'.format(FLAGS.data_directory))

    report = {}
    strategies = []
    for results in tqdm(results_files, desc='Processing'):
        benchmark = results.replace('{}/'.format(FLAGS.data_directory), '')
        benchmark = benchmark.replace('.yaml', '')

        if benchmark not in report:
            report[benchmark] = {}

        data = IO.load_yaml(results)
        for compiler_name, compiler_data in data.items():
            for level_name, level_data in compiler_data.items():
                strategy = '{}_{}'.format(compiler_name, level_name)
                if strategy not in strategies:
                    strategies.append(strategy)
                report[benchmark][strategy] = level_data['goal']

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    csv_filename = '{}/{}'.format(FLAGS.results_directory,
                                  FLAGS.report_filename)
    with open(csv_filename, 'w') as f:
        w = csv.DictWriter(f, ['bench'] + strategies)
        w.writeheader()
        for bench_name, bench_data in report.items():
            line = {'bench': bench_name}
            for level, level_value in bench_data.items():
                line[level] = level_value
            w.writerow(line)
Example 13
def execute(argv):
    """Generate a CSV from YAML reports"""

    del argv

    FLAGS = flags.FLAGS

    results_files = glob.glob('{}/*.yaml'.format(FLAGS.data_directory))

    report = {}
    strategies = []
    for results in tqdm(results_files, desc='Report'):
        benchmark = results.replace('{}/'.format(FLAGS.data_directory), '')
        benchmark = benchmark.replace('.yaml', '')

        if benchmark not in report:
            report[benchmark] = {}

        data = IO.load_yaml(results)
        for seq_name, seq_data in data.items():
            if seq_name not in strategies:
                strategies.append(seq_name)
            report[benchmark][seq_name] = seq_data['goal']

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    csv_filename = '{}/{}'.format(FLAGS.results_directory,
                                  FLAGS.report_filename)

    with open(csv_filename, 'w') as f:
        w = csv.DictWriter(f, ['bench'] + strategies)
        w.writeheader()
        for bench_name, data in report.items():
            line = {'bench': bench_name}
            for seq_name, goal_value in data.items():
                line[seq_name] = goal_value
            w.writerow(line)
def execute(argv):
    """Evaluate N sequences"""

    del argv

    FLAGS = flags.FLAGS

    # The benchmarks
    benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.error('There are no benchmarks to process')
        sys.exit(1)

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Process each benchmark
    for benchmark in tqdm(benchmarks, desc='Processing'):
        # The benchmark
        index = benchmark.find('.')
        suite = benchmark[:index]
        bench_name = benchmark[index + 1:]

        # Create the results directory for the suite
        results_dir = os.path.join(FLAGS.results_directory, suite)

        try:
            os.makedirs(results_dir)
        except FileExistsError:
            pass

        # Verify report
        if FLAGS.suffix:
            output_filename = '{}/{}_{}.yaml'.format(results_dir, bench_name,
                                                     FLAGS.suffix)
        else:
            output_filename = '{}/{}.yaml'.format(results_dir, bench_name)

        if FLAGS.verify_report and os.path.isfile(output_filename):
            continue

        # Benchmark directory
        bench_dir = os.path.join(FLAGS.benchmarks_directory, suite, bench_name)

        if not os.path.isdir(bench_dir):
            logging.error('Benchmark {} does not exist.'.format(benchmark))
            sys.exit(1)

        # The training data
        training_dir = os.path.join(FLAGS.training_directory, suite)
        filename = '{}/{}.yaml'.format(training_dir, bench_name)

        sequences = IO.load_yaml_or_fail(filename)
        if not sequences:
            logging.error('There are no sequences to process')
            sys.exit(1)

        best_sequence = Sequence.get_the_best(sequences)

        # Verify if the best sequence is better than the baseline
        baseline_dir = os.path.join(FLAGS.baseline_directory, suite)
        filename = '{}/{}.yaml'.format(baseline_dir, bench_name)
        baseline_data = IO.load_yaml_or_fail(filename)
        if not baseline_data:
            logging.error('There are no baseline data')
            sys.exit(1)

        baseline_goal = baseline_data[FLAGS.compiler][FLAGS.baseline]['goal']
        for _, data in best_sequence.items():
            best_sequence_goal = data['goal']

        if best_sequence_goal >= baseline_goal:
            continue

        sequences = split_sequence(best_sequence)

        # Process the sequences
        results = {}
        for key, data in sequences.items():
            goal_value = Engine.evaluate(
                Goals.prepare_goals(FLAGS.goals, FLAGS.weights),
                Sequence.name_pass_to_string(data['seq']), 'opt', bench_dir,
                FLAGS.working_set, FLAGS.times, FLAGS.tool,
                FLAGS.verify_output)
            results[key] = {'seq': data['seq'], 'goal': goal_value}

        # Store the results
        IO.dump_yaml(results, output_filename)
Example 15
    def run(self, training_benchmarks, compiler, baseline, k):
        """
        Best-k

        Fast and effective orchestration of compiler optimizations
        for automatic performance tuning
        Z. Pan and R. Eigenmann
        International Symposium on Code Generation and Optimization
        2006
        10.1109/CGO.2006.38

        Arguments
        ---------
        training_benchmarks : list

        compiler : str

        baseline : str

        k : int
            Number of sequences
        """
        # Create the dictionary
        dictionary = {}
        best_sequences = {}
        for training_benchmark in training_benchmarks:
            index = training_benchmark.find('.')
            bench_dir = training_benchmark[:index]
            bench_name = training_benchmark[index + 1:]

            training_dir = os.path.join(self.__flags.training_directory,
                                        bench_dir)
            baseline_dir = os.path.join(self.__flags.baseline_directory,
                                        bench_dir)

            training_sequences = IO.load_yaml('{}/{}.yaml'.format(
                training_dir, bench_name))

            if not training_sequences:
                continue

            baseline_goal_value = IO.load_yaml_or_fail('{}/{}.yaml'.format(
                baseline_dir, bench_name))
            baseline_goal_value = baseline_goal_value[compiler][baseline][
                'goal']

            # For each sequence
            for seq in training_sequences.keys():
                if seq not in dictionary.keys():
                    dictionary[seq] = []
                    best_sequences[seq] = training_sequences[seq]['seq']

                goal_value = training_sequences[seq]['goal']

                # Store the fitness
                if goal_value < baseline_goal_value:
                    improvement = ((baseline_goal_value - goal_value) /
                                   baseline_goal_value) * 100
                    dictionary[seq].append((training_benchmark, improvement))

        # Find the best dictionary entries
        if dictionary:
            bestk = []
            self.__covering = {}
            for _ in range(k):

                progs = []
                for _, data in dictionary.items():
                    progs += [p for p, _ in data if p not in progs]
                if len(progs) == 0:
                    break

                key = self.__get_maximum(dictionary)
                dictionary_entry = dictionary[key].copy()
                self.__covering[key] = len(dictionary_entry)

                bestk.append(key)

                for entry_key, data in dictionary.items():
                    for program, _ in dictionary_entry:
                        index = self.__program_in_dictionary(program, data)
                        if index > -1:
                            del dictionary[entry_key][index]

            # Store the best k sequences
            self.__results = {}
            for best in bestk:
                self.__results[best] = {'x': best_sequences[best]}
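
# Simplified, self-contained sketch (not the project's implementation) of the
# greedy covering idea behind Best-k: repeatedly pick the sequence that still
# improves the most programs, then drop those programs from every entry. The
# selection criterion used here (covered-program count) is an assumption; the
# real __get_maximum may rank the dictionary entries differently.
def greedy_best_k(improvements, k):
    """improvements maps sequence key -> {program: improvement in %}."""
    chosen = []
    pool = {seq: dict(progs) for seq, progs in improvements.items()}
    for _ in range(k):
        seq = max(pool, key=lambda s: len(pool[s]), default=None)
        if seq is None or not pool[seq]:
            break
        chosen.append(seq)
        covered = set(pool[seq])
        for other in pool:
            for program in covered:
                pool[other].pop(program, None)
    return chosen

# 's1' improves two programs, so it is selected first and covers both.
print(greedy_best_k({'s1': {'a': 10.0, 'b': 5.0}, 's2': {'a': 3.0}}, k=2))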
def execute(argv):
    """Generate the figure"""

    del argv

    FLAGS = flags.FLAGS

    validate_baselines(FLAGS.baselines)

    # Read the reports
    if not os.path.isfile(FLAGS.data_report):
        logging.fatal('CSV report {} does not exist.'.format(
            FLAGS.data_report))

    if not os.path.isfile(FLAGS.levels_report):
        logging.fatal('CSV report {} does not exist.'.format(
            FLAGS.levels_report))

    goal_data = pd.read_csv(FLAGS.data_report)
    goal_data.index = goal_data.bench
    goal_data = goal_data.drop(columns='bench')

    _, strategies, _ = sort_strategies(goal_data.columns)

    level_data = pd.read_csv(FLAGS.levels_report)
    level_data.index = level_data.bench
    level_data = level_data.drop(columns='bench')

    statistics = {}
    dispersion = {'value': [], 'strategy': []}
    for baseline in FLAGS.baselines:
        statistics[baseline] = {}
        for strategy in strategies:
            if FLAGS.improvement:
                values = [
                    goal_data.loc[index][strategy] /
                    level_data.loc[index][baseline]
                    for index in goal_data.index
                ]
            else:
                values = [
                    level_data.loc[index][baseline] -
                    goal_data.loc[index][strategy] for index in goal_data.index
                ]
            strg = [
                '{}_{}'.format(baseline, strategy) for _ in range(len(values))
            ]
            dispersion['value'] += values.copy()
            dispersion['strategy'] += strg.copy()
            statistics[baseline][strategy] = values.copy()

    # The data to plot
    dispersion = pd.DataFrame.from_dict(dispersion)

    # PLOT
    _, ax = plt.subplots()

    # plt.ylim(-1500, 1500)
    plt.ylabel(FLAGS.y_label, fontsize=10, fontweight='bold')
    plt.xlabel(FLAGS.x_label, fontsize=10, fontweight='bold')
    plt.tight_layout()

    y_labels = [
        '{}_{}'.format(baseline, strategy)
        for baseline, _ in statistics.items() for strategy in strategies
    ]

    ax = sns.violinplot(ax=ax,
                        x="value",
                        y="strategy",
                        order=y_labels,
                        data=dispersion)

    if FLAGS.disable_labels:
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set(xlabel='', ylabel='')
    else:
        ax.set_yticklabels(y_labels, fontsize=8)
        ax.set(xlabel=FLAGS.x_label, ylabel=FLAGS.y_label)

    filename = '{}/{}'.format(
        FLAGS.output_directory, FLAGS.figure_filename
    ) if FLAGS.output_directory else FLAGS.figure_filename

    # Create the results directory
    if FLAGS.output_directory:
        try:
            os.makedirs(FLAGS.output_directory)
        except FileExistsError:
            pass

    plt.savefig(filename)
    plt.show()
    # Statistics
    stats = {}
    for b_name, b_data in statistics.items():
        stats[b_name] = {}
        for s_name, s_data in b_data.items():
            stats[b_name][s_name] = {
                'min': float(np.min(s_data)),
                'max': float(np.max(s_data)),
                'mean': float(np.mean(s_data)),
                'median': float(np.median(s_data)),
                '1quantile': float(np.quantile(s_data, 0.25)),
                '2quantile': float(np.quantile(s_data, 0.50)),
                '3quantile': float(np.quantile(s_data, 0.75))
            }

    index = FLAGS.figure_filename.rfind('.')
    sname = FLAGS.figure_filename[:index]

    filename = '{}/{}.yaml'.format(
        FLAGS.output_directory,
        sname,
    ) if FLAGS.output_directory else '{}.yaml'.format(sname)

    IO.dump_yaml(stats, filename)
def execute(argv):
    """Evaluate N sequences"""

    del argv

    FLAGS = flags.FLAGS

    # The benchmarks
    benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.error('There are no benchmarks to process')
        sys.exit(1)

    # The sequences
    sequences = IO.load_yaml_or_fail(FLAGS.sequences_filename)
    if not sequences:
        logging.error('There are no sequences to process')
        sys.exit(1)

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Process each benchmark
    for benchmark in tqdm(benchmarks, desc='Processing'):
        index = benchmark.find('.')
        bench_dir = benchmark[:index]
        bench_name = benchmark[index+1:]

        bench_in_dir = os.path.join(FLAGS.benchmarks_directory,
                                    bench_dir,
                                    bench_name)

        if not os.path.isdir(bench_in_dir):
            continue

        bench_out_dir = os.path.join(FLAGS.results_directory,
                                     bench_dir)

        # Create the results directory for the suite
        try:
            os.makedirs(bench_out_dir)
        except FileExistsError:
            pass

        # Verify report
        if FLAGS.suffix:
            filename = '{}/{}_{}.yaml'.format(
                bench_out_dir,
                bench_name,
                FLAGS.suffix
            )
        else:
            filename = '{}/{}.yaml'.format(bench_out_dir, bench_name)
        if FLAGS.verify_report and os.path.isfile(filename):
            continue

        # Process the sequences
        results = {}
        for key, data in sequences.items():
            goal_value = Engine.evaluate(
                Goals.prepare_goals(FLAGS.goals, FLAGS.weights),
                Sequence.name_pass_to_string(data['seq']),
                'opt',
                bench_in_dir,
                FLAGS.working_set,
                FLAGS.times,
                FLAGS.tool,
                FLAGS.verify_output
            )
            results[key] = {'seq': data['seq'], 'goal': goal_value}

        # Store the results
        IO.dump_yaml(results, filename)
def execute(argv):
    """Generate random sequences for each benchmark"""

    FLAGS = flags.FLAGS

    results_directory = FLAGS.results_directory

    # Test benchmarks
    test_benchmarks = IO.load_yaml_or_fail(FLAGS.test_benchs_filename)
    if not test_benchmarks:
        logging.fatal('There are no test benchmarks to process')

    # Training benchmarks
    training_benchmarks = IO.load_yaml_or_fail(FLAGS.training_benchs_filename)
    if not training_benchmarks:
        logging.fatal('There are no training benchmarks to process')

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Extract the representation for test programs
    print(bold('1. EXTRACTING THE REPRESENTATION'))
    FLAGS.results_directory = os.path.join(results_directory, 'representation')
    FLAGS.benchmarks_filename = FLAGS.test_benchs_filename
    representation.execute(argv)

    # Distance: test --> training
    print(bold('2. MEASURING THE DISTANCE'))
    distance_results_directory = os.path.join(results_directory, 'distance')
    FLAGS.results_directory = distance_results_directory
    FLAGS.test_representation_directory = os.path.join(results_directory,
                                                       'representation')
    distance.execute(argv)

    # Process test benchmarks
    print(bold('3. PROCESSING THE BENCHMARKS'))
    for nof_sequences in tqdm(FLAGS.nof_sequences, desc='Processing'):
        for test_benchmark in test_benchmarks:
            index = test_benchmark.find('.')
            suite_name = test_benchmark[:index]
            bench_name = test_benchmark[index + 1:]

            # Find the best N sequences
            training_suite, sequences = find_sequences(
                test_benchmark, training_benchmarks,
                distance_results_directory, FLAGS.training_data_directory,
                int(nof_sequences))

            # Goal_name
            if len(FLAGS.goals) > 1:
                goal_name = '_'.join(FLAGS.goals)
            else:
                goal_name = FLAGS.goals[0]

            # Create the results directory for the suite
            results_dir = os.path.join(results_directory,
                                       'predictive_compilation',
                                       training_suite, goal_name)
            try:
                os.makedirs(results_dir)
            except FileExistsError:
                pass

            filename = '{}/{}_j{}.yaml'.format(results_dir, bench_name,
                                               nof_sequences)

            if FLAGS.verify_report and os.path.isfile(filename):
                continue

            results = {}

            for key, data in sequences.items():
                goal_value = Engine.evaluate(
                    Goals.prepare_goals(FLAGS.goals, FLAGS.weights),
                    Sequence.name_pass_to_string(data['seq']), 'opt',
                    os.path.join(FLAGS.benchmarks_directory, suite_name,
                                 bench_name), FLAGS.working_set, FLAGS.times,
                    FLAGS.tool, FLAGS.verify_output)
                results[key] = {'seq': data['seq'], 'goal': goal_value}

            IO.dump_yaml(results, filename)
def execute(argv):
    """Generate the report"""

    del argv

    FLAGS = flags.FLAGS

    from_files = glob.glob('{}/*.yaml'.format(FLAGS.from_directory))

    report = {}
    strategies = []
    statistics = {}
    for from_file in tqdm(from_files, desc='Processing'):
        index = from_file.rfind('_')
        benchmark = from_file[:index]
        benchmark = benchmark.replace('{}/'.format(FLAGS.from_directory), '')
        strategy = from_file[index + 1:]
        strategy = strategy.replace('.yaml', '')
        if strategy not in strategies:
            strategies.append(strategy)
            statistics[strategy] = []

        data = IO.load_yaml(from_file)
        the_best = Sequence.get_the_best(data)
        the_best_key = list(the_best.keys()).pop()

        filename = '{}*{}.yaml'.format(benchmark, strategy)
        to_files = glob.glob('{}/{}'.format(FLAGS.to_directory, filename))
        for to_file in to_files:
            benchmark = to_file.replace('{}/'.format(FLAGS.to_directory), '')
            benchmark = benchmark.replace('_{}.yaml'.format(strategy), '')

            if benchmark not in report:
                report[benchmark] = {}

            data = IO.load_yaml(to_file)
            report[benchmark][strategy] = the_best.copy()
            statistics[strategy].append(data[the_best_key]['goal'])

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # YAML Report
    filename = '{}/{}'.format(FLAGS.results_directory, FLAGS.report_filename)
    IO.dump_yaml(report, filename)

    # CSV Report
    strategies.sort()
    filename = filename.replace('.yaml', '.csv')

    with open(filename, 'w') as f:
        w = csv.DictWriter(f, ['bench'] + strategies)
        w.writeheader()
        for bench_name, data in report.items():
            line = {'bench': bench_name}
            for strategy_name, strategy_value in data.items():
                line[strategy_name] = strategy_value
            w.writerow(line)

    # Statistics
    filename = filename.replace('.csv', '.stat')
    stats = {}
    for strategy, values in statistics.items():
        stats[strategy] = {
            'min': float(np.min(values)),
            'max': float(np.max(values)),
            'mean': float(np.mean(values)),
            'median': float(np.median(values)),
            '1quantile': float(np.quantile(values, 0.25)),
            '2quantile': float(np.quantile(values, 0.50)),
            '3quantile': float(np.quantile(values, 0.75))
        }

    IO.dump_yaml(stats, filename)
def execute(argv):
    """Generate the figure"""

    del argv

    FLAGS = flags.FLAGS

    validate_baselines(FLAGS.baselines)

    # Read the reports
    if not os.path.isfile(FLAGS.data_report):
        logging.fatal('CSV report {} does not exist.'.format(
            FLAGS.data_report)
        )

    if not os.path.isfile(FLAGS.levels_report):
        logging.fatal('CSV report {} does not exist.'.format(
            FLAGS.levels_report)
        )

    goal_data = pd.read_csv(FLAGS.data_report)
    goal_data.index = goal_data.bench
    goal_data = goal_data.drop(columns='bench')

    x_labels, strategies = sort_strategies(goal_data.columns)

    level_data = pd.read_csv(FLAGS.levels_report)
    level_data.index = level_data.bench
    level_data = level_data.drop(columns='bench')

    baselines_average = {}
    statistics = {}
    for baseline in FLAGS.baselines:
        baselines_average[baseline] = []
        statistics[baseline] = {}
        for strategy in strategies:
            values = [level_data.loc[index][baseline] - goal_data.loc[index][strategy]
                      for index in goal_data.index]
            baselines_average[baseline].append(np.mean(values))
            statistics[baseline][strategy] = values.copy()

    # PLOT
    symbols = ['p', 'x', '+', '*', 'v', '^',
               '<', '>', '1', '2', '3', '4',
               's', 'D', 'd', 'h', 'H']

    x = list(range(len(strategies)))

    plt.style.use('seaborn-whitegrid')

    _, ax = plt.subplots()

    # plt.ylim(-1500, 1500)
    plt.ylabel(FLAGS.y_label, fontsize=10, fontweight='bold')
    plt.xlabel(FLAGS.x_label, fontsize=10, fontweight='bold')
    plt.xticks(x, labels=x_labels, fontweight='bold')
    # plt.xticks(fontsize=10, fontweight='bold')
    # plt.yticks(fontsize=10, fontweight='bold')
    plt.tight_layout()

    for i, (b_name, b_data) in enumerate(baselines_average.items()):
        ax.plot(x, b_data, symbols[i], color='gray',
                markersize=10,
                markerfacecolor='white',
                markeredgewidth=2, label=b_name)

    leg = ax.legend(prop={'weight': 'bold', 'size': 12}, ncol=2, loc=4)

    for text in leg.get_texts():
        plt.setp(text, color='gray')

    filename = '{}/{}'.format(
        FLAGS.output_directory,
        FLAGS.figure_filename
    ) if FLAGS.output_directory else FLAGS.figure_filename

    # Create the results directory
    if FLAGS.output_directory:
        try:
            os.makedirs(FLAGS.output_directory)
        except FileExistsError:
            pass

    plt.savefig(filename)
    plt.show()

    # Statistics
    stats = {}
    for b_name, b_data in statistics.items():
        stats[b_name] = {}
        for s_name, s_data in b_data.items():
            stats[b_name][s_name] = {
                'min': float(np.min(s_data)),
                'max': float(np.max(s_data)),
                'mean': float(np.mean(s_data)),
                'median': float(np.median(s_data)),
                '1quantile': float(np.quantile(s_data, 0.25)),
                '2quantile': float(np.quantile(s_data, 0.50)),
                '3quantile': float(np.quantile(s_data, 0.75))
            }

    index = FLAGS.figure_filename.rfind('.')
    sname = FLAGS.figure_filename[:index]

    filename = '{}/{}.yaml'.format(
        FLAGS.output_directory,
        sname,
    ) if FLAGS.output_directory else '{}.yaml'.format(sname)

    IO.dump_yaml(stats, filename)
Example 21
def execute(argv):
    """Generate genetic sequences for each benchmark"""

    del argv

    FLAGS = flags.FLAGS

    # The benchmarks
    benchmarks = IO.load_yaml(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.error('There are no benchmarks to process')
        sys.exit(1)

    # Verify benchmark directory
    if not os.path.isdir(FLAGS.benchmarks_directory):
        logging.error('Benchmarks directory {} does not exist.'.format(
            FLAGS.benchmarks_directory)
        )
        sys.exit(1)

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Initialize a SGA object
    sga = SGA(FLAGS.generations,
              FLAGS.population,
              FLAGS.cr,
              FLAGS.m,
              FLAGS.param_m,
              FLAGS.param_s,
              FLAGS.crossover,
              FLAGS.mutation,
              FLAGS.selection,
              FLAGS.seed,
              FLAGS.dimension,
              FLAGS.passes_filename,
              Goals.prepare_goals(FLAGS.goals, FLAGS.weights),
              'opt',
              FLAGS.benchmarks_directory,
              FLAGS.working_set,
              FLAGS.times,
              FLAGS.tool,
              FLAGS.verify_output)

    # Process each benchmark
    for benchmark in tqdm(benchmarks, desc='Processing'):
        index = benchmark.find('.')
        bench_dir = benchmark[:index]
        bench_name = benchmark[index+1:]

        bench_dir = os.path.join(FLAGS.results_directory,
                                 bench_dir)

        # Create the results directory for the suite
        try:
            os.makedirs(bench_dir)
        except FileExistsError:
            pass

        filename = '{}/{}.yaml'.format(bench_dir, bench_name)
        if FLAGS.verify_report and os.path.isfile(filename):
            continue

        sga.run(benchmark)

        if sga.results:
            IO.dump_yaml(sga.results,
                         filename,
                         FLAGS.report_only_the_best)
    def create_random_sequences(nof_sequences, minimum, maximum, factor, ssa,
                                shuffle, update_, repetition, original,
                                passes_filename):
        """Create N random sequences.

        Arguments
        ----------
        nof_sequences : int
            The number of sequences.

        minimum : int
            The minimum length of the sequence.

        maximum : int
            The maximum length of the sequence.

        factor : int
            The factor applied to nof_sequences (nof_sequences *= factor).

        ssa : bool
            Enable ssa?

        shuffle : bool
            Enable shuffle?

        update_ : bool
            Enable update?

        repetition : bool
            Enable repetition?

        original : bool
            Insert the original?

        passes_filename : str
            The yaml filename which describes the available passes.

        Return
        ------
        sequences : dict
            A dictionary which contains N random sequences.
        """
        # Load the passes
        first_key, last_key, passes = IO.load_passes(passes_filename)

        if (not repetition) and (maximum > (len(passes) * 0.7)):
            lg.error('Adjust MAXIMUM length: MAXIMUM should be '
                     'less than 70% of |PASSES|.')
            sys.exit(1)
        if not (original or update_ or shuffle):
            lg.error('It is necessary to use at least one argument '
                     '(-original, -update, -shuffle).')
            sys.exit(1)

        counter = 0
        sequences = {}
        nof_sequences *= factor

        while True:
            # generate a sequence
            seq = Sequence.create_random_sequence(first_key, last_key, passes,
                                                  minimum, maximum, repetition)
            seq = Sequence.sanitize(seq)

            if ssa:
                seq = Sequence.mem2reg_first(seq)

            if original:
                if not Sequence.exist(seq, sequences):
                    sequences[counter] = {'seq': seq}
                    counter += 1
                    if counter >= nof_sequences:
                        break
                if shuffle:
                    sseq = seq[:]
                    rn.shuffle(sseq)
                    sseq = Sequence.sanitize(sseq)
                    if not Sequence.exist(sseq, sequences):
                        sequences[counter] = {'seq': sseq}
                        counter += 1
                        if counter >= nof_sequences:
                            break
            if update_:
                seq = Sequence.update(seq)
                seq = Sequence.sanitize(seq)

                if not Sequence.exist(seq, sequences):
                    sequences[counter] = {'seq': seq}
                    counter += 1
                    if counter >= nof_sequences:
                        break

                if shuffle:
                    seq = Sequence.update(seq)
                    seq = Sequence.sanitize(seq)
                    if not Sequence.exist(seq, sequences):
                        sequences[counter] = {'seq': seq}
                        counter += 1
                        if counter >= nof_sequences:
                            break

        return sequences
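
# Hedged usage sketch (file name and sizes are assumptions): with repetition
# disabled, MAXIMUM must stay below 70% of the number of available passes, and
# at least one of original/update_/shuffle has to be enabled.
#
# sequences = create_random_sequences(nof_sequences=100, minimum=10,
#                                     maximum=40, factor=1, ssa=True,
#                                     shuffle=True, update_=False,
#                                     repetition=False, original=True,
#                                     passes_filename='passes.yaml')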
Example 23
def execute(argv):
    """Create a small sequence"""

    del argv

    FLAGS = flags.FLAGS

    # Verify directories
    if not os.path.isdir(FLAGS.benchmarks_directory):
        logging.error('Benchmarks directory {} does not exist.'.format(
            FLAGS.benchmarks_directory))
        sys.exit(1)

    if not os.path.isdir(FLAGS.training_directory):
        logging.error('Training directory {} does not exist.'.format(
            FLAGS.training_directory))
        sys.exit(1)

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # The benchmarks
    if FLAGS.benchmarks_filename:
        benchmarks = IO.load_yaml(FLAGS.benchmarks_filename)
        if not benchmarks:
            logging.error('There are no benchmarks to process')
            sys.exit(1)
    else:
        benchmarks = glob.glob('{}/*.yaml'.format(FLAGS.training_directory))
        benchmarks = [
            b.replace('{}/'.format(FLAGS.training_directory),
                      '').replace('.yaml', '') for b in benchmarks
        ]

    # Initialize a SequenceReduction object
    seqred = SequenceReduction(Goals.prepare_goals(FLAGS.goals,
                                                   FLAGS.weights), 'opt',
                               FLAGS.benchmarks_directory, FLAGS.working_set,
                               FLAGS.times, FLAGS.tool, FLAGS.verify_output)

    # Reduce
    for benchmark in tqdm(benchmarks, desc='Processing'):
        index = benchmark.find('.')
        bench_dir = benchmark[:index]
        bench_name = benchmark[index + 1:]

        bench_dir_result = os.path.join(FLAGS.results_directory, bench_dir)
        filename_result = '{}/{}.yaml'.format(bench_dir_result, bench_name)

        # Create the results directory for the suite
        try:
            os.makedirs(bench_dir_result)
        except FileExistsError:
            pass

        if FLAGS.verify_report and os.path.isfile(filename_result):
            continue

        bench_dir_training = os.path.join(FLAGS.training_directory, bench_dir)
        filename_training = '{}/{}.yaml'.format(bench_dir_training, bench_name)

        sequences = IO.load_yaml(filename_training)
        sequences = Sequence.get_the_best(sequences, FLAGS.nof_sequences)

        results = {}
        counter = 0
        for _, data in sequences.items():
            seqred.run(data['seq'], benchmark)

            if not FLAGS.report_only_the_small:
                results[counter] = {
                    'seq': seqred.results[0]['seq'],
                    'goal': seqred.results[0]['goal']
                }
                counter += 1

            results[counter] = {
                'seq': seqred.results[1]['seq'],
                'goal': seqred.results[1]['goal']
            }
            counter += 1

        IO.dump_yaml(results, filename_result)
def execute(argv):
    """Find the euclidean distance from test to training data."""

    del argv

    FLAGS = flags.FLAGS

    # The training benchmarks
    training_benchmarks = IO.load_yaml_or_fail(FLAGS.training_benchs_filename)
    if not training_benchmarks:
        logging.error('There are no training benchmarks to process')
        sys.exit(1)

    # The test benchmarks
    test_benchmarks = IO.load_yaml_or_fail(FLAGS.test_benchs_filename)
    if not test_benchmarks:
        logging.error('There are no test benchmarks to process')
        sys.exit(1)

    # Verify directories
    if not os.path.isdir(FLAGS.training_representation_directory):
        logging.error('Training directory {} does not exist.'.format(
            FLAGS.training_representation_directory))
        sys.exit(1)

    if not os.path.isdir(FLAGS.test_representation_directory):
        logging.error('Test directory {} does not exist.'.format(
            FLAGS.test_representation_directory))
        sys.exit(1)

    # Measure the distance
    if FLAGS.distance == 'euclidean':
        distance = Distance.euclidean(training_benchmarks,
                                      FLAGS.training_representation_directory,
                                      test_benchmarks,
                                      FLAGS.test_representation_directory)
    elif FLAGS.distance == 'manhattan':
        distance = Distance.manhattan(training_benchmarks,
                                      FLAGS.training_representation_directory,
                                      test_benchmarks,
                                      FLAGS.test_representation_directory)
    elif FLAGS.distance == 'cosine':
        distance = Distance.cosine(training_benchmarks,
                                   FLAGS.training_representation_directory,
                                   test_benchmarks,
                                   FLAGS.test_representation_directory)
    else:
        logging.error('Unknown distance metric {}.'.format(FLAGS.distance))
        sys.exit(1)

    # Store the distance
    for i, test_bench in enumerate(tqdm(test_benchmarks, desc='Processing')):
        index = test_bench.find('.')
        test_suite_name = test_bench[:index]
        test_bench_name = test_bench[index + 1:]

        results = {}
        for j, training_bench in enumerate(training_benchmarks):
            index = training_bench.find('.')
            training_suite_name = training_bench[:index]
            training_bench_name = training_bench[index + 1:]

            if training_suite_name not in results:
                results[training_suite_name] = {}

            results[training_suite_name][training_bench_name] = float(
                distance[i][j])

        for training_suite_name, training_distance in results.items():
            results_dir = os.path.join(FLAGS.results_directory,
                                       test_suite_name, training_suite_name)

            # Create the results directory
            try:
                os.makedirs(results_dir)
            except FileExistsError:
                pass

            filename = '{}/{}.yaml'.format(results_dir, test_bench_name)
            IO.dump_yaml(training_distance, filename)
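
# Self-contained illustration (made-up feature vectors) of the distance matrix
# layout the loop above expects: distance[i][j] is the distance between test
# benchmark i and training benchmark j.
import numpy as np

_test_features = np.array([[1.0, 0.0], [0.5, 2.0]])        # 2 test programs
_training_features = np.array([[1.0, 1.0], [0.0, 2.0]])    # 2 training programs
_euclidean = np.linalg.norm(
    _test_features[:, None, :] - _training_features[None, :, :], axis=-1)
print(_euclidean.shape)  # (2, 2)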