Example #1
0
    def __populate_data(training_benchmarks, training_directory,
                        test_benchmarks, test_directory):
        """Create test and training data.

        Loads '<directory>/<suite>/<benchmark>.yaml' for every entry,
        where entries are formatted as '<suite>.<benchmark>'.

        Parameters
        ----------
        training_benchmarks : list
            Training benchmark names, '<suite>.<benchmark>'.
        training_directory : str
            Directory holding the training feature YAML files.
        test_benchmarks : list
            Test benchmark names, '<suite>.<benchmark>'.
        test_directory : str
            Directory holding the test feature YAML files.

        Returns
        -------
        training_data : pandas.DataFrame

        test_data : pandas.DataFrame
        """
        def load(benchmarks, directory):
            # Load the YAML data for each benchmark; entries with empty
            # data are silently skipped.
            loaded = {}
            for benchmark in benchmarks:
                index = benchmark.find('.')
                suite_name = benchmark[:index]
                bench_name = benchmark[index + 1:]

                benchmark_dir = os.path.join(directory, suite_name)

                data = IO.load_yaml_or_fail('{}/{}.yaml'.format(
                    benchmark_dir, bench_name))
                if data:
                    loaded[benchmark] = data
            return loaded

        training_data = load(training_benchmarks, training_directory)
        if not training_data:
            log.error('Training features do not exist.')
            exit(1)

        test_data = load(test_benchmarks, test_directory)
        if not test_data:
            # BUG FIX: this branch used 'lg.error' (a NameError) and
            # reported 'Training features' for the *test* data.
            log.error('Test features do not exist.')
            exit(1)

        training_data = pd.DataFrame.from_dict(training_data, orient='index')
        test_data = pd.DataFrame.from_dict(test_data, orient='index')

        return training_data, test_data
Example #2
0
def execute(argv):
    """Compile each benchmark with the baseline and extract its
    Milepost features."""

    del argv

    FLAGS = flags.FLAGS

    # Load the list of benchmarks to process.
    benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.error('There are no benchmarks to process')
        sys.exit(1)

    # The benchmarks directory must exist.
    if not os.path.isdir(FLAGS.benchmarks_directory):
        logging.error('Benchmarks directory {} does not exist.'.format(
            FLAGS.benchmarks_directory))
        sys.exit(1)

    # Make sure the top-level results directory exists.
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    for entry in tqdm(benchmarks, desc='Processing'):
        # Entries are formatted as '<suite>.<benchmark>'.
        dot = entry.find('.')
        suite = entry[:dot]
        bench = entry[dot + 1:]

        source_dir = os.path.join(FLAGS.benchmarks_directory, suite, bench)

        # Skip benchmarks that are not present on disk.
        if not os.path.isdir(source_dir):
            continue

        # One results sub-directory per suite.
        suite_results = os.path.join(FLAGS.results_directory, suite)
        try:
            os.makedirs(suite_results)
        except FileExistsError:
            pass

        report = '{}/{}.yaml'.format(suite_results, bench)

        # Optionally skip benchmarks that already have a report.
        if FLAGS.verify_report and os.path.isfile(report):
            continue

        # Compile with the baseline sequence, then extract the features.
        Engine.compilee(source_dir, 'opt', '-{}'.format(FLAGS.baseline))
        features = Milepost.extract(source_dir)

        # Engine.cleanup(source_dir, 'opt')

        IO.dump_yaml(features, report)
Example #3
0
def execute(argv):
    """Build a deduplicated sequences file from the best training
    sequences of every benchmark."""

    del argv

    FLAGS = flags.FLAGS

    # Make sure the results directory exists.
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Load the benchmark list.
    benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.fatal('There are no benchmarks to process')

    # Collect the best sequence of every benchmark, skipping duplicates.
    results = {}
    counter = 0
    for entry in tqdm(benchmarks, desc='Processing'):
        # Entries are formatted as '<suite>.<benchmark>'.
        dot = entry.find('.')
        suite = entry[:dot]
        bench = entry[dot + 1:]

        suite_dir = os.path.join(FLAGS.training_directory, suite)

        report = '{}/{}.yaml'.format(suite_dir, bench)
        best = Sequence.get_the_best(IO.load_yaml_or_fail(report))

        for _, data in best.items():
            candidate = data['seq']
            if not Sequence.exist(candidate, results):
                results[counter] = {'seq': candidate}
                counter += 1

    # Store the collected sequences.
    output = '{}/{}'.format(FLAGS.results_directory,
                            FLAGS.sequences_filename)
    IO.dump_yaml(results, output)
def find_sequences(test_benchmark, training_benchmaks, distance_directory,
                   training_data_directory, nof_sequences):
    """Return N sequences taken from the most similar training benchmark.

    The closest training benchmark is the one with the smallest
    precomputed distance to ``test_benchmark``; its sequences are then
    ranked by goal value and the first N are returned along with the
    training suite name.
    """

    # Split '<suite>.<benchmark>'.
    dot = test_benchmark.find('.')
    test_suite_name = test_benchmark[:dot]
    test_bench_name = test_benchmark[dot + 1:]

    # Collect the distinct training suites, preserving first-seen order.
    training_suites = []
    for candidate in training_benchmaks:
        suite = candidate[:candidate.find('.')]
        if suite not in training_suites:
            training_suites.append(suite)

    # Gather (distance, suite, benchmark) triples across every training
    # suite, then keep the smallest one.
    closer = []
    for training_suite in training_suites:
        d_directory = os.path.join(distance_directory, test_suite_name,
                                   training_suite)
        filename = '{}/{}.yaml'.format(d_directory, test_bench_name)
        distance_data = IO.load_yaml(filename)
        for training_bench, distance in distance_data.items():
            closer.append((distance, training_suite, training_bench))

    _, closer_suite_name, closer_bench_name = min(closer)

    # Load the training data of the closest benchmark.
    d_directory = os.path.join(training_data_directory, closer_suite_name)
    filename = '{}/{}.yaml'.format(d_directory, closer_bench_name)
    training_data = IO.load_yaml_or_fail(filename)

    # Rank sequences by goal value (ascending).
    ranking = sorted((seq_data['goal'], seq_key)
                     for seq_key, seq_data in training_data.items())

    # Keep the first N entries of the ranking.
    best = {}
    for position, (_, seq_key) in enumerate(ranking, start=1):
        best[seq_key] = training_data[seq_key].copy()
        if position == nof_sequences:
            break
    return closer_suite_name, best
Example #5
0
def execute(argv):
    """Reduce each benchmark using its best training sequence."""

    del argv

    FLAGS = flags.FLAGS

    # The benchmarks to process.
    benchmarks = IO.load_yaml(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.fatal('There are no benchmarks to process')

    # Both input directories must exist.
    if not os.path.isdir(FLAGS.benchmarks_directory):
        logging.error('Benchmarks directory {} does not exist.'.format(
            FLAGS.benchmarks_directory))
        sys.exit(1)

    if not os.path.isdir(FLAGS.training_directory):
        logging.error('Training directory {} does not exist.'.format(
            FLAGS.training_directory))
        sys.exit(1)

    # Make sure the results directory exists.
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # The object that performs the reduction.
    reducer = BenchmarkReduction(FLAGS.baseline, FLAGS.benchmarks_directory,
                                 FLAGS.results_directory)

    for entry in tqdm(benchmarks, desc='Processing'):
        # Entries are formatted as '<suite>.<benchmark>'.
        dot = entry.find('.')
        suite = entry[:dot]
        bench = entry[dot + 1:]

        suite_dir = os.path.join(FLAGS.training_directory, suite)

        report = '{}/{}.yaml'.format(suite_dir, bench)
        best = Sequence.get_the_best(IO.load_yaml_or_fail(report))

        # Extract the pass list from the (single-entry) best sequence.
        for _, seq_data in best.items():
            sequence = seq_data['seq']

        reducer.run(entry, sequence)
def execute(argv):
    """Find the best K sequences, from training data.

    For each K in FLAGS.k, runs the Best-k algorithm over the training
    data and stores both the selected sequences and the number of
    programs each selected sequence covers.
    """

    del argv

    FLAGS = flags.FLAGS

    # The training benchmarks
    benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.fatal('There are no training benchmarks to process.')

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Verify directories
    # BUG FIX: the messages previously said 'does not exit.'
    if not os.path.isdir(FLAGS.training_directory):
        logging.error('Training directory {} does not exist.'.format(
            FLAGS.training_directory))
        sys.exit(1)

    if not os.path.isdir(FLAGS.baseline_directory):
        logging.error('Baseline directory {} does not exist.'.format(
            FLAGS.baseline_directory))
        sys.exit(1)

    # Initialize a BestK object
    bestk = BestK(FLAGS.training_directory, FLAGS.baseline_directory)

    # Execute Best-k once per requested K value.
    for k in tqdm(FLAGS.k, desc='Best-k'):
        filename = '{}/best_{}.yaml'.format(FLAGS.results_directory, k)
        if FLAGS.verify_report and os.path.isfile(filename):
            continue

        bestk.run(benchmarks, FLAGS.compiler, FLAGS.baseline, int(k))

        # Store the results
        IO.dump_yaml(bestk.results, filename)

        # Store the number of programs covered by each sequence
        filename = '{}/covering_{}.yaml'.format(FLAGS.results_directory, k)
        IO.dump_yaml(bestk.covering, filename)
def execute(argv):
    """Evaluate N sequences.

    For each benchmark: load its best training sequence; if that
    sequence's goal value beats the baseline, split it with
    ``split_sequence`` and evaluate every resulting sub-sequence,
    storing the goal values to a per-benchmark YAML report.
    """

    del argv

    FLAGS = flags.FLAGS

    # The benchmarks
    benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.error('There are no benchmarks to process')
        sys.exit(1)

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Process each benchmark
    for benchmark in tqdm(benchmarks, desc='Processing'):
        # The benchmark name is formatted as '<suite>.<benchmark>'.
        index = benchmark.find('.')
        suite = benchmark[:index]
        bench_name = benchmark[index + 1:]

        # Create the results directory for the suite
        results_dir = os.path.join(FLAGS.results_directory, suite)

        try:
            os.makedirs(results_dir)
        except FileExistsError:
            pass

        # Verify report (FLAGS.suffix distinguishes multiple runs)
        if FLAGS.suffix:
            output_filename = '{}/{}_{}.yaml'.format(results_dir, bench_name,
                                                     FLAGS.suffix)
        else:
            output_filename = '{}/{}.yaml'.format(results_dir, bench_name)

        # Skip benchmarks whose report already exists.
        if FLAGS.verify_report and os.path.isfile(output_filename):
            continue

        # Benchmark directory
        bench_dir = os.path.join(FLAGS.benchmarks_directory, suite, bench_name)

        if not os.path.isdir(bench_dir):
            logging.error('Benchmark {} does not exist.'.format(benchmark))
            sys.exit(1)

        # The training data
        training_dir = os.path.join(FLAGS.training_directory, suite)
        filename = '{}/{}.yaml'.format(training_dir, bench_name)

        sequences = IO.load_yaml_or_fail(filename)
        if not sequences:
            logging.error('There are no sequences to process')
            sys.exit(1)

        best_sequence = Sequence.get_the_best(sequences)

        # Verify if the best sequence is better than the baseline
        baseline_dir = os.path.join(FLAGS.baseline_directory, suite)
        filename = '{}/{}.yaml'.format(baseline_dir, bench_name)
        baseline_data = IO.load_yaml_or_fail(filename)
        if not baseline_data:
            logging.error('There are no baseline data')
            sys.exit(1)

        baseline_goal = baseline_data[FLAGS.compiler][FLAGS.baseline]['goal']
        # Extract the goal of the best sequence.
        # NOTE(review): assumes get_the_best returns a non-empty dict;
        # an empty dict leaves best_sequence_goal unbound — confirm.
        for _, data in best_sequence.items():
            best_sequence_goal = data['goal']

        # Skip when the best sequence does not improve on the baseline
        # (a smaller goal value counts as better here).
        if not (best_sequence_goal < baseline_goal):
            continue

        sequences = split_sequence(best_sequence)

        # Evaluate each sub-sequence and record its goal value.
        results = {}
        for key, data in sequences.items():
            goal_value = Engine.evaluate(
                Goals.prepare_goals(FLAGS.goals, FLAGS.weights),
                Sequence.name_pass_to_string(data['seq']), 'opt', bench_dir,
                FLAGS.working_set, FLAGS.times, FLAGS.tool,
                FLAGS.verify_output)
            results[key] = {'seq': data['seq'], 'goal': goal_value}

        # Store the results
        IO.dump_yaml(results, output_filename)
Example #8
0
    def run(self, training_benchmarks, compiler, baseline, k):
        """
        Best-k

        Greedily select up to k sequences that improve the most
        training programs over the baseline, following:

        Fast and effective orchestration of compiler optimizations
        for automatic performance tuning
        Z. Pan and R. Eigenmann
        International Symposium on Code Generation and Optimization
        2006
        10.1109/CGO.2006.38

        Argument
        --------
        training_benchmarks : list
            Benchmark names formatted as '<suite>.<benchmark>'.

        compiler : str

        baseline : str

        k : int
            Number of sequences

        Side effects: fills ``self.__results`` (the chosen sequences)
        and ``self.__covering`` (programs covered per chosen sequence).
        """
        # Map each sequence key to the list of (benchmark, improvement%)
        # pairs where the sequence beats the baseline goal value.
        dictionary = {}
        best_sequences = {}
        for training_benchmark in training_benchmarks:
            # '<suite>.<benchmark>'
            index = training_benchmark.find('.')
            bench_dir = training_benchmark[:index]
            bench_name = training_benchmark[index + 1:]

            training_dir = os.path.join(self.__flags.training_directory,
                                        bench_dir)
            baseline_dir = os.path.join(self.__flags.baseline_directory,
                                        bench_dir)

            training_sequences = IO.load_yaml('{}/{}.yaml'.format(
                training_dir, bench_name))

            # Skip benchmarks with no training data.
            if not training_sequences:
                continue

            baseline_goal_value = IO.load_yaml_or_fail('{}/{}.yaml'.format(
                baseline_dir, bench_name))
            baseline_goal_value = baseline_goal_value[compiler][baseline][
                'goal']

            # For each sequence
            for seq in training_sequences.keys():
                if seq not in dictionary.keys():
                    dictionary[seq] = []
                    best_sequences[seq] = training_sequences[seq]['seq']

                goal_value = training_sequences[seq]['goal']

                # Store the improvement only when the sequence beats the
                # baseline (smaller goal value is better).
                if goal_value < baseline_goal_value:
                    improvement = ((baseline_goal_value - goal_value) /
                                   baseline_goal_value) * 100
                    dictionary[seq].append((training_benchmark, improvement))

        # Greedy selection: repeatedly take the "maximum" entry (per
        # self.__get_maximum), then remove the programs it covers from
        # every remaining entry.
        if dictionary:
            bestk = []
            self.__covering = {}
            for _ in range(k):

                # Programs still covered by at least one sequence; stop
                # early when nothing is left to cover.
                progs = []
                for _, data in dictionary.items():
                    progs += [p for p, _ in data if p not in progs]
                if len(progs) == 0:
                    break

                key = self.__get_maximum(dictionary)
                dictionary_entry = dictionary[key].copy()
                self.__covering[key] = len(dictionary_entry)

                bestk.append(key)

                # NOTE(review): this loop variable shadows the selected
                # `key` above; harmless since `key` is not read after
                # the loop, but worth renaming for clarity.
                for key, data in dictionary.items():
                    # The index is re-queried after every deletion, so
                    # in-place removal here is safe.
                    for program, improvement in dictionary_entry:
                        index = self.__program_in_dictionary(program, data)
                        if index > -1:
                            del dictionary[key][index]

            # Store the best k sequences
            # NOTE(review): entries are stored under key 'x' while
            # sequence dicts elsewhere in this project use 'seq' —
            # confirm consumers of self.__results expect 'x'.
            self.__results = {}
            for best in bestk:
                self.__results[best] = {'x': best_sequences[best]}
def execute(argv):
    """Find the distance from test to training data.

    Measures the pairwise distance (euclidean, manhattan or cosine,
    selected by FLAGS.distance) between test and training program
    representations, and stores one YAML file per
    (test benchmark, training suite) pair.
    """

    del argv

    FLAGS = flags.FLAGS

    # The training benchmarks
    training_benchmarks = IO.load_yaml_or_fail(FLAGS.training_benchs_filename)
    if not training_benchmarks:
        logging.error('There are no training benchmarks to process')
        sys.exit(1)

    # The test benchmarks (BUG FIX: comment said "training")
    test_benchmarks = IO.load_yaml_or_fail(FLAGS.test_benchs_filename)
    if not test_benchmarks:
        logging.error('There are no test benchmarks to process')
        sys.exit(1)

    # Verify directories
    if not os.path.isdir(FLAGS.training_representation_directory):
        logging.error('Training directory {} does not exist.'.format(
            FLAGS.training_representation_directory))
        sys.exit(1)

    if not os.path.isdir(FLAGS.test_representation_directory):
        logging.error('Test directory {} does not exist.'.format(
            FLAGS.test_representation_directory))
        sys.exit(1)

    # Measure the distance.
    # BUG FIX: an unrecognized FLAGS.distance previously left `distance`
    # unbound and crashed below with NameError; fail fast instead.
    if FLAGS.distance == 'euclidean':
        distance = Distance.euclidean(training_benchmarks,
                                      FLAGS.training_representation_directory,
                                      test_benchmarks,
                                      FLAGS.test_representation_directory)
    elif FLAGS.distance == 'manhattan':
        distance = Distance.manhattan(training_benchmarks,
                                      FLAGS.training_representation_directory,
                                      test_benchmarks,
                                      FLAGS.test_representation_directory)
    elif FLAGS.distance == 'cosine':
        distance = Distance.cosine(training_benchmarks,
                                   FLAGS.training_representation_directory,
                                   test_benchmarks,
                                   FLAGS.test_representation_directory)
    else:
        logging.error('Invalid distance: {}'.format(FLAGS.distance))
        sys.exit(1)

    # Store the distance, grouped by training suite.
    for i, test_bench in enumerate(tqdm(test_benchmarks, desc='Processing')):
        # '<suite>.<benchmark>'
        index = test_bench.find('.')
        test_suite_name = test_bench[:index]
        test_bench_name = test_bench[index + 1:]

        results = {}
        for j, training_bench in enumerate(training_benchmarks):
            index = training_bench.find('.')
            training_suite_name = training_bench[:index]
            training_bench_name = training_bench[index + 1:]

            if training_suite_name not in results:
                results[training_suite_name] = {}

            results[training_suite_name][training_bench_name] = float(
                distance[i][j])

        for training_suite_name, training_distance in results.items():
            results_dir = os.path.join(FLAGS.results_directory,
                                       test_suite_name, training_suite_name)

            # Create the results directory
            try:
                os.makedirs(results_dir)
            except FileExistsError:
                pass

            filename = '{}/{}.yaml'.format(results_dir, test_bench_name)
            IO.dump_yaml(training_distance, filename)
def execute(argv):
    """Evaluate N sequences.

    Applies every sequence from FLAGS.sequences_filename to every
    benchmark and stores the resulting goal values, one YAML report
    per benchmark.
    """

    del argv

    FLAGS = flags.FLAGS

    # The benchmarks
    benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.error('There are no benchmarks to process')
        sys.exit(1)

    # The sequences
    sequences = IO.load_yaml_or_fail(FLAGS.sequences_filename)
    if not sequences:
        # BUG FIX: this message previously said "benchmarks".
        logging.error('There are no sequences to process')
        sys.exit(1)

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Process each benchmark
    for benchmark in tqdm(benchmarks, desc='Processing'):
        # The benchmark name is formatted as '<suite>.<benchmark>'.
        index = benchmark.find('.')
        bench_dir = benchmark[:index]
        bench_name = benchmark[index+1:]

        bench_in_dir = os.path.join(FLAGS.benchmarks_directory,
                                    bench_dir,
                                    bench_name)

        # Skip benchmarks that are not present on disk.
        if not os.path.isdir(bench_in_dir):
            continue

        bench_out_dir = os.path.join(FLAGS.results_directory,
                                     bench_dir)

        # Create the results directory for the suite
        try:
            os.makedirs(bench_out_dir)
        except FileExistsError:
            pass

        # Verify report
        if FLAGS.suffix:
            filename = '{}/{}_{}.yaml'.format(
                bench_out_dir,
                bench_name,
                FLAGS.suffix
            )
        else:
            filename = '{}/{}.yaml'.format(bench_out_dir, bench_name)
        if FLAGS.verify_report and os.path.isfile(filename):
            continue

        # Evaluate each sequence and record its goal value.
        results = {}
        for key, data in sequences.items():
            goal_value = Engine.evaluate(
                Goals.prepare_goals(FLAGS.goals, FLAGS.weights),
                Sequence.name_pass_to_string(data['seq']),
                'opt',
                bench_in_dir,
                FLAGS.working_set,
                FLAGS.times,
                FLAGS.tool,
                FLAGS.verify_output
            )
            results[key] = {'seq': data['seq'], 'goal': goal_value}

        # Store the results
        IO.dump_yaml(results, filename)
def execute(argv):
    """Predictive compilation driver.

    Pipeline: (1) extract the representation of the test programs,
    (2) measure the distance from test to training programs, then
    (3) for each test benchmark evaluate the N best sequences taken
    from its closest training benchmark.
    """

    # NOTE: argv is intentionally NOT deleted here (unlike the other
    # execute functions) because it is forwarded to the sub-commands.
    FLAGS = flags.FLAGS

    # Remember the top-level results directory: FLAGS.results_directory
    # is rebound below before each sub-command runs.
    results_directory = FLAGS.results_directory

    # Test benchmarks
    test_benchmarks = IO.load_yaml_or_fail(FLAGS.test_benchs_filename)
    if not test_benchmarks:
        logging.fatal('There are no test benchmarks to process')

    # Training benchmarks
    training_benchmarks = IO.load_yaml_or_fail(FLAGS.training_benchs_filename)
    if not training_benchmarks:
        logging.fatal('There are no training benchmarks to process')

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Extract the representation for test programs
    print(bold('1. EXTRACTING THE REPRESENTATION'))
    FLAGS.results_directory = os.path.join(results_directory, 'representation')
    FLAGS.benchmarks_filename = FLAGS.test_benchs_filename
    representation.execute(argv)

    # Distance: test --> training
    print(bold('2. MEASURING THE DISTANCE'))
    distance_results_directory = os.path.join(results_directory, 'distance')
    FLAGS.results_directory = distance_results_directory
    FLAGS.test_representation_directory = os.path.join(results_directory,
                                                       'representation')
    distance.execute(argv)

    # Process test benchmarks
    print(bold('3. PROCESSING THE BENCHMARKS'))
    for nof_sequences in tqdm(FLAGS.nof_sequences, desc='Processing'):
        for test_benchmark in test_benchmarks:
            # '<suite>.<benchmark>'
            index = test_benchmark.find('.')
            suite_name = test_benchmark[:index]
            bench_name = test_benchmark[index + 1:]

            # Find the best N sequences
            training_suite, sequences = find_sequences(
                test_benchmark, training_benchmarks,
                distance_results_directory, FLAGS.training_data_directory,
                int(nof_sequences))

            # Goal name used in the output path (names are joined when
            # several goals are optimized together).
            if len(FLAGS.goals) > 1:
                goal_name = '_'.join(FLAGS.goals)
            else:
                goal_name = FLAGS.goals[0]

            # Create the results directory for the suite
            results_dir = os.path.join(results_directory,
                                       'predictive_compilation',
                                       training_suite, goal_name)
            try:
                os.makedirs(results_dir)
            except FileExistsError:
                pass

            filename = '{}/{}_j{}.yaml'.format(results_dir, bench_name,
                                               nof_sequences)

            # Optionally skip benchmarks that already have a report.
            if FLAGS.verify_report and os.path.isfile(filename):
                continue

            results = {}

            # Evaluate each candidate sequence on the test benchmark.
            for key, data in sequences.items():
                goal_value = Engine.evaluate(
                    Goals.prepare_goals(FLAGS.goals, FLAGS.weights),
                    Sequence.name_pass_to_string(data['seq']), 'opt',
                    os.path.join(FLAGS.benchmarks_directory, suite_name,
                                 bench_name), FLAGS.working_set, FLAGS.times,
                    FLAGS.tool, FLAGS.verify_output)
                results[key] = {'seq': data['seq'], 'goal': goal_value}

            IO.dump_yaml(results, filename)