Пример #1
0
def find_patterns_in_sequences(file_path, file_name):
    """Write the recurring subsequences of a sequence file to a csv file.

    The sequences are read with ``pprof_utilities.read_sequences`` and passed
    to ``__find_frequent_subsequences``. Every subsequence that was counted at
    least twice gets one row in ``subsequence_occurrences.csv``; no relative
    frequency threshold is applied.

    Args:
        file_path: Location handed to ``read_sequences``. The output file
            name is appended to it verbatim (no separator is inserted).
        file_name: Name of the file containing the sequences.
    """
    sequences = pprof_utilities.read_sequences(file_path, file_name)
    occurrences = __find_frequent_subsequences(sequences)
    total_num_sequences = str(len(sequences))

    with open(file_path + 'subsequence_occurrences.csv', 'w',
              newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=' ')
        writer.writerow(['Subsequence', 'Occurrences', 'TotalNumberSequences'])
        # Subsequences seen only once are not considered recurring.
        writer.writerows(
            [str(pattern), str(count), total_num_sequences]
            for pattern, count in occurrences.items()
            if count >= 2)
Пример #2
0
def evaluate_best_sequence(program):
    """Pick one of the best-rated optimization sequences for a program.

    The candidate sequences are read with ``pprof_utilities.read_sequences``
    and each one is passed to ``calculate_fitness_value`` in a pool of worker
    processes. The sequences that share the lowest fitness value are treated
    as the best ones; they are logged and one of them is chosen at random.

    Args:
        program: Passed unchanged to ``calculate_fitness_value``.

    Returns:
        A randomly chosen sequence among those with the lowest fitness value.

    Raises:
        IndexError: If no sequences were read.
        KeyError: If the shared dict holds no fitness value for a sequence
            after the workers finished.
    """
    log = logging.getLogger()
    sequences = pprof_utilities.read_sequences(SEQUENCE_FILE_PATH,
                                               SEQUENCE_FILE, SEQUENCE_PREFIX)
    if not sequences:
        raise IndexError("no sequences available to evaluate")

    seq_to_fitness = multiprocessing.Manager().dict()
    pool = multiprocessing.Pool()

    # Rate every sequence in a worker process. The workers are expected to
    # store their result in the shared dict under ``str(sequence)``
    # (assumption based on the arguments; verify against
    # calculate_fitness_value).
    for sequence in sequences:
        pool.apply_async(calculate_fitness_value,
                         args=(sequence, seq_to_fitness, str(sequence),
                               program))

    pool.close()
    pool.join()

    # Snapshot the managed dict once: every lookup on the proxy is a round
    # trip to the manager process.
    fitness = seq_to_fitness.copy()
    scores = [fitness[str(sequence)] for sequence in sequences]
    best_score = min(scores)
    fittest_sequences = [
        sequence for sequence, score in zip(sequences, scores)
        if score == best_score
    ]

    log.info("Best sequences %d of %d", len(fittest_sequences),
             len(sequences))
    for sequence in fittest_sequences:
        log.info("Best: %s", sequence)
    log.info(
        "----------------------------------------------------------------")

    return random.choice(fittest_sequences)
Пример #3
0
def find_most_frequent_sequence():
    """Log the sequences that occur most often in the compilestats files.

    Every file in ``FILE_PATH`` whose name ends with
    ``.heuristic-compilestats.raw`` and does not start with ``raw.`` is read
    with ``pprof_utilities.read_sequences``. The occurrences of each distinct
    sequence are counted over all of these files, and the sequences with the
    highest count are logged together with the number of files that yielded
    at least one sequence. If no sequence is found at all, a warning is
    logged and nothing else happens.
    """
    log = logging.getLogger(__name__)
    sequence_to_programs = dict()
    number_programs = 0

    for file_name in os.listdir(FILE_PATH):
        if file_name.startswith('raw.') or not file_name.endswith(
                '.heuristic-compilestats.raw'):
            continue

        sequences = pprof_utilities.read_sequences(FILE_PATH, file_name,
                                                   prefix=PREFIX)
        if sequences:
            number_programs += 1
        for sequence in sequences:
            # Tuples are hashable, so they can serve as dictionary keys.
            sequence_tuple = tuple(sequence)
            sequence_to_programs[sequence_tuple] = \
                sequence_to_programs.get(sequence_tuple, 0) + 1

    if not sequence_to_programs:
        log.warning("No sequences found in %s", FILE_PATH)
        return

    frequency = max(sequence_to_programs.values())
    tied = [sequence for sequence, count in sequence_to_programs.items()
            if count == frequency]
    # Among equally frequent sequences the one counted last is reported as
    # "the" best; the others follow in the order they were first counted.
    best = tied[-1]
    best_sequences = [best] + tied[:-1]

    log.info("Number of best sequences: %d", len(best_sequences))
    log.info("Most frequently occurring sequence:")
    log.info(best)
    # Pass integers to the %d placeholder; a pre-formatted string would make
    # the logging module fail to format the record.
    log.info("Occurrences: %s of %d", frequency, number_programs)

    for sequence in best_sequences:
        log.info("Best: %s", list(sequence))
Пример #4
0
def __create_flag_statistics_csv(file_path, file_name, csv_name=FLAG_CSV):
    """Write a csv file with per-flag statistics for a sequence file.

    For every flag found in the sequences read from the given file, one row
    records the number of sequences containing the flag, the number of times
    the flag occurs overall, the number of sequences and the overall number
    of flags.
    """
    all_sequences = pprof_utilities.read_sequences(file_path, file_name)
    overall_counts = {}
    sequence_counts = {}

    for current in all_sequences:
        seen = set()
        for flag in current:
            overall_counts[flag] = overall_counts.get(flag, 0) + 1
            if flag in seen:
                continue
            # First sighting of this flag within the current sequence.
            seen.add(flag)
            sequence_counts[flag] = sequence_counts.get(flag, 0) + 1

    # Each flag occurrence was counted exactly once above, so the totals add
    # up to the overall number of flags.
    number_of_flags = str(sum(overall_counts.values()))
    number_of_sequences = str(len(all_sequences))
    header = [
        'Flag', 'Sequences', 'Total', 'TotalNumberSequences',
        'TotalNumberFlags'
    ]

    # Store the collected numbers in a new csv file.
    with open(file_path + csv_name, 'w', newline='') as out:
        writer = csv.writer(out, delimiter=' ')
        writer.writerow(header)
        writer.writerows(
            [str(flag), str(sequence_counts[flag]), str(total),
             number_of_sequences, number_of_flags]
            for flag, total in overall_counts.items())