Exemplo n.º 1
0
 def visit_any_node(self, node, node_type):
     """Visit a node and, if the mutation roll succeeds, replace it.

     Args:
         node: The AST node currently being visited.
         node_type: Label for the node's type; used only for logging here.

     Returns:
         The result of applying a randomly selected mutation to ``node``
         when ``should_mutate()`` is truthy, otherwise ``node`` itself.
     """
     Log.DEBUG(f'Visiting {node_type} node')

     # Guard clause: the common case is to leave the node untouched.
     if not self.should_mutate():
         Log.DEBUG('Returning original node.')
         return node

     Log.DEBUG(f'Mutating node {node}')
     # The counter is bumped before the mutation is chosen and applied.
     self._num_mutations += 1
     mutation = self.select_random_mutation()
     Log.DEBUG(f'Selected mutation: {mutation}')
     return mutation(node)
Exemplo n.º 2
0
    def __init__(self, population, source=None, source_tree=None):
        """Create a chromosome, run its unit tests, and compute its fitness.

        The chromosome is built either from a source string or, when no
        string is given, from source generated out of ``source_tree``.
        If both are given, ``source`` takes precedence.

        Args:
            population: The population this chromosome belongs to. Its
                ``simulation`` attribute supplies the test file and repo
                for the test run and holds the best-fitness bookkeeping
                that is updated at the end of construction.
            source: Source code as a ``str``, or ``None``.
            source_tree: AST used to generate the source when ``source``
                is ``None``.

        Raises:
            ValueError: If both ``source`` and ``source_tree`` are ``None``.
        """
        # assign a uuid to this chromosome and then increment the static uuid counter
        self.uuid = Chromosome.next_uuid
        Chromosome.next_uuid += 1

        # require that either source code or an AST is provided
        if source is None and source_tree is None:
            raise ValueError('Attempted to initialize a chromosome without source code or a source code AST.')
        self.source = source
        self.source_tree = source_tree

        if self.source is not None:
            Log.DEBUG('Initialized new chromosome from source')
            assert isinstance(self.source, str)
        else:
            # if source code is not provided, generate it from the provided AST
            Log.DEBUG('Initialized new chromosome from AST')
            self.source = self.generate_source()

        # flag to track if a timeout occurs while running unit tests (indicating an error)
        self.timeout_occurred = False
        self.population = population

        # Run the unit tests before the bandit report: generating the report is
        # very expensive, and when the tests fail outright it can be skipped.
        simulation = population.simulation
        self.test_results = self.run_tests(simulation.test_file, simulation.repo)

        self.bandit_report = None
        if not (self.timeout_occurred or self.test_results.crashed or self.test_results.nran == 0):
            # Analyze self.source rather than the ``source`` argument: the
            # argument is None when the chromosome was built from an AST, and
            # self.source holds the generated code in that case.
            self.bandit_report = bp.BanditReport(source=self.source, auto_analyze=True, auto_parse=True)

        # start from the worst possible fitness; _compute_fitness() derives the
        # real value from the bandit report and test results
        self.fitness = float('inf')
        self._compute_fitness()

        # record this chromosome on the simulation if it beats either of the
        # tracked best-fitness values
        if self.fitness < self.population.simulation.best_fitness:
            self.population.simulation.best_fitness = self.fitness
            self.population.simulation.best_source = self.source

        if self.fitness < self.population.simulation.best_fitness_seen:
            self.population.simulation.best_fitness_seen = self.fitness
            self.population.simulation.best_source_seen = self.source
Exemplo n.º 3
0
 def visit_any_node(self, node, node_type):
     """Record ``node`` under its type, then continue into its children.

     Nodes are grouped in ``self.nodes``, keyed by ``node_type``. No
     mutation is performed on this pass.

     Args:
         node: The AST node being visited.
         node_type: Key under which the node is stored.
     """
     Log.DEBUG(f'Visiting node {node} with mutations turned off.')
     # setdefault creates the list the first time a node type is seen
     self.nodes.setdefault(node_type, []).append(node)
     self.generic_visit(node)
Exemplo n.º 4
0
    def generate_starting_population(self):
        """Create the initial population from the original source code.

        The first member is a chromosome built from the unmodified source.
        The remaining slots, up to ``self.population_size``, are filled with
        chromosomes built from mutated copies of that source.
        """
        Log.INFO('Generating random population.')
        # create a new empty population
        self.population = Population(self)

        # the unmodified source is always the first member
        chromosome = Chromosome(self.population, source=self.source)
        Log.DEBUG(f'Original source code for file {self.source_file} has a fitness of {chromosome.fitness}.')
        self.population.add_chromosome(chromosome)

        mutator = Mutator(mutation_frequency=5)
        while self.population.size < self.population_size:
            # require_mutation=True asks the mutator to keep trying until at
            # least one mutation has been made
            mutated_source = mutator.mutate(self.source, require_mutation=True)
            # mutate() returns source text, so wrap it in a Chromosome before
            # adding it, matching how the unmodified source is added above
            self.population.add_chromosome(Chromosome(self.population, source=mutated_source))
        assert self.population.size == self.population_size

        Log.INFO('Finished generating starting population.')
Exemplo n.º 5
0
    def mutate(self, source, require_mutation=False):
        """Return a mutated copy of ``source``.

        The source is re-parsed and visited repeatedly until the visited
        tree can be converted back to source code and, when
        ``require_mutation`` is truthy, at least one mutation was counted.

        Args:
            source: Source code to mutate; must be a ``str``.
            require_mutation: When truthy, keep retrying until
                ``self._num_mutations`` is at least 1.

        Returns:
            The regenerated source code, or ``source`` unchanged if it
            cannot be parsed as Python.
        """
        Log.DEBUG('Mutating source code...')
        assert type(source) is str
        Log.DEBUG(f'Number of characters: {len(source)}')

        # Unparseable source is returned untouched: a mutation is highly
        # unlikely to repair it, and broken code scores a fitness of inf anyway.
        try:
            parsed = ast.parse(source)
        except SyntaxError:
            return source

        # collect the nodes up front so they are available for substitutions later
        Log.DEBUG('Collecting nodes.')
        self.collect_nodes(parsed)

        Log.DEBUG('Beginning mutation process')
        mutated_source = None
        attempt = 0
        while True:
            attempt += 1
            Log.DEBUG(f'Mutation iteration: {attempt}')

            # every attempt starts from a fresh parse of the original source
            # TODO: optimize
            candidate = ast.parse(source)

            # the mutations themselves happen inside the visit_[Node] methods
            Log.DEBUG('Visiting tree with mutations turned on.')
            self.visit(candidate)
            Log.DEBUG(f'Mutations made before parsing back to source: {self._num_mutations}')

            # a tree that cannot be turned back into source is discarded and
            # the mutation count starts over
            try:
                mutated_source = astor.to_source(candidate)
            except Exception as err:
                if isinstance(err, AttributeError):
                    Log.DEBUG('Attribute Error occured while attempting to parse AST back to source.')
                else:
                    Log.DEBUG('Unable to parse mutated AST back to source.')
                Log.DEBUG(f'Exception message: {err}')
                self._num_mutations = 0
                mutated_source = None
            Log.DEBUG(f'Source is None: {mutated_source is None}, Require mutation: {require_mutation}, Mutations made: {self._num_mutations}')

            # stop once there is valid source and enough mutations were made
            if mutated_source is not None and not (require_mutation and self._num_mutations < 1):
                break

        Log.DEBUG(f'Number of mutations made: {self._num_mutations}')
        # reset the mutator's state so it can be used again
        self._reset()

        return mutated_source
Exemplo n.º 6
0
 def replace_node_with_random(self, node):
     """Build an AST for ``int.from_bytes(os.urandom(1), byteorder='big')``.

     Args:
         node: The node being replaced. It is not inspected; the returned
             expression is the same regardless of its contents.

     Returns:
         An ``ast.Call`` node representing the expression above.
     """
     Log.DEBUG('Replacing node with call to random number generator.')

     # ast.Constant replaces ast.Num / ast.Str, which are deprecated since
     # Python 3.8 and removed in 3.14. Explicit Load contexts keep the
     # resulting tree well-formed for compile() as well as for unparsing.
     urandom_call = ast.Call(
         func=ast.Attribute(value=ast.Name(id='os', ctx=ast.Load()), attr='urandom', ctx=ast.Load()),
         args=[ast.Constant(value=1)],
         keywords=[],
     )
     return ast.Call(
         func=ast.Attribute(value=ast.Name(id='int', ctx=ast.Load()), attr='from_bytes', ctx=ast.Load()),
         args=[urandom_call],
         keywords=[ast.keyword(arg='byteorder', value=ast.Constant(value='big'))],
     )
Exemplo n.º 7
0
 def should_mutate(self):
     """Randomly decide whether the current node should be mutated.

     Draws an integer from 1 to 100 inclusive and compares it with
     ``self.mutation_frequency``, so an integer frequency between 0 and
     100 acts as a percent chance.

     Returns:
         True when the draw is at or below the frequency, otherwise False.
     """
     roll = random.randint(1, 100)
     if not (roll <= self.mutation_frequency):
         Log.DEBUG(f'Choosing not to mutate.')
         return False
     Log.DEBUG(f'Choosing to mutate.')
     return True
Exemplo n.º 8
0
 def generate_source(self):
     """Convert this object's AST back into source code.

     Returns:
         The result of ``astor.to_source`` for ``self.source_tree``.

     Raises:
         AssertionError: If ``self.source_tree`` is ``None``.
     """
     Log.DEBUG('Generating source for chromosome from AST')
     tree = self.source_tree
     assert tree is not None
     return astor.to_source(tree)