# Example no. 1
class code_dev_simulation():
    """The main model class for simulating a software development process.

    A directed graph is created to keep track of code references, while the
    java source code is stored internally in an AST.

    Args:
        iterations (int): Total amount of iterations to simulate
        fitness_method (int): The method indication to define the fitness metric
        probabilities (dict<string, int>): Maps probability to a certain function
        exp_condition (string): String of the experiment condition
        add_state (float): Probability of adding a statement
        logging (boolean): Indicates whether a custom gource log should be made
        pref_attach_condition (integer): Condition for what kind of preferential attachment to use:
            0 = Caller and callee use pref attachment
            1 = Only caller
            2 = Only callee
            3 = No preferential attachment
    """

    def __init__(self, iterations, fitness_method, probabilities, exp_condition, add_state, logging, pref_attach_condition):
        # params
        self.iterations = iterations
        self.fitness_method = fitness_method
        self.probabilities = probabilities
        self.exp_condition = exp_condition
        self.add_state = add_state
        self.pref_attach_condition = pref_attach_condition

        # initialisations
        self.start = time.time()
        self.running = True
        self.logging = logging
        self.step_n = 0
        self.possible_actions = [self.create_method, self.call_method, self.update_method, self.remove_method]
        self.changes = []
        self.fitness = []

        # AST modifications
        self.AST = AST()

        # AST log file
        self.create_dir_prob = 0.5
        self.directory_map = {}

        # Create basic AST from .java file and parse to a directed reference graph
        self.reference_graph = DiGraph()
        self.classes = []

        # Analysis lists
        self.list_fmin = []
        self.list_action = []
        self.list_fitnesses = []
        self.list_fit_stats = []
        self.total_code_size = []

    def get_tree(self):
        """Parse a .java file and get its declarations, then initialise the references."""
        with open(path.join(path.dirname(__file__), './', 'app.java')) as j_file:
            parser = Parser()
            tree = parser.parse_file(j_file)
            initial_classes = tree.type_declarations
            # NOTE(review): this call was missing from the recovered source; it is
            # restored from the docstring ("then initialise the references").
            self.initialise_references(initial_classes)

    def initialise_references(self, initial_classes):
        """Initialise the reference graph from the classes of the initial java file.

        Every method is added as a node carrying its declaration, its class,
        a fitness value and a line counter. The classes are added to a list.

        Args:
            initial_classes (list): The initial classes to instantiate the reference graph with
        """
        for java_class in initial_classes:
            # NOTE(review): the append was missing from the recovered source; it is
            # restored because the docstring says the classes are added to a list.
            self.classes.append(java_class)
            self.directory_map[java_class.name] = ''
            self.append_log_line('A', '/' + java_class.name, 'w')
            for java_element in java_class.body:
                if isinstance(java_element, MethodDeclaration):
                    self.reference_graph.add_node(
                        java_element.name,
                        data={'method': java_element, 'class': java_class,
                              'fitness': self.get_fitness(), 'lines': 0})
                    self.append_log_line('M', '/' + java_class.name)

    def get_fitness(self):
        """Get a fitness value based on the configured fitness method.

        fitness_method == 0: returns random number between 0 and 1 (uniform distribution)

        Returns:
            (float) The new fitness value

        Raises:
            ValueError: If the configured fitness method is not supported
        """
        if self.fitness_method == 0:
            return np.random.random()
        # Returning None here would only fail later, in a fitness comparison.
        raise ValueError('Unsupported fitness_method: %r' % (self.fitness_method,))

    def run_model(self):
        """Run function that completely runs the model."""
        for _ in range(self.iterations):
            self.step_n += 1
            # NOTE(review): the call to step() was missing from the recovered
            # source; its position relative to the counter is assumed.
            self.step()

    def step(self):
        """Step function for every iteration.

        Uses probabilities dictionary to determine which function to use.
        Actions are re-sampled until one of them reports a change.

        Returns:
            The number of changes made by the chosen action
        """
        delta_change = 0
        action = None
        while delta_change == 0:
            action = self.sample(self.possible_actions, self.get_probabilities())
            delta_change = action()

        # NOTE(review): the bookkeeping below was missing from the recovered
        # source; it is assumed from the otherwise unused `changes` list and
        # `save_states` method.
        self.changes.append(delta_change)
        self.save_states(action)
        return delta_change

    def save_states(self, action):
        """Saves the state of the model every step.

        Args:
            action (function): The function that is called this step
        """
        fitnesses = self.get_fitnesses()
        # NOTE(review): only the line above survived in the recovered source; the
        # appends are assumed from the analysis lists created in __init__.
        self.list_fmin.append(self.get_fmin())
        self.list_action.append(action.__name__)
        self.list_fitnesses.append(fitnesses)
        self.list_fit_stats.append(self.fitness_stats(fitnesses))
        self.total_code_size.append(self.get_total_code_size())

    def create_method(self):
        """Creates a method.

        Uses probabilities['create_class'] to determine whether to create the method in a new class.
        A class is selected, then the method is created using the AST class:
            AST.create_method(class)        returns: the new method
        Its class is added to the classes list.
        The method is finally added to the reference graph with its class and fitness as properties.

        Returns:
            The number of changes made

        Notes:
            - Should a statement automatically be created when a method is created?
              Consistent empty method creation does not make much sense
            - But it also doesn't really matter what's in the method right?
        """
        wants_new_class = np.random.random() <= self.probabilities['create_class']
        # Without any class there is nothing to sample from, so create one.
        if wants_new_class or not self.classes:
            selected_class = self.create_class()
            # NOTE(review): append restored from the docstring.
            self.classes.append(selected_class)
            changes = 2
        else:
            n_methods = self.get_method_amounts()
            total = np.sum(n_methods)
            if total > 0:
                probabilities = list(np.array(n_methods) / total)
            else:
                # All classes are empty: fall back to a uniform choice.
                probabilities = [1.0 / len(self.classes)] * len(self.classes)
            selected_class = self.sample(self.classes, probabilities)
            changes = 1
        method = self.AST.create_method(selected_class)
        fitness = self.get_fitness()

        self.reference_graph.add_node(
            method.name,
            data={'method': method, 'class': selected_class, 'fitness': fitness, 'lines': 0})
        self.append_log_line('M', self.directory_map[selected_class.name] + '/' + selected_class.name)

        # Add statement(s) based on probability to new method here? It is created empty

        # Method created and class created/modified
        return changes

    def call_method(self):
        """Make one method call another.

        Sample two functions from the reference graph and use them as caller and callee.
        Use AST class to create a reference:
            AST.create_reference(
                caller method, callee method, callee class
            )       returns: the new reference (does not have to be used)
        Set a new fitness for the caller method.
        Then add a new edge from the caller to the callee in the reference graph.

        A method does not call itself, and does not call another method twice.

        Returns:
            The number of changes made

        Raises:
            ValueError: If pref_attach_condition is not one of 0, 1, 2, 3
        """
        methods = []
        sizes = []
        in_degrees = []
        for node, in_degree in self.reference_graph.in_degree():
            data = self.reference_graph.nodes[node]['data']
            # NOTE(review): the loop body was missing from the recovered source;
            # the tracked 'lines' counter is assumed to be the size measure.
            methods.append(data)
            sizes.append(data['lines'])
            in_degrees.append(in_degree)

        if not methods:
            return 0

        # Add-one smoothing so empty / uncalled methods can still be sampled.
        sizes = [s + 1 for s in sizes]
        in_degrees = [d + 1 for d in in_degrees]
        caller_method_probabilities = list(np.array(sizes) / np.sum(sizes))
        callee_method_probabilities = list(np.array(in_degrees) / np.sum(in_degrees))

        # Preferential attachment for both caller and callee
        if self.pref_attach_condition == 0:
            caller_info = self.sample(methods, caller_method_probabilities)
            callee_info = self.sample(methods, callee_method_probabilities)
        # Caller pref attachment
        elif self.pref_attach_condition == 1:
            caller_info = self.sample(methods, caller_method_probabilities)
            callee_info = self._uniform_choice(methods)
        # callee pref attachment
        elif self.pref_attach_condition == 2:
            caller_info = self._uniform_choice(methods)
            callee_info = self.sample(methods, callee_method_probabilities)
        # No preferential attachment
        elif self.pref_attach_condition == 3:
            caller_info = self._uniform_choice(methods)
            callee_info = self._uniform_choice(methods)
        else:
            raise ValueError('Unsupported pref_attach_condition: %r' % (self.pref_attach_condition,))

        if self.exp_condition != 'reproduce':
            callee_info = self.find_callee(caller_info, callee_info, methods, callee_method_probabilities, sizes,
                                           in_degrees)
        if callee_info is None:
            return 0

        # NOTE(review): the call name was missing from the recovered source;
        # `create_reference` is assumed as the counterpart of `delete_reference`.
        self.AST.create_reference(
            caller_info['method'], callee_info['method'], callee_info['class']
        )

        caller_info['fitness'] = self.get_fitness()
        caller_info['lines'] += 1
        self.reference_graph.add_edge(caller_info['method'].name, callee_info['method'].name)
        self.append_log_line('M', self.directory_map[caller_info['class'].name] + '/' + caller_info['class'].name)
        return 1

    def call_exists(self, callee_method, caller_method):
        """Checks whether a call to callee is already being made in the caller.

        Args:
            callee_method: callee function node data
            caller_method: caller function node data

        Returns: (bool) True if a call is already being made
        """
        callee_class = callee_method['class']
        callee_method = callee_method['method']
        caller_method = caller_method['method']
        for stmt in caller_method.body:
            if isinstance(stmt, ExpressionStatement) and\
                        isinstance(stmt.expression, MethodInvocation) and\
                        stmt.expression.name == callee_method.name and\
                        stmt.expression.target.value == callee_class.name:
                return True
        return False

    def find_callee(self, caller_info, callee_info, methods, callee_method_probabilities, sizes, in_degrees):
        """Find a method to call that is not equal to the caller and has not been called by the caller already.

        Rejected candidates are removed from a local candidate pool (the
        argument lists are not modified) and a new callee is sampled with
        probabilities proportional to the remaining in-degree weights.

        Args:
            caller_info: caller function node
            callee_info: callee function node
            methods (list): list of all the available methods to call
            callee_method_probabilities (list<float>): Probabilities for every function to be called
            sizes (list<int>): List of all sizes of methods
            in_degrees (list<int>): List of all the references a method has

        Returns:
            Callee method or None if no methods available
        """
        candidates = list(methods)
        weights = list(in_degrees)
        while caller_info['method'].name == callee_info['method'].name or self.call_exists(callee_info, caller_info):
            # Drop the rejected candidate so the loop is guaranteed to terminate.
            for idx, candidate in enumerate(candidates):
                if candidate is callee_info:
                    del candidates[idx]
                    del weights[idx]
                    break
            if len(candidates) == 0:
                return None
            callee_method_probabilities = list(np.array(weights) / np.sum(weights))
            callee_info = self.sample(candidates, callee_method_probabilities)
        return callee_info

    def update_method(self):
        """Update the least fit method by adding or deleting a statement.

        Pick an unfit method using pick_unfit_method()
        Then either use add or delete statement from the AST class:
            AST.add_statement(method)       returns: void
            AST.delete_statement(method)    returns: void

        A statement is added with probability `add_state`, otherwise one is deleted.

        Finally assign a new fitness to the method.

        Returns:
            The number of changes made

        Notes:
            - Design conditions to add or delete statements
            - Think the chance to add a statement should be higher, since repos
              grow?
        """
        node = self.pick_unfit_method()
        data = node['data']
        method = data['method']
        change = 0
        if np.random.random() <= self.add_state:
            # NOTE(review): the add_statement call was missing from the recovered
            # source; it is restored from the docstring.
            self.AST.add_statement(method)
            data['lines'] += 1
            change = 1
        else:
            stmt = self.pick_statement(method)
            if stmt is not None:
                self.AST.delete_statement(method, stmt)
                if data['lines'] > 0:
                    data['lines'] -= 1
                change = -1
        self.append_log_line('M', self.directory_map[data['class'].name] + '/' + data['class'].name)
        data['fitness'] = self.get_fitness()
        return change

    def pick_statement(self, method):
        """Pick a statement and return it.

        Currently a random statement (variable declaration) is picked in the method uniformly.

        Args:
            method: Method to pick a statement from

        Returns:
            Statement or None if method has no statements
        """
        declarations = [java_element for java_element in method.body
                        if isinstance(java_element, VariableDeclaration)]
        if len(declarations) > 0:
            return self._uniform_choice(declarations)
        return None

    def remove_method(self, node=None):
        """Deletes a method and deletes the method call from its callers.

        If a caller becomes empty after deleting the method, delete the caller as well and the deletion propagates.

        References graph can be used to determine the references and they can be deleted using the AST class:
            AST.delete_reference(caller node, method, class node)
        After deleting all callers, the method can be deleted using:
            AST.delete_method(class node, method)
        Remove the method node from the reference graph.

        Args:
            node: The method node to be deleted. If None, choose one using pick_unfit_method

        Returns:
            The number of changes made
        """
        change_size = 0
        if len(self.reference_graph.nodes()) == 1:
            # Never delete the last remaining method
            return 0
        if node is None:
            node = self.pick_unfit_method()

        method_info = node['data']
        class_node = method_info['class']
        method_node = method_info['method']
        method = method_node.name

        void_callers = []
        # Materialise the predecessors so the graph can be changed afterwards.
        for caller in list(self.reference_graph.predecessors(method)):
            if caller == method:
                continue
            # Get the caller and delete the reference
            caller_info = self.reference_graph.nodes[caller]['data']
            caller_node = caller_info['method']
            self.AST.delete_reference(caller_node, method_node, class_node)
            # The reference lives in the caller, so the caller's class file is the one modified.
            caller_class = caller_info['class']
            self.append_log_line('M', self.directory_map[caller_class.name] + '/' + caller_class.name)
            if len(caller_node.body) == 0:
                # If the caller has become empty, add it to the queue to delete later
                void_callers.append(caller)
            else:
                # Otherwise change its fitness
                caller_info['fitness'] = self.get_fitness()
                caller_info['lines'] -= 1
                change_size -= 1

        # Delete the method after deleting all the invocation statements
        self.AST.delete_method(class_node, method_node)
        # NOTE(review): the node removal was missing from the recovered source;
        # it is restored from the docstring.
        self.reference_graph.remove_node(method)
        self.append_log_line('M', self.directory_map[class_node.name] + '/' + class_node.name)
        if len(class_node.body) == 0:
            self.append_log_line('D', self.directory_map[class_node.name] + '/' + class_node.name)
            del self.directory_map[class_node.name]
            # NOTE(review): assumed; keeps deleted classes from being sampled again.
            if class_node in self.classes:
                self.classes.remove(class_node)
        change_size -= 1

        # Then recursively do the same for all the methods that have become empty in the process
        for caller in void_callers:
            # A caller may already have been removed by an earlier recursive call.
            if caller in self.reference_graph:
                change_size += self.remove_method(self.get_node_data(caller))
        return change_size

    def create_class(self):
        """Create class using AST:
            AST.create_class()      returns: Newly created class

        Returns:
            Created class
        """
        class_node = self.AST.create_class()
        self.directory_map[class_node.name] = self.get_dir()
        self.append_log_line('A', self.directory_map[class_node.name] + '/' + class_node.name)
        return class_node

    def pick_unfit_method(self):
        """Picks a method based on its (low) fitness.

        Currently selects the method with lowest fitness.

        Returns:
            The node attribute dict of the least fit method, or the string
            'empty' when no node has a fitness below 2
        """
        method_fitness = ('empty', 2)
        for _, node_data in self.reference_graph.nodes(data=True):
            fitness = node_data['data']['fitness']
            if fitness < method_fitness[1]:
                method_fitness = (node_data, fitness)
        return method_fitness[0]

    def get_fmin(self):
        """Get the lowest fitness over all methods.

        Returns:
            The lowest fitness, or 2 when no method has a fitness below 2
        """
        return min([2] + self.get_fitnesses())

    def get_node_data(self, node):
        """Return node data from the reference graph.

        Args:
            node (string): The name of the node to return the data of

        Returns:
            Data of the node defined in the reference graph
        """
        return self.reference_graph.nodes[node]

    def sample(self, elements, probalities):
        """Sample a list of elements using a list of probalities (same order).

        An index is drawn instead of an element so that numpy never has to
        convert `elements` to an array; the original object is returned.

        Args:
            elements (list<any>): List of elements to sample
            probalities (list<float>): List of floats that specify the weight for each element in elements

        Returns:
            Sampled element from elements
        """
        return elements[np.random.choice(len(elements), p=probalities)]

    def _uniform_choice(self, elements):
        """Return a uniformly chosen element of a non-empty list."""
        return elements[np.random.randint(len(elements))]

    def get_method_amounts(self):
        """Get the amount of methods in all classes.

        Returns (list<int>) List of amount of methods in same order as self.classes
        """
        return [
            sum(1 for java_element in java_class.body if isinstance(java_element, MethodDeclaration))
            for java_class in self.classes
        ]

    def get_probabilities(self):
        """Get a list of the four method probabilities in order:
        Create, call, update, and delete respectively.

        Returns:
            List<float> probabilities of four method manipulation functions
        """
        probs = self.probabilities
        # NOTE(review): the list body was missing from the recovered source. The
        # keys are assumed to follow the action method names; confirm them
        # against the caller that builds `probabilities`.
        delete_key = 'remove_method' if 'remove_method' in probs else 'delete_method'
        return [
            probs['create_method'],
            probs['call_method'],
            probs['update_method'],
            probs[delete_key],
        ]

    def get_fitnesses(self):
        """Collect the fitness of every method in the reference graph.

        Returns:
            list of fitnesses
        """
        return [node_data['data']['fitness'] for _, node_data in self.reference_graph.nodes(data=True)]

    def fitness_stats(self, fitnesses):
        """Summarise a list of method fitnesses.

        Args:
            fitnesses (list<float>): Fitness values of the methods

        Returns:
            A list of count, mean, std, min and max of the values
        """
        fitnesses = np.array(fitnesses)
        return [len(fitnesses), fitnesses.mean(), fitnesses.std(), fitnesses.min(), fitnesses.max()]

    def get_total_code_size(self):
        """Get the total code base size (total amount of method lines) by summing the lines of each method."""
        return sum(node_data['data']['lines'] for _, node_data in self.reference_graph.nodes(data=True))

    def get_dir(self):
        """Choose a directory path for a newly created class in the gource log.

        Returns:
            '' when logging is disabled or no directory exists yet, otherwise a
            new or existing directory path
        """
        if self.logging == False:
            return ''

        if np.random.random() <= self.create_dir_prob:
            num = len(self.directory_map.values())
            if np.random.random() <= 0.6:
                # Nest a (possibly new) directory below an existing non-root one
                nlp = list(self.directory_map.values())
                if '' in nlp:
                    # NOTE(review): the body of this branch was missing from the recovered source.
                    nlp.remove('')
                if len(nlp) == 0:
                    nlp.append('/dir_' + str(int(np.random.random() * 10000)))
                return self._uniform_choice(nlp) + self.get_dir()
            return '/dir_' + str(num)

        existing = list(self.directory_map.values())
        if not existing:
            # Nothing to reuse yet: place the class in the root.
            return ''
        return self._uniform_choice(existing)

    def append_log_line(self, action, fl, param='a+'):
        """Append one line to the custom gource log, if logging is enabled.

        Args:
            action (string): Action code written to the log ('A', 'M' or 'D' in this class)
            fl (string): Path of the file the action applies to
            param (string): Mode used to open the log file
        """
        if self.logging == False:
            return
        timestamp = self.start + self.step_n * 100
        user = '******' + str(np.random.randint(1, 10))

        with open("./vid/code.log", param) as f:
            f.write(str(int(timestamp)) + '|' + user + '|' + action + '|' + fl + '\n')
# Example no. 2
class Evolver:
    # Simulates the evolution of a Java code base: classes are tracked in an
    # inheritance graph, methods in a reference graph, and the code itself is
    # changed through a CodeModifier.
    # NOTE(review): this class appears to have lost lines during extraction
    # (closing brackets, `else:` branches, loop bodies). Gaps are flagged
    # below and are not filled in.
    def __init__(self,
        # NOTE(review): the rest of the parameter list is missing. The body reads
        # initial_classes, iterations, fitness_method, probabilities,
        # exp_condition and add_state; their order and defaults are unknown.
        """
        :param initial_classes: assuming these classes has no method calls to or inheritances from each other
        :return: None
        """
        # params
        self.iterations = iterations
        self.fitness_method = fitness_method
        self.probabilities = probabilities
        self.exp_condition = exp_condition
        self.add_state = add_state

        # Without explicit classes, parse them from App.java one directory above this file.
        if initial_classes is None:
            with open(path.join(path.dirname(__file__), '..',
                                'App.java')) as java_file:
                parser = Parser()
                tree = parser.parse_file(java_file)
                initial_classes = tree.type_declarations
        self.p_create_class = 0.1
        self.p_no_inherit = 0.2
        self.code_modifier = CodeModifier()
        self.inheritance_graph = DiGraph()
        self.reference_graph = DiGraph()
        self.fitness = []
        self.step_n = 0

        # Register every initial class, and every method declaration in it, as graph nodes.
        for c in initial_classes:
            self.inheritance_graph.add_node(c.name, {'class': c})
            for m in c.body:
                if isinstance(m, MethodDeclaration):
                    self.reference_graph.add_node(m.name, {
                        'method': m,
                        'class': c,
                        'fitness': random(),
                        'lines': 0
                    # NOTE(review): the closing `})` of this call is missing.

        # Analysis lists
        self.list_fmin = []
        self.list_action = []
        self.list_fitnesses = []
        self.list_fit_stats = []
        self.total_code_size = []
        self.changes = []

    def save_states(self, action):
        """
        Saves the state of the model every step

            action (function): The function that is called this step
        """
        # self.list_fmin.append(self.get_fmin())
        fitnesses = self.get_fitnesses()
        # NOTE(review): nothing is stored here; the lines filling the analysis
        # lists appear to be missing.

    def get_total_code_size(self):
        """
        Save the total code base size (total amount of method lines) by summing the lines of each method
        """
        nodes_dict = dict(self.reference_graph.nodes(data=True))
        # fitnesses = [data['fitness'] for method, data in self.reference_graph.nodes_iter(True)]

        # NOTE(review): the summed expression and the closing `])` are missing.
        return sum([
            for method_node in self.reference_graph.__iter__()

    def fitness_stats(self, fitnesses):
        """
        Gets list of fitnesses of the methods as input
        Returns a list of mean, std, min, max values
        """
        fitnesses = np.array(fitnesses)
        # NOTE(review): the contents of the returned list are missing.
        return [

    # def get_fitnesses(self):
    #     """
    #     copy of pick_unfit_method
    #     Currently selects the method with lowest fitness
    #     Returns:
    #         list of fitnesses
    #     """
    #     nodes_dict = dict(self.reference_graph.nodes(data=True))
    #     return [nodes_dict[method_node]['data']['fitness'] for method_node in self.reference_graph.__iter__()]

    def get_fmin(self):
        """
        copy of pick_unfit_method
        Currently selects the method with lowest fitness

        returns fmin
        """
        # Start above any value random() can produce, so the first node always wins.
        method_fitness = ('empty', 2)
        nodes_dict = dict(self.reference_graph.nodes(data=True))
        for method_node in self.reference_graph.__iter__():
            # NOTE(review): nodes in this class are added with flat attributes
            # ('method', 'class', 'fitness', 'lines'), so the 'data' key looks
            # absent here; confirm before re-enabling the call in save_states.
            node_data = nodes_dict[method_node]['data']
            fitness = node_data['fitness']
            if fitness < method_fitness[1]:
                method_fitness = (node_data, fitness)
        return method_fitness[1]

    def run_model(self):
        """
        Run function that completely runs the model
        """
        for _ in range(self.iterations):
            self.step_n += 1
            # NOTE(review): no call to step() is visible in this loop; it is probably missing.

    def step(self):
        # Performs one simulation step: samples actions until one reports a
        # non-zero change size, then returns that size.
        p_create_method = 0.2
        p_call_method = 1 - p_create_method
        p_delete_method = 0.1
        p_update_method = 1 - p_delete_method
        change_size = 0
        while change_size == 0:
            # NOTE(review): both lists look truncated (presumably a delete entry
            # and the closing `])`); p_delete_method is otherwise unused.
            action = sample([
                self.create_method, self.call_method, self.update_method,
            ], [
                p_create_method, p_call_method, p_update_method,
            change_size = action()
        print('number of methods: %d' % self.reference_graph.number_of_nodes())

        return change_size

    def create_method(self):
        # Creates a method in a new class (with probability p_create_class) or
        # in a sampled existing class, and registers it in the reference graph.
        # Returns the number of changes made.
        print('creating a method')
        klass = None
        if random() < self.p_create_class:
            klass = self.create_class()
            changes = 2
            # NOTE(review): an `else:` appears to be missing here; as written the
            # lines below would overwrite the newly created class.
            classes = []
            sizes = []
            for node, data in self.inheritance_graph.nodes_iter(data=True):
            # NOTE(review): the loop body filling `classes` and `sizes` is missing.
            klass = sample(classes, [s + 1 for s in sizes])
            changes = 1
        method = self.code_modifier.create_method(klass)
        self.reference_graph.add_node(method.name, {
            'method': method,
            'class': klass,
            'fitness': random(),
            'lines': 0
        # NOTE(review): the closing `})` of this call is missing.
        return changes

    def call_method(self):
        # Samples a caller weighted by `sizes` and a callee weighted by
        # in-degree (both plus one) and records the call. Returns 1.
        print('calling a method')
        methods = []
        sizes = []
        in_degrees = []
        for node, in_degree in self.reference_graph.in_degree_iter():
            data = self.reference_graph.node[node]
            # NOTE(review): the lines filling `methods`, `sizes` and `in_degrees` are missing.

        caller_info = sample(methods, [s + 1 for s in sizes])
        callee_info = sample(methods, [d + 1 for d in in_degrees])
        # NOTE(review): no code-modifier call or add_edge is visible here; they may be missing.
        # The will introduce some instability when the probability of creating and deleting methods drops to near 0
        caller_info['fitness'] = random()
        # NOTE(review): the line counter of the callee is incremented although
        # the new statement would live in the caller; confirm which is intended.
        callee_info['lines'] += 1
        return 1

    def update_method(self):
        # Gives the least fit method a new random fitness and one more line. Returns 1.
        print('updating a method')
        method = self.choose_unfit_method()[0]
        method_info = self.reference_graph.node[method]
        method_info['fitness'] = random()
        method_info['lines'] += 1
        return 1

    def delete_method(self, method=None):
        """
        Delete a method and delete the method call from its callers. It a caller becomes
        empty after deleting the method, delete the caller as well and the deletion propagates
        :param method: The method to be deleted. If None, randomly choose one
        :return: The number of changes made
        """
        # NOTE(review): when `method` is None the code below picks the least fit
        # method through choose_unfit_method(), not a random one.
        print('deleting a method')
        change_size = 0
        if self.reference_graph.number_of_nodes() == 1:
            # Don't delete the last method
            return 0
        if method is None:
            # method = choice(self.reference_graph.nodes())
            method = self.choose_unfit_method()[0]
        method_info = self.reference_graph.node[method]
        class_node = method_info['class']
        void_callers = []
        for caller in self.reference_graph.predecessors_iter(method):
            if caller != method:
                caller_info = self.reference_graph.node[caller]
                caller_node = caller_info['method']
                # NOTE(review): the reference deletion, the void_callers append and
                # an `else:` appear to be missing; as written, callers are
                # updated only when their body is already empty.
                if len(caller_node.body) == 0:
                    caller_info['fitness'] = random()
                    caller_info['lines'] -= 1
                    change_size += 1
        self.code_modifier.delete_method(class_node, method_info['method'])
        change_size += 1
        if len(class_node.body) == 0:
        # NOTE(review): the body of this `if` is missing, and no removal of the
        # method from the reference graph is visible.
        # recursively remove all void callers
        for caller in void_callers:
            # NOTE(review): the recursive call returns a negated count, so adding
            # it here cancels changes instead of accumulating them; check the sign.
            change_size += self.delete_method(caller)
        return -change_size

    def create_class(self):
        # Creates a class through the code modifier and registers it in the
        # inheritance graph; a superclass is sampled when
        # random() > p_no_inherit. Returns the new class.
        superclass_name = None
        if random() > self.p_no_inherit:
            class_names = []
            num_subclasses = []
            for node, in_degree in self.inheritance_graph.in_degree_iter():
            # NOTE(review): the loop body filling `class_names` and `num_subclasses` is missing.

            superclass_name = sample(class_names,
                                     [n + 1 for n in num_subclasses])
        klass = self.code_modifier.create_class(superclass_name)
        self.inheritance_graph.add_node(klass.name, {'class': klass})
        if superclass_name:
            # Edge points from the subclass to its superclass.
            self.inheritance_graph.add_edge(klass.name, superclass_name)
        return klass

    def choose_unfit_method(self):
        """
        :return: the method with least fitness number. Can change to a probabilistic function that biases towards
        less fit methods if the current implementation makes the system too stable
        """
        # Returns a (method name, fitness) tuple; the name stays None when no
        # node has a fitness below 1.
        min_fitness = 1
        unfit_method = None
        for method, data in self.reference_graph.nodes_iter(True):
            if data['fitness'] < min_fitness:
                min_fitness = data['fitness']
                unfit_method = method
        return unfit_method, min_fitness

    def get_fitnesses(self):
        # Collects one value per node of the reference graph.
        # NOTE(review): the element expression and the closing `]` are missing;
        # presumably data['fitness'].
        fitnesses = [
            for method, data in self.reference_graph.nodes_iter(True)
        return fitnesses