Exemplo n.º 1
0
def main():
    """Run the GP evolution while logging its console output to a file.

    Everything printed during the run (including the per-generation table
    emitted by ``algorithms.eaSimple``) goes to ``ga_history.txt``; only the
    final ``done`` marker is written to the real standard output.

    Returns:
        tuple: ``(pop, hof, stats)`` - the population object created before the
        run, the hall of fame and the statistics object.
    """
    # Run 'breeding'
    pop = toolbox.population(n=300)
    hof = tools.HallOfFame(1)

    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", numpy.mean)
    stats.register("std", numpy.std)
    stats.register("min", numpy.min)
    stats.register("max", numpy.max)

    orig_stdout = sys.stdout
    with open('ga_history.txt', 'w') as f:
        sys.stdout = f
        try:
            print('stats')
            algorithms.eaSimple(pop, toolbox, 0.5, 0.2, 40, stats,
                                halloffame=hof)
            # NOTE(review): this prints a freshly generated random tree, not
            # the best individual kept in ``hof`` - confirm that is intended.
            expr = gp.genFull(pset, min_=1, max_=3)
            tree = gp.PrimitiveTree(expr)
            print('Tree')
            print(str(tree))
        finally:
            # Always give stdout back, even if the evolution raises, and do
            # it before the ``with`` block closes the file.
            sys.stdout = orig_stdout

    print("done")
    return pop, hof, stats
Exemplo n.º 2
0
def main():
    """Evolve a population with ``eaSimple`` and report the best tree.

    The random generator is seeded with 318. Fitness values and individual
    sizes are tracked through a ``MultiStatistics`` object.

    Returns:
        tuple: ``(pop, logbook, hof)`` as produced by the run.
    """
    random.seed(318)

    population = toolbox.population(n=300)
    hall_of_fame = tools.HallOfFame(1)

    multi_stats = tools.MultiStatistics(
        fitness=tools.Statistics(lambda ind: ind.fitness.values),
        size=tools.Statistics(len),
    )
    reducers = (
        ("avg", numpy.mean),
        ("std", numpy.std),
        ("min", numpy.min),
        ("max", numpy.max),
    )
    for label, reducer in reducers:
        multi_stats.register(label, reducer)

    population, logbook = algorithms.eaSimple(
        population, toolbox, 0.5, 0.1, 50,
        stats=multi_stats, halloffame=hall_of_fame, verbose=True)

    # Show the best individual found, then plot the run history.
    print(gp.PrimitiveTree(hall_of_fame[0]))
    draw_logbook(logbook)
    return population, logbook, hall_of_fame
Exemplo n.º 3
0
    def test_example(self, ex, optimal=False, cats=1):
        """Print an example, its target matches and what a generated regex finds.

        Args:
            ex: mapping holding the text under the ``'string'`` key.
            optimal: when true the individual comes from
                ``gen_best_individual``; otherwise from
                ``generate_individual_from_example`` with ``PATTERN1``.
            cats: forwarded to ``generate_individual_from_example``.
        """
        print("\nExample:")
        print(ex)

        expected = regex.compile(self.regex_target).findall(ex['string'])
        print("Target:")
        print(expected)

        if optimal:
            expr = self.gen_best_individual(ex)
        else:
            expr = self.generate_individual_from_example(
                ex, self.PATTERN1, collapse=False, cats=cats)

        # Nothing to compile for an empty individual.
        if not len(expr) > 0:
            print("No match found.")
            return

        pattern_text = gp.compile(gp.PrimitiveTree(expr), self.pset).s
        compiled = regex.compile(pattern_text)
        print(compiled)
        found = compiled.findall(ex['string'])
        print(found)
        print(f'Expression Length: {len(pattern_text)}')
Exemplo n.º 4
0
def generate_initial_population_impl(toolbox):
    """Build the initial population of unique, size-limited trees.

    Candidates are generated with ``gp.genHalfAndHalf`` (depth 2 to 4). A
    candidate is rejected when its ``make_pp_str`` form is already registered
    in ``toolbox.ind_str_set`` or when it is longer than
    ``toolbox.max_individual_size``. After more than
    ``toolbox.child_creation_retries`` consecutive rejections the loop gives
    up and returns what it has.

    Args:
        toolbox: object carrying the configuration, the id generator, the
            log file ``f`` and the ``ind_str_set`` registry (reset here).

    Returns:
        list: the accepted individuals, at most ``toolbox.pop_size[0]``.
    """
    toolbox.ind_str_set = set()
    accepted = []
    consecutive_rejects = 0

    while len(accepted) < toolbox.pop_size[0]:
        ind = gp.PrimitiveTree(
            gp.genHalfAndHalf(pset=toolbox.pset, min_=2, max_=4))
        ind.age = 0
        ind.id = toolbox.get_unique_id()
        pp_str = make_pp_str(ind)

        is_duplicate = pp_str in toolbox.ind_str_set
        if is_duplicate or len(ind) > toolbox.max_individual_size:
            if consecutive_rejects >= toolbox.child_creation_retries:
                break
            consecutive_rejects += 1
            continue

        # Accepted: the rejection streak starts over.
        consecutive_rejects = 0
        toolbox.ind_str_set.add(pp_str)
        evaluate_individual(toolbox, ind, pp_str, 0)
        if ind:
            toolbox.f.write(
                f"at gen {toolbox.real_gen}, [{ind.id}] = {get_ind_info(ind)} = init\n"
            )
            toolbox.f.write(
                f"at gen {toolbox.real_gen}, [{ind.id}] = {str(ind)}\n")
        accepted.append(ind)

    return accepted
Exemplo n.º 5
0
 def save_results(self, fname):
     '''
     Write the run log and a summary of the best individual to a text file.
     Nothing is written when ``self.rlist`` is empty.
     Params:
         fname - string of the filename.
     Returns:
         N/A
     '''
     if not self.rlist:
         print("No results available to write to file.")
         return

     print("Writing results file: {}".format(fname))
     with open(fname, "w") as fout:
         # The log goes first, followed by the summary block.
         fout.write(str(self.log))

         required = ", ".join(map(str, self.slist))
         actual = ", ".join(map(str, self.rlist))
         # Readable equation of the best individual.
         equation = str(gp.PrimitiveTree(self.expr))
         # Success means the produced sequence equals the required one.
         if self.slist == self.rlist:
             verdict = "\n\nSuccess!"
         else:
             verdict = "\n\nFailed!"

         summary = "".join([
             "\n" + "-" * 80 + "\n",
             "\nBest individual:",
             "\nRequired sequence: {}".format(required),
             "\nActual sequence:   {}".format(actual),
             "\n\n",
             "\nBest algorithm: {}".format(equation),
             verdict,
         ])
         fout.write(summary)
Exemplo n.º 6
0
 def create_reduced_dataset(individual, pset, X):
     """Re-express every row of ``X`` through the sub-trees of an individual.

     The string form of the individual is scanned for every position where
     the character ``'F'`` occurs (position 0 is used when there is none).
     The sub-tree returned by ``fitness.get_subtree`` for each position is
     compiled once per distinct string form.

     Args:
         individual: expression accepted by ``gp.PrimitiveTree``.
         pset: primitive set used for parsing and compiling.
         X: iterable of argument tuples; each row is unpacked into the
             compiled features.

     Returns:
         list: one list per row of ``X`` with the value of every feature.
     """
     tree = gp.PrimitiveTree(individual)
     text = str(tree)
     starts = [pos for pos in range(len(text)) if text.startswith('F', pos)]
     if not starts:
         starts = [0]

     seen = []
     sources = []
     compiled = []
     for pos in starts:
         subtree = fitness.get_subtree(pos, text)
         key = str(subtree)
         if key in seen:
             continue
         seen.append(key)
         rebuilt = tree.from_string(subtree, pset)
         sources.append(str(rebuilt))
         compiled.append(gp.compile(rebuilt, pset))

     if not compiled:
         compiled.append(gp.compile(individual, pset))

     # Features are paired with their source strings, exactly as collected.
     return [[feature(*row) for feature, _ in zip(compiled, sources)]
             for row in X]
Exemplo n.º 7
0
def evalInvalid(atlas, toolbox):
    """Evaluate every individual of ``atlas`` whose ``charac`` is still None.

    Args:
        atlas: iterable of individuals.
        toolbox: provides ``multiMap`` and ``evalMapping``.

    Returns:
        Whatever ``assignValues`` returns for the pending individuals and
        their computed results.
    """
    pending = [ind for ind in atlas if ind.charac is None]

    # Plain PrimitiveTree copies are handed to the workers to keep the
    # parallelisation overhead low.
    bare_trees = [gp.PrimitiveTree(ind) for ind in pending]
    results = list(toolbox.multiMap(toolbox.evalMapping, bare_trees))
    return assignValues(pending, results)
Exemplo n.º 8
0
def pruebaIndividuo():
    """Generate one random individual and print it."""
    primitive_set = configuraIndividuo()
    # Full initialisation with a depth between 1 and 3, wrapped in the
    # matching tree and printed as a list of operations.
    print(gp.PrimitiveTree(gp.genFull(primitive_set, min_=1, max_=3)))
def evaluate_final_pop(arg):
    """Compile an individual from its string form and return the result.

    Args:
        arg: expression accepted by ``gp.PrimitiveTree``.

    Returns:
        The value produced by ``gp.compile`` with the module-level ``pset``.
    """
    source = str(gp.PrimitiveTree(arg))
    return gp.compile(source, pset)
Exemplo n.º 10
0
def evalSymbReg(individual):
    """Score an individual by how closely its markup matches ``RESULT_HTML``.

    Args:
        individual: GP expression compiled through ``toolbox.compile``.

    Returns:
        tuple: one-element fitness tuple holding the negated value of
        ``calc_similarity(RESULT_HTML, markup)``.
    """
    # Transform the tree expression in a callable function
    func = toolbox.compile(expr=individual)
    markup = func(x='text')
    # The string form of the tree used to be built here as well, but it was
    # never used; dropping it saves work on every evaluation.
    return -calc_similarity(RESULT_HTML, markup),
def subtreeGenerator(subtreeSlices, treeBestInd):
    """Compile each sub-tree of ``treeBestInd`` selected by a slice.

    Args:
        subtreeSlices: iterable of slice-like objects exposing ``start`` and
            ``stop``.
        treeBestInd: indexable sequence of tree nodes.

    Returns:
        list: one compiled object per slice, in the same order, built with
        the module-level ``pset``.
    """
    compiled_subtrees = []
    for subtree_slice in subtreeSlices:
        nodes = [treeBestInd[i]
                 for i in range(subtree_slice.start, subtree_slice.stop)]
        # Call gp.compile directly: building and registering a new Toolbox
        # for every slice added nothing but overhead.
        compiled_subtrees.append(
            gp.compile(expr=gp.PrimitiveTree(nodes), pset=pset))
    return compiled_subtrees
Exemplo n.º 12
0
def txt_to_individual(file_path, pset):
    """Load an expression from a text file and parse it into a tree.

    The names ``div``, ``sqrt`` and ``log`` are rewritten to ``pdiv``,
    ``psqrt`` and ``plog``, but only as whole words and only when the primitive
    set knows the prefixed name and not the bare one. Text that already uses
    the prefixed names is therefore left untouched.

    Args:
        file_path: path of the text file holding the expression.
        pset: primitive set used to parse the expression.

    Returns:
        The tree built by ``gp.PrimitiveTree.from_string``.
    """
    import re

    with open(file_path, 'r') as file:
        string = file.read()

    def _protect(match):
        # Rename only when that makes an unknown name resolvable.
        name = match.group(0)
        protected = 'p' + name
        if name not in pset.mapping and protected in pset.mapping:
            return protected
        return name

    # str.replace returns a new string; the original code discarded the
    # result, so no renaming ever took place.
    string = re.sub(r'\b(?:div|sqrt|log)\b', _protect, string)

    # from_string is a classmethod, so no throw-away random tree is needed.
    return gp.PrimitiveTree.from_string(string, pset)
Exemplo n.º 13
0
def pruebaIndividuo():
    """Generate one random individual and print it."""
    primitive_set = configuraIndividuo()
    # Performs a full initialisation with depth between 1 and 3.
    nodes = gp.genFull(primitive_set, min_=1, max_=3)
    # Wrap the node list in its tree; printing shows it as a list of
    # operations.
    print(gp.PrimitiveTree(nodes))
Exemplo n.º 14
0
def pruebaPoblacion():
    """Create a population of 10 individuals and print each one as a tree."""
    toolbox = base.Toolbox()
    configuraPoblacion(toolbox)

    # The population is initialised with 10 individuals in total.
    individuals = toolbox.population(n=10)

    # Each individual is printed as an expression tree.
    for member in individuals:
        tree = gp.PrimitiveTree(member)
        print(tree)
Exemplo n.º 15
0
    def get_optimal_drops(self, example, pattern, debug=False):
        """Look for a shorter expression that still reproduces the target.

        The baseline is the individual generated with ``collapse=False``.
        Candidates are then generated with ``collapse=True`` for varying
        ``drop_limit`` / ``skip_count`` values; a candidate replaces the best
        one when its compiled regex finds exactly the target matches and its
        compiled string is shorter.

        Args:
            example: mapping holding the text under the ``'string'`` key.
            pattern: forwarded to ``generate_individual_from_example``.
            debug: when true, every improvement is printed.

        Returns:
            The best expression found (the baseline if nothing shorter works).
        """
        non_opt_expr = self.generate_individual_from_example(example,
                                                             pattern,
                                                             collapse=False)
        tree_non_opt = gp.PrimitiveTree(non_opt_expr)
        ts = gp.compile(tree_non_opt, self.pset).s
        # Baseline: length of the non-collapsed regex string.
        best_len, best_expr = len(ts), non_opt_expr
        # Matches every candidate has to reproduce.
        target_test = regex.compile(self.regex_target).findall(
            example['string'])
        upper = 15
        while upper > 2:
            # skip restarts at 2 each time the outer loop is re-entered.
            skip = 2
            while skip < 12:
                temp_expr = self.generate_individual_from_example(
                    example,
                    pattern,
                    collapse=True,
                    drop_limit=upper,
                    skip_count=skip)
                tree_test = gp.PrimitiveTree(temp_expr)
                ts = gp.compile(tree_test, self.pset).s
                drop_len = len(ts)
                indr = regex.compile(ts)
                test = indr.findall(example['string'])

                if test == target_test:
                    # Success advances both counters at once, so ``upper``
                    # keeps shrinking inside the inner loop.
                    skip += 2
                    upper -= 1
                    if drop_len < best_len:
                        if debug:
                            print(
                                f'Regex: {indr}, length: {drop_len}, test: {test}'
                            )
                        best_expr = temp_expr
                        best_len = drop_len
                else:
                    # The first failure ends both loops.
                    skip = 12  # no use trying to skip more if skipping this many doesn't work
                    upper = 2  # no use trying a smaller limit if a larger doesn't work

        return best_expr
Exemplo n.º 16
0
    def show_results(self, gtitle):
        '''
        Display the results of executing the DEAP GP object: the required and
        calculated sequences, the best individual and its equation. On Linux a
        tree diagram of the best individual is also saved as a PNG file.
        Params:
            gtitle - string of the graph title; its extension is stripped and
                     the remainder is used as title and as PNG file name.
        Returns:
            N/A
        '''
        if self.rlist:
            # Show the sequence required
            result = ", ".join(map(str, self.slist))
            print("\nRequired sequence: {}".format(result))
            # Show the resultant integer sequence
            result = ", ".join(map(str, self.rlist))
            print("\nCalculated result: {}".format(result))
            # Let the user know how it went.
            print("-" * 80)
            if self.rlist == self.slist:
                print("\nSuccessfully calculated the Integer Sequence.")
            else:
                print("\nUnsuccessfull in calculating the Integer Sequence.")
            # Display the individual
            print('\nBest individual : ', self.expr)
            # Display the resultant equation from the best individual. The
            # string used to be built and thrown away, so it was never shown.
            tree = gp.PrimitiveTree(self.expr)
            print("\nBest algorithm: {}".format(str(tree)))

            # Display the best individual => graph and equation.
            # Only works reliably on Linux Ubuntu.
            if sys.platform.startswith('linux'):
                print("Running on Linux OS.")
                nodes, edges, labels = gp.graph(self.expr)
                # Create tree diagram
                g = nx.Graph()
                g.add_nodes_from(nodes)
                g.add_edges_from(edges)
                pos = graphviz_layout(g, prog="dot")

                nx.draw_networkx_nodes(g, pos)
                nx.draw_networkx_edges(g, pos)
                nx.draw_networkx_labels(g, pos, labels)

                # Remove the filename extension
                gtitle = os.path.splitext(gtitle)[0]
                plt.title(gtitle, None, 'center', None)
                plt.savefig(gtitle + '.png')
            else:
                print("Graphical output only available on Linux.")
        else:
            print("\nError: hof variable is emtpy.")
Exemplo n.º 17
0
    def view_results(self, results):
        """Plot the fitness averages and draw the best individual as a graph.

        Args:
            results: pair ``(hof, log)`` - the hall of fame and an iterable
                of per-generation records exposing an ``'avg'`` entry.
        """
        # Short symbols shown in the graph in place of primitive names.
        lab_dict = {
            'cat_prim': '+',
            'q_constr': '?',
            'plus_constr': '+',
            'or_prim': '|',
            'star_constr': 'x',
            'looka_constr': '(?=)',
            'lookb_constr': '(?<=)',
            'nch_constr': '[^ ]',
            'ch_constr': '[ ]',
            'term_constr': 't()'
        }
        hof, log = results
        avgs = [entry['avg'] for entry in log]
        plt.plot(avgs)
        plt.xlabel('Generations')
        plt.ylabel('Fitness Averages')
        plt.title('Capture Fitness Avgs over Generations (0 is perfect)')
        plt.show()

        # Index the hall of fame directly instead of reaching into __dict__.
        expr = hof[0]
        tree = gp.PrimitiveTree(expr)
        # This text used to be computed and discarded; print it so the best
        # tree and its compiled string are actually shown.
        print(str(tree) + '   =   ' + gp.compile(tree, pset=self.pset).s)

        nodes, edges, labels = gp.graph(expr)
        print(labels.values())
        # Key the new labels by node id so they always line up with the graph.
        new_labs = {}
        for node_id, lab in labels.items():
            if self.has_digit(lab):
                text = self.pset.context[lab].s
                # Make a lone space visible in the drawing.
                new_labs[node_id] = '\' \'' if text == ' ' else text
            else:
                new_labs[node_id] = lab_dict[lab]

        print(labels)
        print(new_labs)
        graph = networkx.Graph()
        graph.add_nodes_from(nodes)
        graph.add_edges_from(edges)
        pos = graphviz_layout(graph, prog="dot")

        plt.figure(figsize=(10, 10))
        networkx.draw_networkx_nodes(graph, pos, node_size=400, node_color='w')
        networkx.draw_networkx_edges(graph, pos, edge_color="blue")
        networkx.draw_networkx_labels(graph, pos, new_labs)
        plt.axis("off")
        plt.show()
Exemplo n.º 18
0
    def visit_FuncDef(self, node):
        """Record a function definition as ``(name, params, tree)``.

        Each parameter's type is registered in ``self.vars``: types present in
        ``type_map`` use its first element, everything else falls back to
        ``Val``. The body is then visited with a fresh empty tree in
        ``self.tree``, and the result is appended to ``self.results``.

        Args:
            node: function-definition AST node (presumably from pycparser -
                confirm).
        """
        name = node.decl.name

        # A function declared with an empty parameter list has ``args`` set
        # to None rather than an empty container; treat it as no parameters.
        arg_list = node.decl.type.args
        param_decls = arg_list.params if arg_list is not None else []
        params = [(decl.name, decl.type.type.names[0])
                  for decl in param_decls]

        # ``type_name`` avoids shadowing the builtin ``type``.
        for param_name, type_name in params:
            if type_name in type_map:
                real_type, _ = type_map[type_name]
            else:
                real_type = Val
            self.vars[param_name] = real_type

        self.tree = gp.PrimitiveTree([])
        self.visit(node.body)
        self.results.append((name, params, self.tree))
        self.tree = None
Exemplo n.º 19
0
def main():
    """Evolve a population, report the best individual and render it.

    The random generator is seeded with 318. The best individual's loss,
    code and generated markup are printed, its graph and the logbook are
    drawn, and the rendered page is saved to ``output/result_html.png``.

    Returns:
        tuple: ``(pop, log, hof)`` as produced by the run.
    """
    random.seed(318)

    population = toolbox.population(n=300)
    hall_of_fame = tools.HallOfFame(1)

    multi_stats = tools.MultiStatistics(
        fitness=tools.Statistics(lambda ind: ind.fitness.values),
        size=tools.Statistics(len),
    )
    for column, reducer in (("min", np.min), ("avg", np.mean)):
        multi_stats.register(column, reducer)

    population, log = algorithms.eaSimple(
        population, toolbox, 0.5, 0.2, 20,
        stats=multi_stats, halloffame=hall_of_fame, verbose=True)
    champion = hall_of_fame[0]
    champion_tree = gp.PrimitiveTree(champion)

    print('LOSS: {}'.format(evalByColorProportion(champion)))
    print()

    print('CODE: {}'.format(champion_tree))
    print()

    # Markup produced by the best individual.
    build_markup = toolbox.compile(expr=champion)
    html = build_markup(x='text')
    print('HTML: \n')
    print(html)
    print()

    draw_graph(champion)
    draw_logbook(log)

    # Compile again and wrap the output in ``body`` before rendering.
    build_markup = toolbox.compile(expr=champion)
    page = body(build_markup(x='text'))
    image = renderer.render_html(page)
    image.save('output/result_html.png')

    return population, log, hall_of_fame
Exemplo n.º 20
0
    def gen_best_individual(self,
                            ex,
                            w=True,
                            collapse=False,
                            cats=1,
                            drop_limit=10,
                            skip_counts=2):
        """Choose the pattern that fits the example and optimise its drops.

        ``PATTERN1`` is tried first (it gives shorter results by combining
        punctuation). It is kept when the regex compiled from its individual
        finds exactly the target matches; otherwise ``PATTERN2`` is used.

        Args:
            ex: mapping holding the text under the ``'string'`` key.
            w, collapse, cats, drop_limit, skip_counts: accepted but not
                read by this method.

        Returns:
            The result of ``get_optimal_drops`` for the chosen pattern.
        """
        first_try = self.generate_individual_from_example(ex, self.PATTERN1)
        expected = regex.compile(self.regex_target).findall(ex['string'])

        first_source = gp.compile(gp.PrimitiveTree(first_try), self.pset).s
        found = regex.compile(first_source).findall(ex['string'])

        chosen = self.PATTERN1 if found == expected else self.PATTERN2
        return self.get_optimal_drops(ex, chosen)
Exemplo n.º 21
0
    def from_string2(cls, string, pset):
        """Build a ``gp.PrimitiveTree`` from the textual form of an expression.

        The string is split on whitespace, parentheses and commas. Tokens
        found in ``pset.mapping`` become the mapped object; any other token
        is evaluated with ``eval`` and wrapped in a ``Terminal`` typed after
        the evaluated value.

        Args:
            string: textual expression.
            pset: primitive set whose ``mapping`` resolves known names.

        Returns:
            The resulting ``gp.PrimitiveTree``.

        Raises:
            TypeError: when an unknown token raises ``NameError`` in ``eval``.
        """
        tokens = re.split("[ \t\n\r\f\v(),]", string)
        expr = []
        for token in tokens:
            # Consecutive separators produce empty tokens.
            if token == '':
                continue
            if token in pset.mapping:
                primitive = pset.mapping[token]
                expr.append(primitive)
            else:
                try:
                    # NOTE(review): eval runs arbitrary code and resolves
                    # names against this function's locals and globals - only
                    # safe for trusted input; confirm where strings come from.
                    token = eval(token)
                except NameError:
                    raise TypeError(
                        "Unable to evaluate terminal: {}.".format(token))

                type_ = type(token)
                expr.append(Terminal(token, False, type_))
        return gp.PrimitiveTree(expr)
Exemplo n.º 22
0
def main():
    """Evolve a population and print the score and code of the best tree.

    The random generator is seeded with 318.

    Returns:
        tuple: ``(pop, log, hof)`` as produced by the run.
    """
    random.seed(318)

    pop = toolbox.population(n=300)
    hof = tools.HallOfFame(1)

    stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
    stats_size = tools.Statistics(len)
    mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
    # The "max" column used to be fed by numpy.mean, so the logbook reported
    # averages under the wrong label.
    mstats.register("max", numpy.max)

    pop, log = algorithms.eaSimple(pop,
                                   toolbox,
                                   0.5,
                                   0.1,
                                   20,
                                   stats=mstats,
                                   halloffame=hof,
                                   verbose=True)
    expr = hof[0]
    tree = gp.PrimitiveTree(expr)

    print('SCORE: \n')
    print(evalSymbReg(expr))
    print()

    print('CODE: \n')
    print(tree)
    print()

    # The best individual is no longer compiled and called here: the markup
    # it produced was never used.
    return pop, log, hof
Exemplo n.º 23
0
 def __init__(self,
              model,
              dataset_meta_features_df,
              primitive_to_hash_dic,
              le,
              knowledge_base_path,
              json_path,
              longest_pipeline_size,
              rank_size=100,
              use_meta_model_flag=True,
              boosting=False,
              boosting_percent=0.1,
              meta_selection_window_size=10,
              meta_selection=False,
              meta_selection_type='offspring',
              meta_fitness_flag=False,
              **kwargs):
     """Store the meta-model configuration on top of the base initialiser.

     Remaining keyword arguments are forwarded to the parent ``__init__``.

     Raises:
         ValueError: when ``boosting`` is requested while
             ``knowledge_base_path`` or ``json_path`` is None.
     """
     # These two flags are set before the parent initialiser runs
     # (presumably because it reads them - confirm against the base class).
     self.meta_fitness_flag = meta_fitness_flag
     self.use_boosting_flag = boosting
     super().__init__(**kwargs)

     # Boosting needs both the knowledge base and the JSON file.
     self.knowledge_base_path = knowledge_base_path
     self.json_path = json_path
     if self.use_boosting_flag:
         if self.knowledge_base_path is None or self.json_path is None:
             raise ValueError(
                 "Cannot use boosting without knowledge-base or Json path")

     # Meta-model and its supporting data.
     self.meta_model = model
     self.meta_features_df = dataset_meta_features_df
     self.primitives_to_hash_dic = primitive_to_hash_dic
     self.le = le
     self.use_metamodel_flag = use_meta_model_flag
     self.rank_size = rank_size
     self.longest_pipeline_size = longest_pipeline_size

     # Boosting and meta-selection settings.
     self.boosting_size = int(self.population_size * boosting_percent)
     self.meta_selection_flag = meta_selection
     self.meta_selection_type = meta_selection_type
     self.meta_selection_size = meta_selection_window_size

     # Working state.
     self.gptree = gp.PrimitiveTree([])
     self.gen_results_dic = {}
def prueba():
    """Create one random individual and show its fitness before and after
    evaluation."""
    # Toolbox that stores the population configuration.
    toolbox = base.Toolbox()
    ConfiguracionProblema.configuraPoblacion(toolbox)

    # A (random) individual is instantiated.
    # Students are advised to try several individuals under different
    # optimality conditions, to check that the function is well defined
    # over the whole search space.
    ind = toolbox.individual()

    # Print the individual BEFORE evaluating it.
    print(gp.PrimitiveTree(ind))
    print(ind.fitness.valid)  # False

    ind.fitness.values = evalEcuacion(toolbox, ind)

    # Print the individual's fitness AFTER evaluating it.
    print(ind.fitness.valid)  # True
    print(ind.fitness)
def evaluate_fitness(arg):
    """Score an individual from a discriminator penalty and simple heuristics.

    The individual is compiled from its string form into a sequence
    (``song``). The score combines:

    * a penalty of 200 times the second probability returned by
      ``discriminator.predict_proba`` (unpacked as ``gp_prob``);
    * a length term: -10 for up to 3 items, +9 for 4 to 100, -70 above 100;
    * per distinct item: +10 when its count is at most a third of the
      length, -20 when it exceeds half of the length.

    Args:
        arg: expression accepted by ``gp.PrimitiveTree``.

    Returns:
        tuple: one-element tuple holding the score.
    """
    source = str(gp.PrimitiveTree(arg))
    song = list(gp.compile(source, pset))
    length = len(song)

    # Vectorise the joined sequence and ask the discriminator about it.
    counts = count_vect.transform(["".join(song)])
    features = tfidf_transformer.transform(counts)
    dnn_prob, gp_prob = discriminator.predict_proba(features)[0]

    fitness = 0
    # higher penalty if the sample is considered more like GP output
    fitness -= 200 * gp_prob

    if length <= 3:
        fitness -= 10
    elif length <= 100:
        fitness += 9
    else:
        fitness -= 70

    for occurrences in Counter(song).values():
        if occurrences <= length / 3:
            fitness += 10
        if occurrences > length / 2:
            fitness -= 20

    return (fitness,)
Exemplo n.º 26
0
def mutUniform(toolbox, parent, expr, pset):
    """Replace a random sub-tree of a copy of ``parent`` with a new one.

    Args:
        toolbox: provides the uniqueness registry ``ind_str_set``, the size
            limit, the current generation and the log file ``f``.
        parent: individual to mutate; it is copied, not modified.
        expr: callable ``expr(pset=..., type_=...)`` producing the
            replacement sub-tree.
        pset: primitive set handed to ``expr``.

    Returns:
        tuple: ``(child, pp_str)``, or ``(None, None)`` when the child's
        string form is already registered or the child is too large.
    """
    child = copy_individual(toolbox, parent)
    point = random.randrange(0, len(child))
    subtree_span = child.searchSubtree(point)
    # The replacement has to return the same type as the node it replaces.
    mutation = expr(pset=pset, type_=child[point].ret)
    child[subtree_span] = mutation

    pp_str = make_pp_str(child)
    already_known = pp_str in toolbox.ind_str_set
    if already_known or len(child) > toolbox.max_individual_size:
        return None, None

    evaluate_individual(toolbox, child, pp_str, 0)
    if not child:
        return child, pp_str

    # Log the child, the inserted expression and the resulting tree.
    expr_str = str(gp.PrimitiveTree(mutation))
    f1 = parent.fam.family_index
    toolbox.f.write(
        f"at gen {toolbox.real_gen}, [{child.id}] = {get_ind_info(child)} = mut [{parent.id}]<{f1}>\n"
    )
    toolbox.f.write(
        f"at gen {toolbox.real_gen}, [{child.id}] = mut expr {expr_str}\n")
    toolbox.f.write(
        f"at gen {toolbox.real_gen}, [{child.id}] = {str(child)}\n")
    return child, pp_str
Exemplo n.º 27
0
def create_attribute(pset):
    """Return a random tree grown within the configured depth limits.

    Args:
        pset: primitive set used by ``gp.genGrow``.

    Returns:
        A ``gp.PrimitiveTree`` whose depth bounds come from
        ``settings.MINIMAL_TREE_DEPTH`` and ``settings.MAXIMUM_TREE_DEPTH``.
    """
    depth_bounds = {
        'min_': settings.MINIMAL_TREE_DEPTH,
        'max_': settings.MAXIMUM_TREE_DEPTH,
    }
    nodes = gp.genGrow(pset=pset, **depth_bounds)
    return gp.PrimitiveTree(nodes)
Exemplo n.º 28
0
def eaParetosSVD(atlases,
                 toolbox,
                 cxpb,
                 mutpb,
                 ngen,
                 minDiv,
                 minSize=20,
                 stats=None,
                 halloffame=None,
                 checkpoint=None,
                 freq=50,
                 verbose=__debug__):
    """
    Structure of this algorithm:
    
    Calculates fitness of mappings in atlas
    """
    invalid_ind = [ind for ind in sum(atlases, []) if ind.charac is None]
    invalid_primTree = [gp.PrimitiveTree(ind) for ind in invalid_ind]
    calculations = toolbox.multiMap(toolbox.evalMapping, invalid_primTree)

    widgets = [
        'Evolving: ',
        Percentage(), ' ',
        Bar(marker='#', left='[', right=']'), ' ',
        ETA()
    ]

    nevals = 0
    paretoN = 0
    adaptiveMinDiv = False
    lastcheckpoint = None

    logbook = tools.Logbook()
    logbook.header = (stats.fields
                      if stats else []) + ['gen', 'nevals', 'natlas']

    #Generate population of migrants to allow transfer between atlases
    migrants = []

    pbar = ProgressBar(widgets=widgets, maxval=ngen + 1)
    pbar.start()
    # Begin the generational process
    for gen in range(1, ngen + 1):

        #Evaluate fitness and pareto fronts for the generation
        #nevals += sum(toolbox.map(toolbox.evalCRS, atlases))
        print 'calc'
        nevals += assignValues(invalid_ind, list(calculations))
        print 'calc end'

        #filter invalid mappings out so that SVD can be done:
        def filterInd(individual):
            validCharac = np.isfinite(
                individual.charac['characteristic'].sum())
            validNorm = not np.isclose(
                individual.domainSD + np.abs(individual.domainAv), 0)
            validRobust = individual.robustness < 1e10
            return validCharac and validNorm and validRobust

        atlases = toolbox.map(lambda x: filter(filterInd, x), atlases)

        if type(minDiv) is tuple:
            a, b = minDiv
            adaptiveMinDiv = True
        else:
            minDivList = [minDiv for atlas in atlases]

        if adaptiveMinDiv:
            minDiv = 1 - 10**(-paretoN**a / b)
            minDivList = [1 - 10**(-paretoN**a / b) for atlas in atlases]
        print 'filter'
        selectedAtlases = list(
            toolbox.map(lambda atlas, minD: selectSVD(atlas, minD), atlases,
                        minDivList))
        print 'selected'
        # Match size of migrants to pareto fronts
        paretoN = sum(map(len, selectedAtlases))
        while paretoN > len(migrants) or len(atlases) * minSize > len(
                migrants):
            ind = toolbox.individual()
            ind.charac = None
            ind.robustness = None
            ind.shift = None
            migrants.append(ind)
    #Place selected individuals in migrant population
        selected = sum(selectedAtlases, [])
        swap(selected, migrants)
        # Vary the pool of individuals
        migrants = varAnd(migrants, toolbox, cxpb, mutpb)
        print 'migrants'

        #Invalidate mutated individuals
        def newInd(ind):
            ind.charac = None
            ind.robustness = None
            ind.shift = None
            del ind.fitness.values
            return ind

        [newInd(ind) for ind in migrants if not ind.fitness.valid]

        #create new atlases and mutate migrants:
        def nextGen(selected):
            if len(selected) > minSize:
                offspring = random.sample(migrants, len(selected))
            else:
                offspring = random.sample(migrants, 2 * minSize)

            # Update the hall of fame with the generated individuals
            if halloffame is not None:
                halloffame.update(selected)

            return removeDuplicates(selected + offspring)

        atlases = toolbox.map(nextGen, selectedAtlases)

        invalid_ind = [ind for ind in sum(atlases, []) if ind.charac is None]
        invalid_primTree = [gp.PrimitiveTree(ind) for ind in invalid_ind]
        calculations = toolbox.multiMap(toolbox.evalMapping, invalid_primTree)

        record = stats.compile(sum(selectedAtlases, [])) if stats else {}
        logbook.record(gen=gen,
                       nevals=nevals,
                       natlas=map(len, selectedAtlases),
                       **record)

        if verbose:
            print logbook.stream
        print 'new Atlases', map(len, atlases)

        if checkpoint is not None and gen % freq == 0:
            os.chdir(checkpoint)
            filename = str(gen) + '-' + datetime.now().time().replace(
                microsecond=0).isoformat() + '.pkl'
            cp = dict(
                atlases=selectedAtlases,
                generation=gen,
                halloffame=halloffame,
                logbook=logbook,
                migrants=migrants,
                rndstate=random.getstate(),
            )
            with open(filename, 'wb') as f:
                pickle.dump(cp, f, -1)
            print "Checkpointed at generation %.i" % gen + ' in ' + checkpoint + '/' + filename
            lastcheckpoint = gen
        elif lastcheckpoint is not None:
            print "Last checkpointed at generation %.i" % lastcheckpoint + ' in ' + checkpoint + '/' + filename
        print ''
        pbar.update(gen)
        print ''
        print ''
    pbar.finish()

    return paretos, logbook
Exemplo n.º 29
0
    def run(self, verbose=False):

        creator.create("FitnessMulti",
                       base.Fitness,
                       weights=(-1.0, -1.0, -1.0))
        creator.create("Individual",
                       gp.PrimitiveTree,
                       fitness=creator.FitnessMulti)
        start = time.time()
        self.population = self.init_pop()
        elapsed = time.time() - start
        #print(f'elapsed seconds for population initialization: {elapsed}')

        self.population = [
            creator.Individual(indv) for indv in self.population
        ]
        toolbox = base.Toolbox()

        toolbox.register("expr",
                         self.genRampedRegexTree,
                         min_=self.min_ht,
                         max_=self.max_ht,
                         ratio=self.term_ratio,
                         classes=self.classes,
                         pset=self.pset)
        toolbox.register("individual", tools.initIterate, creator.Individual,
                         toolbox.expr)
        toolbox.register("population", tools.initRepeat, list,
                         toolbox.individual)
        toolbox.register("mutate",
                         gp.mutUniform,
                         expr=toolbox.expr,
                         pset=self.pset)

        hof = tools.HallOfFame(1)
        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("avg", np.mean)
        stats.register("std", np.std)
        stats.register("min", np.min)
        stats.register("max", np.max)

        logbook = tools.Logbook()
        logbook.header = ['gen'] + stats.fields

        for g in range(self.ngen):

            start_time = time.time()
            # get the fitnesses for every individual in the population
            fitnesses = futures.map(self.evaluate_regex, self.population)
            for indv, fits in zip(self.population, fitnesses):
                indv.fitness.values = fits

            # log and record progress
            record = stats.compile(self.population)
            logbook.record(gen=g, **record)
            if verbose:
                print(logbook.stream)
            hof.update(self.population)

            # sort by Pareto-fronts (NSGA-II, Deb, et)
            self.population = [
                indv for front in tools.sortNondominated(
                    self.population, self.pop_size) for indv in front
            ]

            keep_num = int(self.pop_size * 0.9)  # keep 90% of old gen
            new_pop = []
            while len(new_pop) < keep_num:
                rnum = random.random()
                if rnum < self.CXPB:
                    cx_indv1 = tools.selTournament(self.population,
                                                   k=1,
                                                   tournsize=7)[0]
                    cx_indv2 = tools.selTournament(self.population,
                                                   k=1,
                                                   tournsize=7)[0]

                    # cx_indv1, cx_indv2 = gp.cxOnePointLeafBiased(cx_indv1, cx_indv2, self.term_ratio)
                    cx_indv1, cx_indv2 = self.cxLeafOrSubTree(
                        cx_indv1, cx_indv2, self.term_ratio)
                    new_pop.append(cx_indv1)
                    new_pop.append(cx_indv2)
                elif rnum < self.CXPB + self.MUTPB:
                    mutant = toolbox.mutate(
                        tools.selTournament(self.population, k=1,
                                            tournsize=7)[0])[0]
                    new_pop.append(mutant)
                else:
                    new_pop.append(
                        tools.selTournament(self.population, k=1,
                                            tournsize=7)[0])

            self.population = new_pop + toolbox.population(n=self.pop_size -
                                                           keep_num)

            best = tools.selBest(self.population, k=1)[0]
            tree = gp.PrimitiveTree(best)
            print('Best of that gen:')
            print(
                gp.compile(tree, pset=self.pset).s + '\nFitness: ' +
                str({best.fitness.values}))
            elapsed_time = time.time() - start_time
            remaining_min = (elapsed_time * (self.ngen - g)) / 60
            remaining_hours = remaining_min / 60
            print(
                f"Time for last gen: {elapsed_time} secs, Remaining: {remaining_min} minutes, {remaining_hours} hours."
            )
            print('[' + ('*' * (g // self.ngen)) +
                  ((100 - (g // self.ngen)) * ' ') + ']')

        return hof, logbook
Exemplo n.º 30
0
    def demo_test(self):
        """Run an ad-hoc, print-based walkthrough of the regex GP helpers.

        Steps, in order:

        1. Generate ten random trees with ``genRampedRegexTree``, print each
           tree and the regex string it compiles to, and pass that string to
           ``regex.compile``.
        2. Assemble one tree by hand from primitives and terminals, print the
           regex it compiles to, and run it through ``evaluate_regex``.
        3. Compile a hand-written pattern and print what it finds in a small
           inline sample string.
        4. Call ``test_example`` (``optimal=False``, then ``optimal=True``) on
           examples 8 and 35 of ``self.data['examples']``.
        5. For every entry of ``self.data['examples']`` build two individuals
           (``generate_individual_from_example`` with ``self.PATTERN1`` and
           ``gen_best_individual``), compare their ``findall`` output with
           that of ``self.regex_target``, and print the miss counts and the
           accumulated regex-length difference.

        All results are reported through ``print``.
        """
        pset = self.pset

        # Generate 10 random trees
        for _ in range(10):
            expr = self.genRampedRegexTree(min_=7,
                                           max_=20,
                                           ratio=0.7,
                                           pset=pset,
                                           classes=self.classes)
            tree = gp.PrimitiveTree(expr)
            indr = gp.compile(tree, pset).s
            print(f"Tree: {tree}")
            print(f"Random Regex: {indr!r}")
            # Result is discarded; the call only checks the pattern compiles.
            regex.compile(indr)

        # Build a tree by hand for a regular expression. Per the original
        # author's note, the nodes alternate from the bottom up: right branch
        # first, then left, closer to the primitive.
        expr = [
            self.get_primitive(class_precedence(LookAheadClass,
                                                LookBehindClass),
                               [LookAheadClass, LookBehindClass],
                               key='cat'),
            self.get_primitive(LookBehindClass, [TermClass]),
            self.get_primitive(TermClass, [TermClass, TermClass], key='cat'),
            self.get_term(TermClass, r"\w")[0],
            self.get_term(TermClass, "title")[0],
            self.get_primitive(class_precedence(LookAheadClass, TermClass),
                               [LookAheadClass, TermClass],
                               key='cat'),
            self.get_term(TermClass, "target ={stuff}")[0],
            self.get_primitive(class_precedence(LookAheadClass, TermClass),
                               [LookAheadClass, TermClass],
                               key='cat'),
            self.get_term(TermClass, r"\w")[0],
            self.get_primitive(LookAheadClass, [TermClass]),
            self.get_primitive(TermClass, [TermClass, TermClass], key='cat'),
            self.get_term(TermClass, r"\s")[0],
            self.get_term(TermClass, "year")[0],
        ]

        tree = gp.PrimitiveTree(expr)
        indr = regex.compile(gp.compile(tree, pset).s)
        print("\nManually Constructed INDR:")
        print(indr)
        # Return value is not used here; the call just exercises the evaluator.
        self.evaluate_regex(tree, fit_cache=None)

        ex_mod = {
            "string": "\ntitle a={La puerta}, 1 2 a b c\n",
            "match": [{
                "start": 10,
                "end": 19
            }],
            "unmatch": [{
                "start": 0,
                "end": 10
            }, {
                "start": 19,
                "end": 32
            }]
        }

        # "\n" is a real newline in this pattern; the regex escapes use
        # doubled backslashes so the literal contains no invalid escape
        # sequences while keeping the exact same string value.
        rexp = ("(?<=\ntitle \\w+={)\\w+ \\w+"
                "(?=}, \\d+ \\d+(?:\\s)|(?:\\w)\n)")
        # Alternative patterns kept for reference:
        #   "(?<=\ntitle \w={)\w+ \w+(?=}, \d+ \d+( \w+)+\n)"
        #   "(?:(?<=\ntitle \w={))\w+ \w+(?=}, \d+ \d+( \w+)+\n)"

        indr = regex.compile(rexp)
        print(indr)
        test = indr.findall(ex_mod['string'])
        print(test)

        examples = self.data['examples']

        for e in [
                8, 35
        ]:  # examples with punctuation at end of target requiring different match pattern
            self.test_example(examples[e], optimal=False)
            self.test_example(examples[e], optimal=True)

        drop_misses, non_drop_misses, length_diffs = 0, 0, 0
        for i, ex in enumerate(examples):
            print(f"EXAMPLE {i}:")
            expr1 = self.generate_individual_from_example(ex, self.PATTERN1)
            expr2 = self.gen_best_individual(ex)

            # Spot-check: compare the index with a random *index*. Comparing
            # it with a randomly chosen example dict can never be equal, so
            # the evaluation below would never run.
            if i == random.randrange(len(examples)):
                print(f"Random Eval, Ex {i}:")
                self.evaluate_regex(expr1)
                self.evaluate_regex(expr2)

            target = regex.compile(self.regex_target).findall(ex['string'])
            ex1_rstring = gp.compile(gp.PrimitiveTree(expr1), pset).s
            ex2_rstring = gp.compile(gp.PrimitiveTree(expr2), pset).s
            test1 = regex.compile(ex1_rstring).findall(ex['string'])
            test2 = regex.compile(ex2_rstring).findall(ex['string'])
            print(test1)
            print(test2)
            len1 = len(ex1_rstring)
            len2 = len(ex2_rstring)
            if test1 != target:
                non_drop_misses += 1
            if test2 != target:
                drop_misses += 1

            # Only compare lengths when both regexes reproduce the target.
            if test1 == target and test2 == target:
                print(len1, len2)
                length_diffs += len1 - len2

        print(
            f'Non-Drop Misses: {non_drop_misses}, Drop Misses: {drop_misses}')
        print(f'Length Differences: {length_diffs}')