# ```bash
# LR(C=4.015231900472649, penalty='l2')
# LR(C=9.556786605505499, penalty='l2')
# LR(C=4.05716261883461, penalty='l1')
# LR(C=3.2786487445120858, penalty='l1')
# LR(C=4.655510386502897, penalty='l2')
# ```

# Now we can search for the best combination of constructor parameters by
# trying a bunch of different instances and seeing which one obtains the best score.
# AutoGOAL also has tools for automating this process.

from autogoal.search import RandomSearch

search = RandomSearch(grammar, evaluate, random_state=0)  # Fixed seed
best, score = search.run(100)

print("Best:", best, "\nScore:", score)

# The `RandomSearch` will try 100 different random instances, and for each one
# run the `evaluate` method we defined earlier. It returns the best one and the corresponding score.

# ```
# Best: LR(C=0.7043201482743121, penalty='l1')
# Score: 0.8853333333333337
# ```

# So we can do a little bit better by carefully selecting the right parameters.
# However, maybe we can do even better.
fitness_fn = movie_reviews.make_fn(examples=100)

# ### Random search
#
# The `RandomSearch` strategy simply calls `grammar.sample()` a bunch of times
# and stores the best performing pipeline. It has no intelligence whatsoever,
# but it serves as a good baseline implementation.
#
# We will run it for a total of `1000` fitness evaluations, or equivalently, a total
# of `1000` different random pipelines. To see what's actually going on we will use
# the wonderful `enlighten` library through our implementation `ProgressLogger`.

logger = ProgressLogger(log_solutions=True)

random_search = RandomSearch(grammar, fitness_fn, random_state=0)
best_rand, fn_rand = random_search.run(1000, logger=logger)

# !!! note
#     For reproducibility purposes we can pass a fixed random seed in `random_state`.
#
# ### Evolutionary Search
#
# Random search is fun, but to search with purpose, we need a more intelligent sampling
# strategy. The `PESearch` (short for Probabilistic Evolutionary Search, phew), does just that.
# It starts with a random sampling strategy, but as it evaluates more pipelines, it modifies
# a probabilistic sampling model so that pipelines similar to the best ones found are more
# commonly sampled.
#
# There are three main parameters for `PESearch`.
#