def test_sample_grammar():
    class A:
        def __repr__(self):
            return "A()"

    g = generate_cfg(A)
    assert str(g.sample()) == str(g())
def test_sample_generated_class(clss):
    from autogoal.grammar import generate_cfg, Sampler

    grammar = generate_cfg(clss, registry=classes)
    sampler = Sampler(random_state=0)

    for _ in range(1000):
        grammar.sample(sampler=sampler)
def test_search_is_replayable_from_grammar():
    grammar = generate_cfg(A)
    search = RandomSearch(generator_fn=grammar, fitness_fn=fn)
    best, _ = search.run(1)

    sampler = best.sampler_.replay()
    best_clone = grammar(sampler)

    assert best == best_clone
def __init__(self, algorithm, input_types, output_types, registry=None) -> None:
    self.algorithm = algorithm
    self.input_types = set(input_types)
    self.output_types = set(output_types)
    self.grammar = generate_cfg(self.algorithm, registry=registry)
def test_sample_subset():
    class A:
        def __init__(self, features: Subset("Subset", DiscreteValue(1, 5), "Hello", 1, None)):
            self.features = features

    g = generate_cfg(A)
    selected_features = g.sample().features
    selected = set([repr(feature) for feature in selected_features])

    assert selected.issubset(
        [repr(feature) for feature in [DiscreteValue(1, 5), "Hello", 1, None]]
    )
def test_generate_from_method_with_args():
    def f(x: DiscreteValue(1, 5)):
        pass

    check_grammar(
        generate_cfg(f),
        """
        <f> := f (x=<f_x>)
        <f_x> := discrete (min=1, max=5)
        """,
    )
def test_subset_annotation_with_constants():
    class A:
        def __init__(self, features: Subset("Subset", "Hello", "World", 1)):
            pass

    check_grammar(
        generate_cfg(A),
        """
        <A> := A (features=<Subset>)
        <Subset> := { 'Hello' , 'World' , 1 }
        """,
    )
def test_generate_from_class_with_args():
    class A:
        def __init__(self, x: DiscreteValue(1, 5)):
            pass

    check_grammar(
        generate_cfg(A),
        """
        <A> := A (x=<A_x>)
        <A_x> := discrete (min=1, max=5)
        """,
    )
def test_subset_annotation():
    class A:
        def __init__(self, features: Subset("Subset", DiscreteValue(1, 5), "Hello", 1, None)):
            pass

    check_grammar(
        generate_cfg(A),
        """
        <A> := A (features=<Subset>)
        <Subset> := { Discrete(min=1, max=5) , 'Hello' , 1 , None }
        """,
    )
def test_subset_annotation_with_callables():
    class A:
        def __init__(self, features: Subset('Subset', Discrete(1, 5), Categorical('adam', 'sgd'))):
            pass

    check_grammar(
        generate_cfg(A),
        """
        <A> := A (features=<Subset>)
        <Subset> := { Discrete(min=1, max=5) , Categorical('adam', 'sgd') }
        """,
    )
def test_generate_from_registry_with_dependance():
    check_grammar(
        generate_cfg(
            HigherStemAlgorithm,
            registry=[
                StemAlgorithm,
                HigherStemAlgorithm,
                TextAlgorithm,
                StemWithDependanceAlgorithm,
            ],
        ),
        """
        <HigherStemAlgorithm> := HigherStemAlgorithm (dependance=<Algorithm[[Word],Stem]>)
        <Algorithm[[Word],Stem]> := <StemAlgorithm> | <StemWithDependanceAlgorithm>
        <StemAlgorithm> := StemAlgorithm ()
        <StemWithDependanceAlgorithm> := StemWithDependanceAlgorithm (dependance=<Algorithm[[Sentence],Document]>)
        <Algorithm[[Sentence],Document]> := <TextAlgorithm>
        <TextAlgorithm> := TextAlgorithm ()
        """,
    )
def optimize(
    fn,
    search_strategy=PESearch,
    generations=100,
    pop_size=10,
    allow_duplicates=False,
    logger=None,
    **kwargs,
):
    """
    A general-purpose optimization function.

    Simply define any function `fn` with suitable parameter annotations
    and apply `optimize`.

    **Parameters**:

    * `search_strategy`: customize the search strategy. By default a
      `PESearch` will be performed.
    * `generations`: max number of generations to run.
    * `pop_size`: population size used by the search strategy.
    * `allow_duplicates`: whether duplicate solutions are allowed during the search.
    * `logger`: instance of `Logger` (or list) to pass to the search strategy.
    * `**kwargs`: additional keyword arguments passed to the search strategy
      constructor.
    """
    params_func = _make_params_func(fn)

    @functools.wraps(fn)
    def eval_func(kwargs):
        return fn(**kwargs)

    grammar = generate_cfg(params_func)
    search = search_strategy(
        grammar,
        eval_func,
        pop_size=pop_size,
        allow_duplicates=allow_duplicates,
        **kwargs,
    )
    best, best_fn = search.run(generations, logger=logger)

    return best, best_fn
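# A minimal usage sketch for `optimize`. The objective `my_fn` below is
# illustrative (not part of the library); `Continuous` is the same annotation
# class used elsewhere in these examples.
from autogoal.grammar import Continuous

def my_fn(x: Continuous(-1, 1), y: Continuous(-1, 1)):
    # Toy objective, maximized at x = y = 0.
    return -(x ** 2) - (y ** 2)

best, best_fn = optimize(my_fn, generations=10)
print(best, best_fn)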
def test_generate_from_class():
    class A:
        def __init__(self):
            pass

    check_grammar(generate_cfg(A), "<A> := A ()")
@nice_repr
class A:
    pass


@nice_repr
class B:
    pass


@nice_repr
class C:
    pass


D = Union("D", B, C)


@nice_repr
class Main:
    def __init__(self, xs: Subset("xs", A, D)):
        self.xs = xs


grammar = generate_cfg(Main)
print(grammar)
print(grammar.sample())
def __init__(self, algorithm, input, output):
    self.algorithm = algorithm
    self.input = tuple(input)
    self.output = tuple(output)
    self.grammar = generate_cfg(algorithm)
def test_create_grammar_for_generated_class(clss):
    from autogoal.grammar import generate_cfg

    generate_cfg(clss, registry=classes)
class LR(LogisticRegression):
    def __init__(self, penalty: Categorical("l1", "l2"), C: Continuous(0.1, 10)):
        super().__init__(penalty=penalty, C=C, solver="liblinear")


# The `penalty: Categorical("l1", "l2")` annotation tells AutoGOAL that for this class the
# parameter `penalty` can take values from a list of predefined values. Likewise, the
# `C: Continuous(0.1, 10)` annotation indicates that the parameter `C` can take a float
# value in a specified range.

# Now we will use AutoGOAL to automatically generate different instances of our `LR` class.
# With the class-based API we achieve this by building a **context-free grammar** that
# describes all possible instances.

from autogoal.grammar import generate_cfg

grammar = generate_cfg(LR)
print(grammar)

# ```bash
# <LR> := LR (penalty=<LR_penalty>, C=<LR_C>)
# <LR_penalty> := categorical (options=['l1', 'l2'])
# <LR_C> := continuous (min=0.1, max=10)
# ```

# As you can see, this grammar describes the set of all possible instances of `LR` by
# describing how to call the constructor and how to generate random values for its parameters.

# !!! note
#     Formally, this is called a **context-free grammar**. These are used in Computer Science
#     to describe formal languages, such as programming languages, mathematical expressions, etc.
#     Context-free grammars work by describing a set of replacement rules that you can apply recursively to
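# To see the grammar in action, we can sample a concrete `LR` instance from it.
# A minimal sketch: the exact hyperparameter values drawn will vary with the
# random state, and the printed form depends on the class's `__repr__`.
lr = grammar.sample()  # e.g. an LR with penalty='l2' and some C in (0.1, 10)
print(lr)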
    load previously saved corpus using pickle
    """
    try:
        with open(fR"{path}/binary_X", 'rb') as xfd, open(fR"{path}/binary_Y", 'rb') as yfd:
            X = pickle.load(xfd)
            y = pickle.load(yfd)
            return X, y
    except Exception as e:
        # TODO: implement corpus reading from directories
        print(e)


if __name__ == "__main__":
    g = generate_cfg(SklearnNLPClassifier)
    X, y = load_movie_reviews(100)
    # X, y = load_corpus("examples/Revolico")

    def fitness(pipeline):
        pipeline.fit(X, y)
        score = pipeline.score(X, y)
        return score

    search = RandomSearch(g, fitness, random_state=0, errors='warn', evaluation_timeout=100)
def test_create_grammar_for_generated_class(clss):
    try:
        generate_cfg(clss, registry=classes)
    except InterfaceIncompatibleError:
        pass
@nice_repr
class Mult(Operator):
    def operate(self, left, right):
        return left * right


@nice_repr
class Concat(Operator):
    def operate(self, left, right):
        return int(str(left) + str(right))


Expr = Union("Expr", Number, Add, Mult, Concat)
grammar = generate_cfg(Expr)
print(grammar)

# Our grammar is composed of addition, multiplication and concatenation operators.
# Here are some possible examples:

for i in range(100):
    try:
        solution = grammar.sample()
        print(solution)
    except ValueError:
        continue

# To evaluate how good a formula is, we simply feed the expression instance
# with a sequence of numbers from 1 to 9. If the expression requires more
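# A self-contained sketch of what that evaluation could look like. This is NOT the
# tutorial's actual code: it assumes `Number` leaves consume one digit each and that
# binary operators expose `left`/`right` children — those attribute names, the
# `evaluate` helper, and the target value are all illustrative assumptions.
def evaluate(node, digits):
    if isinstance(node, Number):
        return next(digits)  # each Number leaf consumes the next digit
    # combine the children's values with this operator's `operate`
    return node.operate(evaluate(node.left, digits), evaluate(node.right, digits))


def fitness(expr, target=100):
    try:
        value = evaluate(expr, iter(range(1, 10)))
    except StopIteration:
        return 0  # the expression required more numbers than the nine available
    return -abs(target - value)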
("vect", vectorizer), ("decomp", decomposer), ("class", classifier), ]) # ## Creating the grammar # # Once everything is in place, we can tell `autogoal` to automatically infer a grammar # for all the possible combinations of parameters and clases that we can use. # The root of our grammar is the `Pipeline` class we just defined. The method `generate_cfg` # does exactly that, taking a class and building a context free grammar to construct # that class, based on the parameters' annotations and recursively building the corresponding # rules for all classes down to basic parameter types. grammar = generate_cfg(Pipeline) print(grammar) # If you run the code up to this point, it should print something like: # # ```bash # <Pipeline> := Pipeline (vectorizer=<Vectorizer>, decomposer=<Decomposer>, classifier=<Classifier>) # <Vectorizer> := <Count> | <TfIdf> # <Count> := Count (ngram=<Count_ngram>) # <Count_ngram> := discrete (min=1, max=3) # <TfIdf> := TfIdf (ngram=<TfIdf_ngram>, use_idf=<TfIdf_use_idf>) # <TfIdf_ngram> := discrete (min=1, max=3) # <TfIdf_use_idf> := boolean () # <Decomposer> := <NoDec> | <SVD> # <NoDec> := NoDec () # <SVD> := SVD (n=<SVD_n>)
def test_generate_from_method():
    def f():
        pass

    check_grammar(generate_cfg(f), "<f> := f ()")
from autogoal.search import SurrogateSearch, PESearch, ConsoleLogger
from autogoal.grammar import Continuous, Sampler, generate_cfg
from autogoal.utils import nice_repr


@nice_repr
class Input:
    def __init__(self, x: Continuous(-1, 1), y: Continuous(-1, 1), z: Continuous(-1, 1)):
        self.x = x
        self.y = y
        self.z = z


def fn(t: Input):
    return -(t.x**2) - (t.y**2) - (t.z**2)


search = SurrogateSearch(
    PESearch,
    None,
    10,
    generator_fn=generate_cfg(Input),
    fitness_fn=fn,
)
search.run(1000, ConsoleLogger())