def shaped_to_invalid_mix(shaped: Shaped) -> Strategy[Mix]:
    polygons = shaped_to_polygons(shaped)
    vertices = list(flatten(chain(polygon.border.vertices,
                                  flatten(hole.vertices
                                          for hole in polygon.holes))
                            for polygon in polygons))
    edges = list(flatten(polygon.edges for polygon in polygons))
    scales = strategies.integers(2, 100)
    rational_edges = strategies.sampled_from(edges).map(to_rational_segment)
    return (strategies.builds(Mix,
                              sub_lists(vertices,
                                        min_size=2).map(Multipoint),
                              empty_geometries,
                              strategies.just(shaped))
            | strategies.builds(Mix,
                                empty_geometries,
                                strategies.builds(stretch_segment_end,
                                                  rational_edges,
                                                  scales),
                                strategies.just(shaped))
            | strategies.builds(Mix,
                                empty_geometries,
                                strategies.builds(stretch_segment_start,
                                                  rational_edges,
                                                  scales),
                                strategies.just(shaped))
            | strategies.builds(Mix,
                                empty_geometries,
                                strategies.sampled_from(polygons)
                                .map(to_polygon_diagonals),
                                strategies.just(shaped))
            | strategies.builds(Mix,
                                empty_geometries,
                                sub_lists(edges,
                                          min_size=2).map(Multisegment),
                                strategies.just(shaped)))
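# A minimal usage sketch for the strategy above. ``shaped_geometries`` and the
# ``validate`` method raising ``ValueError`` are assumptions about the
# surrounding test suite, not guarantees. Every alternative in the strategy
# pairs the shaped component with points or segments taken from its own
# vertices and edges, which a valid mix must not contain:
from hypothesis import given
import pytest


@given(shaped_geometries.flatmap(shaped_to_invalid_mix))
def test_invalid_mix_fails_validation(mix: Mix) -> None:
    with pytest.raises(ValueError):
        mix.validate()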
def _to_initializer_body(
        signature_data: SignatureData,
        field_name_factory: Operator[str],
        instance_object_name: str = SELF_PARAMETER_NAME) -> List[ast.stmt]:
    if signature_data:
        def to_right_hand_value(parameter_name: str,
                                is_callable: bool) -> ast.expr:
            return (ast.Call(ast.Attribute(_to_loaded_name(TYPES_MODULE_ALIAS),
                                           'MethodType',
                                           ast.Load()),
                             [ast.Lambda(_to_unit_signature(instance_object_name),
                                         _to_loaded_name(parameter_name)),
                              _to_loaded_name(instance_object_name)],
                             [])
                    if is_callable
                    else _to_loaded_name(parameter_name))

        return ([ast.Import([ast.alias(TYPES_MODULE_NAME,
                                       TYPES_MODULE_ALIAS)])]
                + [ast.Assign([ast.Attribute(ast.Name(instance_object_name,
                                                      ast.Load()),
                                             field_name_factory(parameter_name),
                                             ast.Store())],
                              to_right_hand_value(parameter_name,
                                                  is_callable))
                   for parameter_name, _, is_callable
                   in flatten(signature_data.values())])
    else:
        return [ast.Pass()]
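# For intuition, the statements built above correspond roughly to the
# following source for a plain parameter ``x`` and a callable parameter ``f``
# (assuming ``field_name_factory`` prepends an underscore and ``types_`` is
# the value of TYPES_MODULE_ALIAS; both are assumptions for illustration):
#
#     import types as types_
#     self._x = x
#     self._f = types_.MethodType(lambda self: f, self)
#
# i.e. a callable parameter is wrapped in a method bound to the instance, so
# that ``self._f()`` returns the original callable.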
def _to_custom_constructor_body(
        signature_data: SignatureData,
        class_parameter_name: str = CLASS_PARAMETER_NAME,
        positional_kinds: Sequence[inspect._ParameterKind] = (
                inspect._POSITIONAL_ONLY,
                inspect._POSITIONAL_OR_KEYWORD)) -> List[ast.stmt]:
    positionals = flatten(signature_data.get(kind, [])
                          for kind in positional_kinds)
    keywords = signature_data.get(inspect._KEYWORD_ONLY, [])
    positional_arguments = [_to_loaded_name(parameter_name)
                            for parameter_name, *_ in positionals]
    if inspect._VAR_POSITIONAL in signature_data:
        (variadic_positional_name, *_), = (
            signature_data[inspect._VAR_POSITIONAL])
        variadic_positional_node = ast.Starred(
                _to_loaded_name(variadic_positional_name), ast.Load())
        positional_arguments.append(variadic_positional_node)
    keyword_arguments = [ast.keyword(name, _to_loaded_name(name))
                         for name, _, is_callable in keywords]
    if inspect._VAR_KEYWORD in signature_data:
        (variadic_keyword_name, *_), = signature_data[inspect._VAR_KEYWORD]
        variadic_keyword_node = ast.keyword(
                None, _to_loaded_name(variadic_keyword_name))
        keyword_arguments.append(variadic_keyword_node)
    return [ast.Return(ast.Call(_to_loaded_name(class_parameter_name),
                                positional_arguments,
                                keyword_arguments))]
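# For intuition, given signature data describing ``(a, b, *args, c, **kwargs)``
# the body built above corresponds to
#
#     return cls(a, b, *args, c=c, **kwargs)
#
# where ``cls`` stands for whatever ``class_parameter_name`` resolves to.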
def to_names_with_signatures_data(
        signature_data: SignatureData
) -> Strategy[Tuple[str, SignatureData]]:
    used_names = frozenset(map(itemgetter(0),
                               flatten(signature_data.values())))
    return strategies.tuples(names.filter(lambda name:
                                          name not in used_names),
                             strategies.just(signature_data))
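# A usage sketch (hypothetical; assumes a ``signatures_data`` strategy defined
# elsewhere in the suite):
from hypothesis import given


@given(signatures_data.flatmap(to_names_with_signatures_data))
def test_generated_name_is_unused(pair: Tuple[str, SignatureData]) -> None:
    name, signature_data = pair
    # the generated name must not collide with any parameter name
    assert all(name != parameter_name
               for parameter_name, *_ in flatten(signature_data.values()))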
def test_mutate_any_node():
    # should be able to mutate every node (except the root)
    sampler = mutate.get_random_mutation_sampler()
    rule_counter = Counter()
    for p in data.pipelines:
        t = pt.to_tree(p)
        t.annotate()
        flat_nodes = flatten(t)
        for n in flat_nodes:
            rules = sampler.sample_rules(n, return_proba=True)
            # the sampler always returns exactly one rule per node
            assert len(rules) == 1
            rule, prob = rules[0]
            rule_counter[type(rule)] += 1

            if pt.is_root_node(n):
                assert rule is None
                assert prob == 0.0
                continue

            if pt.is_composition_op(n):
                # composition operators only support insertion
                # (and removal for FeatureUnion)
                if n.label.endswith("Pipeline"):
                    assert isinstance(rule, ComponentInsert)
                if n.label.endswith("FeatureUnion"):
                    assert isinstance(rule, (ComponentInsert, ComponentRemove))

            if pt.is_param_node(n):
                assert n.parent is not None
                parent_label = n.parent.label
                param = n.payload[0]
                if (parent_label not in sampler.config
                        or param not in sampler.config[parent_label]):
                    assert isinstance(rule, HyperparamRemove) or rule is None
                    if rule is None:
                        assert prob == 0.0
                    else:
                        assert prob > 0.0
                    continue
                if param == "score_func":
                    assert rule is None
                    assert prob == 0.0
                    continue

            if pt.is_component_node(n):
                if isinstance(rule, ComponentInsert):
                    assert pt.is_composition_node(n)

            # all other nodes should have a non-identity rule
            assert rule is not None
            assert prob > 0.0

    # should have seen every rule type (would be bad luck if not...)
    for rule_type in RULE_TYPES:
        assert rule_counter.get(rule_type, 0.0) > 0
def test_node_prob():
    sampler = mutate.get_random_mutation_sampler()
    for p in data.pipelines:
        t = pt.to_tree(p)
        flat_nodes = flatten(t)
        all_probs = []
        for n in flat_nodes:
            prob = sampler.get_probability(rule=None, node=n)
            assert 0.0 <= prob <= 1.0
            all_probs.append(prob)
        # would be terrible luck if this failed...
        assert max(all_probs) > 0.0
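# Both tests above lean on the same sampler contract; a compact restatement
# (hypothetical helper, not part of the suite):
def assert_sampler_contract(sampler, node) -> None:
    # exactly one (rule, probability) pair per node ...
    (rule, prob), = sampler.sample_rules(node, return_proba=True)
    # ... with a well-formed probability ...
    assert 0.0 <= prob <= 1.0
    # ... and the identity (None) rule carries zero probability
    if rule is None:
        assert prob == 0.0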
hashables = (scalars
             | strings
             | to_homogeneous_frozensets(deferred_hashables)
             | to_homogeneous_tuples(deferred_hashables))
iterables = (strings
             | memory_views
             | to_homogeneous_sequences(deferred_objects))
sets = to_homogeneous_sets(hashables)
built_ins = vars(builtins).values()
built_in_callables = list(filter(callable, built_ins))
built_in_classes = [callable_
                    for callable_ in built_in_callables
                    if isinstance(callable_, type)]
built_in_classes_fields = list(flatten(vars(class_).values()
                                       for class_ in built_in_classes))


def round_trippable_built_in(object_: Any) -> bool:
    try:
        candidate = eval(serializers.complex_(object_),
                         {builtins.__name__: builtins})
    except Exception:
        return False
    else:
        return candidate is object_


objects = (hashables
           | iterables
           | sets
def _assert_fail(random_func, Serializer, kwargs):
    data = {key: val
            for key, val in kwargs.items()
            if val is not None}
    obj = next(flatten(random_func(1)))
    serializer = Serializer(obj, data=data, partial=True)
    valid = serializer.is_valid()
    assert not valid or (valid and not data)
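# Hypothetical call site for the helper above, assuming a ``random_users``
# factory and a DRF-style ``UserSerializer`` (both names are invented for
# illustration). The helper asserts validation fails unless all the non-None
# fields were filtered out:
_assert_fail(random_users, UserSerializer, {"email": "not-an-email", "age": None})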
def test_beam_enumerator():
    p1 = Pipeline([("clf", LogisticRegression(penalty="l2"))])
    p2 = Pipeline([("clf", LogisticRegression(penalty="l1"))])
    _, ops = pt.tree_edit_distance(p1, p2, return_operations=True)
    update_op = [o for o in ops if is_update_edit(o)][0]
    # rule 1: penalty=l2 -> penalty=l1
    r1 = HyperparamUpdate(update_op)

    p5 = Pipeline([("clf", LogisticRegression(penalty="elasticnet"))])
    _, ops = pt.tree_edit_distance(p1, p5, return_operations=True)
    update_op = [o for o in ops if is_update_edit(o)][0]
    # rule 1.5: penalty=l2 -> penalty=elasticnet
    r1_5 = HyperparamUpdate(update_op)

    p3 = Pipeline([("s0", MinMaxScaler()),
                   ("clf", LogisticRegression(penalty="l2"))])
    _, ops = pt.tree_edit_distance(p3, p1, return_operations=True)
    remove_op = [o for o in ops if is_remove_edit(o)][0]
    # rule 2: remove MinMaxScaler
    r2 = ComponentRemove(remove_op)

    p4 = Pipeline([("s0", StandardScaler()),
                   ("clf", LogisticRegression(penalty="l2"))])
    _, ops = pt.tree_edit_distance(p1, p4, return_operations=True)
    insert_op = [o for o in ops if is_insert_edit(o)][0]
    augedit = AugmentedEdit(insert_op, get_parent_match_edit(insert_op, ops))
    # rule 3: insert StandardScaler
    r3 = ComponentInsert(augedit)

    n1 = r1.pre
    n2 = r2.pre
    n3 = r3.pre

    rules = {
        get_node_key(n1): [r1, r1_5],
        get_node_key(n2): [r2],
        get_node_key(n3): [r3],
    }
    node_probs = {
        get_node_key(n1): 0.5,
        get_node_key(n2): 0.2,
        get_node_key(n3): 0.3,
    }
    cond_probs = {
        r1: 0.7,
        r1_5: 0.15,
        r2: 0.3,
        r3: 0.1,
    }
    rule_sampler = FakeRuleSampler(rules, node_probs, cond_probs)
    rs = rule_sampler.sample_rules(n1, return_proba=True)
    assert len(rs) == 2

    enumerator = tree_enumerator.RepeatedBeamSearchEnumerator(
        rule_sampler,
        force_apply=False,
    )

    # should sort rules from max to min probability
    t1 = pt.to_tree(p1).annotate()
    opt_rules = enumerator.collect_node_rule_probs(t1, past_rules=[], acc={})
    flat_nodes = flatten(t1)
    assert len(opt_rules) == len(flat_nodes)

    # rule 1 is the best rule for its node
    target_n = next(n for n in flat_nodes if n.label == r1.pre.label)
    opt_rule_and_prob = opt_rules[target_n]
    assert opt_rule_and_prob[0] == r1
    # the conditional probabilities are normalized over the rules that
    # can be applied
    norm_cond_prob = cond_probs[r1] / (cond_probs[r1] + cond_probs[r1_5])
    expected_prob = node_probs[get_node_key(target_n)] * norm_cond_prob
    assert abs(opt_rule_and_prob[1] - expected_prob) < 1e-5

    # if we collect optimal node/rules again after using r1, we should get
    # r1_5 for that node
    opt_rules = enumerator.collect_node_rule_probs(t1, past_rules=[r1], acc={})
    opt_rule_and_prob = opt_rules[target_n]
    assert opt_rule_and_prob[0] == r1_5
    # again normalized over the applicable rules
    norm_cond_prob = cond_probs[r1_5] / (cond_probs[r1] + cond_probs[r1_5])
    expected_prob = node_probs[get_node_key(target_n)] * norm_cond_prob
    assert abs(opt_rule_and_prob[1] - expected_prob) < 1e-5

    new_trees, lineage = list(
        enumerator.derive_variant_trees(t1, k=5, past_rules=[]))
    # at most 2 variants (even though k = 5):
    # penalty=l2->l1, insert(StandardScaler)
    assert len(new_trees) == 2
    assert list(lineage[0])[0] == r1
    assert list(lineage[1])[0] == r3

    gen = enumerator.enumerate(p1, k=5)
    trees_t1 = list(gen)
    # l2->l1, insert(StandardScaler), l2->elastic
    assert len(trees_t1) == 3

    gen = enumerator.enumerate(p5, k=5)
    trees_t2 = list(gen)
    # insert(StandardScaler)
    assert len(trees_t2) == 1

    gen = enumerator.enumerate(p3, k=10)
    trees_t3 = list(gen)
    # (overall possible rules):              outcome pipeline
    # l2 -> l1:                              MinMax, LR(l1)          (yes)
    # l2 -> elastic:                         MinMax, LR(elastic)
    # insert(StandardScaler):                MinMax, SS, LR(l2)      (yes)
    # remove(MinMaxScaler):                  LR(l2)                  (yes)
    # insert(SS), l2 -> l1:                  MinMax, SS, LR(l1)      (yes)
    # insert(SS), l2 -> elastic:             MinMax, SS, LR(elastic)
    # remove(MM), l2 -> l1:                  LR(l1)                  (yes)
    # remove(MM), l2 -> elastic:             LR(elastic)
    # remove(MM), insert(SS):                SS, LR(l2)              (yes)
    # remove(MM), insert(SS), l2 -> l1:      SS, LR(l1)              (yes)
    # remove(MM), insert(SS), l2 -> elastic: SS, LR(elastic)
    assert len(trees_t3) < 11
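# Worked instance of the normalization the test checks, using its own numbers
# (node probability 0.5, conditional probabilities 0.7 for r1 and 0.15 for
# r1_5):
#
#     best rule r1:        0.5 * 0.7  / (0.7 + 0.15) ~= 0.4118
#     after consuming r1:  0.5 * 0.15 / (0.7 + 0.15) ~= 0.0882 for r1_5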