def visitSearchSpaceSum(self, sum: SearchSpaceSum, path: str, counter=None):
    """Compile a sum (disjoint union) of search spaces to a hyperopt choice.

    A single alternative is compiled directly.  Otherwise every alternative
    is wrapped in a one-entry dict keyed by its index and the alternatives
    are offered through ``hp.choice`` under a freshly generated name.
    """
    alternatives = sum.sub_spaces
    if len(alternatives) == 1:
        return accept(alternatives[0], self, "")
    choice_name: str = self.get_unique_name("choice")
    branches = [
        {str(idx): accept(alt, self, "")}
        for idx, alt in enumerate(alternatives)
    ]
    return hp.choice(choice_name, branches)
def visitSearchSpaceSum(self, op: SearchSpaceSum) -> SearchSpaceGridInternalType:
    """Compile a sum (choice) of search spaces into a single grid list.

    With one alternative, its grids pass through unchanged.  With several,
    each alternative's grids are fixed up, nested under a choice prefix,
    and tagged with a discriminant constant recording which alternative
    they came from.
    """
    child_grids = [accept(child, self) for child in op.sub_spaces]
    if len(op.sub_spaces) == 1:
        return child_grids[0]
    tagged: List[SearchSpaceGrid] = []
    for which, raw in enumerate(child_grids):
        grids = SearchSpaceToGridVisitor.fixupDegenerateSearchSpaces(raw)
        if not grids:
            # degenerate child: keep an empty grid so the discriminant
            # entry below still records this alternative
            grids = [{}]
        else:
            # we need to add in this nesting in case a higher order
            # operator directly contains another
            grids = nest_choice_all_HPparams(grids)
        tagged.extend(
            {**grid, discriminant_name: SearchSpaceConstant(which)}
            for grid in grids
        )
    return tagged
def visitSearchSpaceObject(
        self, space: SearchSpaceObject) -> List[SearchSpaceGrid]:
    """Compile an object search space into a list of grids, one or more per choice.

    For each choice, primitive-valued keys are collected into one shared flat
    grid, while keys whose value compiles to a nested grid list are combined
    by cartesian product; each combination is merged with the shared
    primitives to form an output grid.
    """
    keys = space.keys
    keys_len = len(keys)
    final_choices: List[SearchSpaceGrid] = []
    for c in space.choices:
        # each choice must supply a value for every key
        assert keys_len == len(c)
        # grid lists from keys whose value compiled to nested grids
        kvs_complex: List[List[SearchSpaceGrid]] = []
        # key -> primitive entries shared by every combination of this choice
        kvs_simple: SearchSpaceGrid = {}
        for k, v in zip(keys, c):
            vspace: Union[List[SearchSpaceGrid],
                          SearchSpacePrimitive] = accept(v, self)
            if isinstance(vspace, SearchSpacePrimitive):
                kvs_simple[k] = vspace
            else:
                # nest the sub-grids under this key's name; empty results
                # are dropped entirely
                nested_vspace: List[SearchSpaceGrid] = nest_all_HPparams(
                    k, vspace)
                if nested_vspace:
                    kvs_complex.append(nested_vspace)
        # cartesian product over the nested grid lists (one grid picked
        # from each complex key per combination)
        nested_space_choices: Iterable[
            Iterable[SearchSpaceGrid]] = itertools.product(*kvs_complex)
        nested_space_choices_lists: List[List[SearchSpaceGrid]] = list(
            map((lambda x: list(x)), nested_space_choices))
        nested_space_choices_filtered: List[List[SearchSpaceGrid]] = [
            ll for ll in nested_space_choices_lists if ll
        ]
        if nested_space_choices_filtered:
            # merge each combination with the shared primitives; ChainMap
            # gives the nested grids precedence over kvs_simple on collision
            chained_grids: Iterable[SearchSpaceGrid] = [
                dict(ChainMap(*nested_choice, kvs_simple))
                for nested_choice in nested_space_choices_filtered
            ]
            final_choices.extend(chained_grids)
        else:
            # no nested grids at all: the primitives alone form the grid
            final_choices.append(kvs_simple)
    return final_choices
def _searchSpaceList(self, space: SearchSpaceArray, *, size: int) -> List[SearchSpaceGrid]:
    """Compile a `size`-element view of an array search space into grids.

    Each element's grids are nested under its positional index, all element
    grid lists are combined by cartesian product and merged, and every
    resulting grid is tagged with the structure type (tuple vs. list).
    """
    nested_grids: List[List[SearchSpaceGrid]] = []
    for pos, item in enumerate(space.items(max=size)):
        item_grids = self.fixupDegenerateSearchSpaces(accept(item, self))
        nested_grids.append(nest_all_HPparams(str(pos), item_grids))
    merged: List[SearchSpaceGrid] = [
        dict(ChainMap(*combo))
        for combo in itertools.product(*nested_grids)
    ]
    st_val = structure_type_tuple if space.is_tuple else structure_type_list
    return [
        {**grid, structure_type_name: SearchSpaceConstant(st_val)}
        for grid in merged
    ]
def visitSearchSpaceProduct(
    self, prod: SearchSpaceProduct, path: str, counter=None
):
    """Compile each indexed component of a product, returning them as a list.

    Every component gets a unique label derived from its name and index.
    """
    compiled = []
    for name, index, space in prod.get_indexed_spaces():
        label = self.get_unique_name(make_indexed_name(name, index))
        compiled.append(accept(space, self, label))
    return compiled
def visitSearchSpaceDict(
    self, sd: SearchSpaceDict, path: str, counter=None, useCounter=True
):
    """Render a dict search space as a '{name:subexpr,...}' source string."""
    entries = [
        name + ":" + accept(space, self, path + "_" + name)
        for name, space in sd.space_dict.items()
    ]
    return "{" + ",".join(entries) + "}"
def visitSearchSpaceProduct(
    self, prod: SearchSpaceProduct, path: str, counter=None, useCounter=True
):
    """Render a product search space as a '[subexpr,...]' source string.

    Each component is rendered under a unique label derived from its
    name and index.
    """
    rendered = []
    for name, index, space in prod.get_indexed_spaces():
        label = self.get_unique_name(make_indexed_name(name, index))
        rendered.append(accept(space, self, label))
    return "[" + ",".join(rendered) + "]"
def visitSearchSpaceDict(self, sd: SearchSpaceDict, path: str, counter=None):
    """Compile a dict search space, mapping each name to its compiled sub-space."""
    compiled = {}
    for name, sub in sd.space_dict.items():
        compiled[name] = accept(sub, self, path + "_" + name)
    return compiled
def visitOperatorChoice(self, op: Ops.OperatorChoice) -> Dict[str, Any]:
    """Merge the defaults of every step of a choice into a single dict.

    Later steps win on key collision, matching dict.update semantics.
    """
    merged: Dict[str, Any] = {}
    for step in op.steps():
        merged.update(accept(step, self))
    return merged
def visitSearchSpaceSum(
    self, sum_space: SearchSpaceSum, path: str, counter=None, useCounter=True
):
    """Render a sum space as an 'hp.choice(...)' source string.

    Each alternative is rendered as a quoted index/body pair inside a
    bracketed list.
    """
    choice_name: str = self.get_unique_name("choice")
    parts = []
    for idx, alt in enumerate(sum_space.sub_spaces):
        rendered = accept(alt, self, "")
        parts.append('"' + str(idx) + '"' + " : " + '"' + rendered + '"')
    body = "[" + ",".join(parts) + "]"
    return f"hp.choice({choice_name}, {body})"
def visitPipeline(self, op: Ops.PlannedPipeline) -> Dict[str, Any]:
    """Merge the per-step defaults of a pipeline, nested under each step's name.

    Later steps win on key collision, matching dict.update semantics.
    """
    merged: Dict[str, Any] = {}
    for step in op.steps():
        merged.update(nest_HPparams(step.name(), accept(step, self)))
    return merged
def asexpr(key, e):
    # Compile child search space `e` (may be None) rooted at path_<key>,
    # threading a shared per-child counter through successive calls.
    nonlocal child_counter
    if e is None:
        return None
    else:
        ee = accept(e, self, path + "_" + key, counter=child_counter)
        # the first compiled child bumps the shared counter from None to 1;
        # each later child increments it, so siblings get distinct counters
        if child_counter is None:
            child_counter = 1
        else:
            child_counter = child_counter + 1
        return ee
def array_single_str_(self, space: SearchSpaceArray, path: str, num, useCounter=True) -> str:
    """Render one fixed-length instance of an array space as a '(...)' or '[...]' string.

    Tuples use parentheses, lists use brackets; each item is rendered
    under a shared label with its position as counter.
    """
    label = _mk_label(path, num, useCounter=useCounter) + "_"
    open_b, close_b = ("(", ")") if space.is_tuple else ("[", "]")
    rendered = [
        accept(item, self, label, counter=pos, useCounter=useCounter)
        for pos, item in enumerate(space.items())
    ]
    return open_b + ",".join(rendered) + close_b
def run(cls, space: SearchSpace, name: str, counter=None, useCounter=True):
    """Compile `space` to source text.

    The output is the concatenation of any pgo/nested headers the visitor
    produced, its declarations (followed by a newline), and a final
    'return <expr>' line.
    """
    visitor = cls(name)
    body = accept(space, visitor, name, counter=counter, useCounter=useCounter)
    pieces = []
    if visitor.pgo_header is not None:
        pieces.append(visitor.pgo_header)
    if visitor.nested_header is not None:
        pieces.append(visitor.nested_header)
    if visitor.decls:
        pieces.append(visitor.decls + "\n")
    pieces.append("return " + body)
    return "".join(pieces)
def cstr(key, x):
    # Render child search space `x` (may be None) as a string rooted at
    # path_<key>, threading a shared per-child counter through calls.
    nonlocal child_counter
    if x is None:
        return "None"
    else:
        # NOTE(review): counter is passed positionally here, unlike the
        # keyword form used elsewhere — presumably the same parameter;
        # verify against accept's signature.
        s = accept(
            x, self, path + "_" + key, child_counter, useCounter=useCounter
        )
        # the first rendered child bumps the shared counter from None to 1;
        # each later child increments it, so siblings get distinct counters
        if child_counter is None:
            child_counter = 1
        else:
            child_counter = child_counter + 1
        return s
def visitSearchSpaceProduct(
        self, op: SearchSpaceProduct) -> SearchSpaceGridInternalType:
    """Compile a product of search spaces into a combined grid list.

    Each component's grids are nested under its indexed name; the component
    grid lists are combined by cartesian product and each combination is
    merged into a single grid.
    """
    component_grids: List[List[SearchSpaceGrid]] = []
    for name, index, space in op.get_indexed_spaces():
        grids = self.fixupDegenerateSearchSpaces(accept(space, self))
        component_grids.append(
            nest_all_HPparams(make_indexed_name(name, index), grids))
    return [
        dict(ChainMap(*combo))
        for combo in itertools.product(*component_grids)
    ]
def visitSearchSpaceDict(
        self, op: SearchSpaceDict) -> SearchSpaceGridInternalType:
    """Compile a dict of search spaces into a combined grid list.

    Each entry's grids are nested under its name, the per-entry grid lists
    are combined by cartesian product and merged, and every resulting grid
    is tagged with the dict structure type constant.
    """
    component_grids: List[List[SearchSpaceGrid]] = []
    for name, space in op.space_dict.items():
        grids = self.fixupDegenerateSearchSpaces(accept(space, self))
        component_grids.append(nest_all_HPparams(name, grids))
    merged = [
        dict(ChainMap(*combo))
        for combo in itertools.product(*component_grids)
    ]
    return [
        {**grid, structure_type_name: SearchSpaceConstant(structure_type_dict)}
        for grid in merged
    ]
def visitOperatorChoice(self, op: "OperatorChoice") -> SearchSpace:
    """An operator choice becomes a sum over its steps' search spaces."""
    return SearchSpaceSum([accept(step, self) for step in op.steps()])
def visitPlannedPipeline(self, op: "PlannedPipeline") -> SearchSpace:
    """A pipeline becomes a product of (step name, step search space) pairs."""
    named_spaces: List[Tuple[str, SearchSpace]] = [
        (step.name(), accept(step, self)) for step in op.steps()
    ]
    return SearchSpaceProduct(named_spaces)
def run(cls, op: PlannedOperator, pgo: Optional[PGO] = None, data_schema=None) -> SearchSpace:
    """Compile a planned operator into a SearchSpace.

    Fix: the original used a mutable default argument (``data_schema={}``),
    which is a single dict object shared across all calls; use a ``None``
    sentinel instead and substitute a fresh empty dict per call.  Behavior
    for all existing callers is unchanged.
    """
    if data_schema is None:
        data_schema = {}
    visitor = cls(pgo=pgo, data_schema=data_schema)
    return accept(op, visitor)
def run(cls, space: SearchSpace, name: str):
    """Instantiate the visitor for `name` and compile `space` with it."""
    return accept(space, cls(name), name)
def array_single_expr_(self, space: SearchSpaceArray, path: str, num):
    """Compile one fixed-length instance of an array space.

    Returns a tuple of sub-expressions for tuple spaces and a list
    otherwise; each item is compiled under a shared label with its
    position as counter.
    """
    label = _mk_label(path, num) + "_"
    exprs = [
        accept(item, self, label, counter=pos)
        for pos, item in enumerate(space.items())
    ]
    return tuple(exprs) if space.is_tuple else exprs
def run(cls, space: SearchSpace) -> List[SearchSpaceGrid]:
    """Compile `space` into a list of grids, fixing up degenerate results."""
    raw: SearchSpaceGridInternalType = accept(space, cls())
    return cls.fixupDegenerateSearchSpaces(raw)
def schemaToSearchSpaceHelper_(
    self,
    longName,
    path: str,
    schema: JsonSchema,
    relevantFields: Optional[Set[str]],
    pgo_freqs: pgo_part = None,
    sub_space: bool = True,
) -> Optional[SearchSpace]:
    """Translate one JSON hyperparameter schema into a SearchSpace.

    Dispatches on the schema's (lale)type: enum, boolean, number/integer,
    array/tuple, object, string, operator, then on anyOf/allOf combinators.
    Returns None for unsatisfiable (false) schemas; raises
    OperatorSchemaError for forms it cannot compile.
    """
    # TODO: handle degenerate cases
    # right now, this handles only a very fixed form
    if is_false_schema(schema):
        return None
    # "laleType" takes precedence over the JSON-Schema "type"
    typ: Optional[str] = None
    typ = schema.get("laleType", None)
    if typ is None:
        typ = schema.get("type", None)
    else:
        typ = typ
    if "enum" in schema and typ != "operator":
        vals = schema["enum"]
        return SearchSpaceEnum(vals, pgo=asFreqs(pgo_freqs),
                               default=get_default(schema))
    if typ is not None:
        if typ == "boolean":
            return SearchSpaceBool(pgo=asFreqs(pgo_freqs),
                                   default=get_default(schema))
        elif typ == "number" or typ == "integer":
            # the *ForOptimizer bound, when present, overrides the plain one
            # (and brings its own exclusivity flag)
            exclusive_minimum = False
            minimum = schema.get("minimumForOptimizer", None)
            if minimum is not None:
                exclusive_minimum = schema.get(
                    "exclusiveMinimumForOptimizer", False)
            else:
                minimum = schema.get("minimum", None)
                if minimum is not None:
                    exclusive_minimum = schema.get("exclusiveMinimum", False)
            exclusive_maximum = False
            maximum = schema.get("maximumForOptimizer", None)
            if maximum is not None:
                exclusive_maximum = schema.get(
                    "exclusiveMaximumForOptimizer", False)
            else:
                maximum = schema.get("maximum", None)
                if maximum is not None:
                    exclusive_maximum = schema.get("exclusiveMaximum", False)
            distribution = schema.get("distribution", None)
            # laleType distinguishes discrete (integer) from continuous
            laleType = schema.get("laleType", None)
            if laleType is None:
                laleType = typ
            if laleType == "number":
                discrete = False
            elif laleType == "integer":
                discrete = True
            else:
                raise OperatorSchemaError(
                    path,
                    f"specified laleType should be a number or integer, not: {laleType}.",
                )
            pgo: Freqs
            return SearchSpaceNumber(
                minimum=minimum,
                exclusiveMinimum=exclusive_minimum,
                maximum=maximum,
                exclusiveMaximum=exclusive_maximum,
                discrete=discrete,
                distribution=distribution,
                pgo=asFreqs(pgo_freqs),
                default=get_default(schema),
            )
        elif typ == "array" or typ == "tuple":
            laleType = schema.get("laleType", None)
            if laleType is None:
                laleType = typ
            is_tuple: bool = laleType == "tuple"
            # min/max items and the item schema each prefer the
            # *ForOptimizer variant when present
            min_items = schema.get("minItemsForOptimizer", None)
            if min_items is None:
                min_items = schema.get("minItems", None)
                if min_items is None:
                    min_items = 0
            max_items = schema.get("maxItemsForOptimizer", None)
            if max_items is None:
                max_items = schema.get("maxItems", None)
            items_schema = schema.get("itemsForOptimizer", None)
            if items_schema is None:
                items_schema = schema.get("items", None)
                if items_schema is None:
                    raise OperatorSchemaError(
                        path,
                        f"An array type was found without a provided schema for the items in the schema {schema}. Please provide a schema for the items (consider using itemsForOptimizer)",
                    )
            # we can search an empty list even without schemas
            if max_items == 0:
                if is_tuple:
                    return SearchSpaceConstant([()])
                else:
                    return SearchSpaceConstant([[]])
            prefix: Optional[List[SearchSpace]] = None
            additional: Optional[SearchSpace] = None
            if isinstance(items_schema, list):
                # positional item schemas: compile each into the prefix
                # NOTE(review): these recursive calls do not forward
                # pgo_freqs/sub_space — confirm that is intentional
                prefix = []
                for i, sub_schema in enumerate(items_schema):
                    sub = self.schemaToSearchSpaceHelper_(
                        longName, path + "_" + str(i), sub_schema,
                        relevantFields)
                    if sub is None:
                        return None
                    else:
                        prefix.append(sub)
                prefix_len = len(prefix)
                additional_items_schema = schema.get(
                    "additionalItemsForOptimizer", None)
                if additional_items_schema is None:
                    additional_items_schema = schema.get(
                        "additionalItems", None)
                if additional_items_schema is None:
                    # without a schema for extras, the maximum must not
                    # exceed the number of positional schemas
                    if max_items is None or max_items > prefix_len:
                        raise OperatorSchemaError(
                            path,
                            f"An array type was found with provided schemas for {prefix_len} elements, but either an unspecified or too high a maxItems, and no schema for the additionalItems. Please constraing maxItems to <= {prefix_len} (you can set maxItemsForOptimizer), or provide a schema for additionalItems",
                        )
                elif additional_items_schema is False:
                    # no extras allowed: cap the maximum at the prefix length
                    if max_items is None:
                        max_items = prefix_len
                    else:
                        max_items = min(max_items, prefix_len)
                else:
                    additional = self.schemaToSearchSpaceHelper_(
                        longName,
                        path + "-",
                        additional_items_schema,
                        relevantFields,
                    )
                # if items_schema is None:
                #     raise ValueError(f"an array type was found without a provided schema for the items in the schema {schema}. Please provide a schema for the items (consider using itemsForOptimizer)")
            else:
                # a single item schema applies to every element
                additional = self.schemaToSearchSpaceHelper_(
                    longName, path + "-", items_schema, relevantFields)
            if max_items is None:
                raise OperatorSchemaError(
                    path,
                    f"An array type was found without a provided maximum number of items in the schema {schema}, and it is not a list with 'additionalItems' set to False. Please provide a maximum (consider using maxItemsForOptimizer), or, if you are using a list, set additionalItems to False",
                )
            return SearchSpaceArray(
                prefix=prefix,
                minimum=min_items,
                maximum=max_items,
                additional=additional,
                is_tuple=is_tuple,
            )
        elif typ == "object":
            if "properties" not in schema:
                return SearchSpaceObject(longName, [], [])
            o = self.JsonSchemaToSearchSpaceHelper(
                longName,
                path,
                schema,
                relevantFields,
                pgo_freqs=pgo_freqs,
                sub_space=sub_space,
            )
            if sub_space:
                return SearchSpaceDict(o)
            else:
                # top-level object: present it as a single-choice object
                # space with deterministically sorted keys
                all_keys = list(o.keys())
                all_keys.sort()
                o_choice = tuple([o.get(k, None) for k in all_keys])
                return SearchSpaceObject(longName, all_keys, [o_choice])
        elif typ == "string":
            # strings fall through to the combinator handling below
            pass
        elif typ == "operator":
            # TODO: If there is a default, we could use it
            vals = schema.get("enum", None)
            if vals is None:
                logger.error(
                    "An operator is required by the schema but was not provided"
                )
                return None
            # operators are compiled recursively; plain values are constants
            sub_schemas: List[SearchSpace] = [
                accept(op, self)
                if isinstance(op, Operator) else SearchSpaceConstant(op)
                for op in vals
            ]
            combined_sub_schema: SearchSpace
            if len(sub_schemas) == 1:
                combined_sub_schema = sub_schemas[0]
                if isinstance(combined_sub_schema, SearchSpaceConstant):
                    return combined_sub_schema
            else:
                combined_sub_schema = SearchSpaceSum(sub_schemas)
                if all((isinstance(x, SearchSpaceConstant)
                        for x in sub_schemas)):
                    return combined_sub_schema
            return SearchSpaceOperator(combined_sub_schema)
        elif typ == "Any":
            raise OperatorSchemaError(
                path,
                f"A search space was found with laleType ({typ}), which is not searchable. Please mark the relevant hyperparameter as not relevant for the optimizer. schema: {schema}",
            )
        else:
            raise OperatorSchemaError(
                path, f"An unknown type ({typ}) was found in the schema {schema}"
            )
    if "anyOf" in schema:
        # compile each object disjunct, then deduplicate the choices
        objs = []
        for s_obj in schema["anyOf"]:
            if "type" in s_obj and s_obj["type"] == "object":
                o = self.JsonSchemaToSearchSpaceHelper(
                    longName,
                    path,
                    s_obj,
                    relevantFields,
                    pgo_freqs=pgo_freqs,
                    sub_space=sub_space,
                )
                if o:
                    objs.append(o)
        if objs:
            # First, gather a list of all the properties
            keys_list = [set(o.keys()) for o in objs]
            # make sure the iterator is deterministic
            all_keys = list(set.union(*keys_list))
            # and we might as well make it sorted
            all_keys.sort()

            def as_str(k, c):
                # stable textual form of a sub-space, for duplicate detection
                if c is None:
                    return "None"
                else:
                    return search_space_to_str_for_comparison(
                        c, path + "_" + k)

            anys: Dict[str, Any] = {}
            for o in objs:
                o_choice = tuple([o.get(k, None) for k in all_keys])
                k = str([
                    as_str(all_keys[idx], c)
                    for idx, c in enumerate(o_choice)
                ])
                if k in anys:
                    # duplicates are logged and the later one kept
                    logger.info(
                        f"Ignoring Duplicate SearchSpace entry {k}")
                anys[k] = o_choice
            return SearchSpaceObject(longName, all_keys, anys.values())
        else:
            return SearchSpaceObject(longName, [], [])
    if "allOf" in schema:
        # if all but one are negated constraints, we will just ignore them
        pos_sub_schema: List[JsonSchema] = []
        for sub_schema in schema["allOf"]:
            if "not" not in sub_schema:
                pos_sub_schema.append(sub_schema)
        if len(pos_sub_schema) > 1:
            raise OperatorSchemaError(
                path,
                f"schemaToSearchSpaceHelper does not yet know how to compile the given schema {schema}, because it is an allOf with more than one non-negated schemas ({pos_sub_schema})",
            )
        if len(pos_sub_schema) == 0:
            raise OperatorSchemaError(
                path,
                f"schemaToSearchSpaceHelper does not yet know how to compile the given schema {schema}, because it is an allOf with only negated schemas",
            )
        logger.debug(
            f"[{path}]: schemaToSearchSpaceHelper: ignoring negated schemas in the conjunction {schema}"
        )
        # compile the single remaining (non-negated) conjunct
        return self.schemaToSearchSpaceHelper_(
            longName,
            path,
            pos_sub_schema[0],
            relevantFields,
            pgo_freqs=pgo_freqs,
            sub_space=sub_space,
        )
    # TODO: handle degenerate cases
    raise OperatorSchemaError(
        path,
        f"schemaToSearchSpaceHelper does not yet know how to compile the given schema {schema}",
    )
def run(cls, op: Ops.Operator) -> Dict[str, Any]:
    """Instantiate the visitor and collect defaults for `op`."""
    return accept(op, cls())
def visitSearchSpaceOperator(self, op: SearchSpaceOperator, path: str, counter=None):
    """Compile the wrapped sub-space and mark it as a nested hyperopt value."""
    inner = accept(op.sub_space, self, path)
    return scope.make_nested_hyperopt(inner)
def run(cls, op: PlannedOperator, pgo: Optional[PGO] = None) -> SearchSpace:
    """Instantiate the visitor (optionally with PGO data) and compile `op`."""
    return accept(op, cls(pgo=pgo))
def visitSearchSpaceOperator(
        self, op: SearchSpaceOperator) -> SearchSpaceGridInternalType:
    """An operator wrapper is transparent for grids: compile its sub-space."""
    return accept(op.sub_space, self)