def SearchSpaceNumberToSMAC(key: str, hp: SearchSpaceNumber) -> Hyperparameter:
    """Build a uniform integer/float hyperparameter named ``key`` from ``hp``.

    The bounds are the inclusive minimum and maximum reported by ``hp``.
    A ``loguniform`` distribution turns on the ``log`` flag of the returned
    hyperparameter; ``uniform`` and ``integer`` leave it off.  When no
    distribution is given, ``uniform`` is assumed.

    Returns:
        ``UniformIntegerHyperparameter`` when ``hp.discrete`` is truthy,
        otherwise ``UniformFloatHyperparameter``.

    Raises:
        ValueError: if ``hp`` has no maximum, has no minimum, or names a
            distribution other than ``uniform``, ``integer`` or
            ``loguniform``.
    """
    distribution = hp.distribution or "uniform"

    # Both bounds are mandatory; the maximum is validated first.
    if hp.maximum is None:
        raise ValueError(
            f"maximum not specified for a number with distribution {distribution} for {key}"
        )
    upper = hp.getInclusiveMax()

    if hp.minimum is None:
        raise ValueError(
            f"minimum not specified for a number with distribution {distribution} for {key}"
        )
    lower = hp.getInclusiveMin()

    if distribution not in ("uniform", "integer", "loguniform"):
        raise ValueError(f"unknown/unsupported distribution {distribution} for {key}")
    use_log_scale = distribution == "loguniform"

    factory = (
        UniformIntegerHyperparameter if hp.discrete else UniformFloatHyperparameter
    )
    return factory(key, lower, upper, log=use_log_scale)
def visitSearchSpaceNumber(self, space: SearchSpaceNumber, path: str, counter=None):
    """Translate a numeric search space into a hyperopt expression.

    Args:
        space: the numeric search space to translate.
        path: location of this space; used to build the hyperopt label and
            attached to any error raised.
        counter: forwarded to ``self.mk_label``.

    Returns:
        A hyperopt expression labelled with ``self.mk_label(path, counter)``.

    Raises:
        SearchSpaceError: if a required bound is missing, if ``integer`` is
            requested for a non-discrete space, if ``loguniform`` is
            requested with a non-positive minimum, or if the distribution
            is not recognised.
    """
    label = self.mk_label(path, counter)

    # With PGO data, sample an index into it and let pgo_sample resolve it.
    if space.pgo is not None:
        pgo_index = hp.quniform(label, 0, len(space.pgo) - 1, 1)
        return scope.pgo_sample(space.pgo, pgo_index)

    distribution = space.distribution or "uniform"

    if space.maximum is None:
        raise SearchSpaceError(
            path, f"maximum not specified for a number with distribution {distribution}"
        )
    upper = space.getInclusiveMax()
    # if the maximum is not None, the inclusive maximum should not be none
    assert upper is not None

    # The "integer" distribution needs only a maximum.
    if distribution == "integer":
        if space.discrete:
            # NOTE(review): the minimum is not consulted on this path, and
            # hyperopt documents hp.randint's upper argument as exclusive --
            # confirm that passing the inclusive maximum is intended.
            return hp.randint(label, upper)
        raise SearchSpaceError(
            path,
            "integer distribution specified for a non discrete numeric type",
        )

    if space.minimum is None:
        raise SearchSpaceError(
            path, f"minimum not specified for a number with distribution {distribution}"
        )
    lower = space.getInclusiveMin()
    # if the minimum is not None, the inclusive minimum should not be none
    assert lower is not None

    if distribution == "uniform":
        if space.discrete:
            return scope.int(hp.quniform(label, lower, upper, 1))
        return hp.uniform(label, lower, upper)

    if distribution != "loguniform":
        raise SearchSpaceError(path, f"Unknown distribution type: {distribution}")

    # for log distributions, hyperopt requires that we provide the log of the min/max
    if lower <= 0:
        raise SearchSpaceError(
            path,
            f"minimum of 0 specified with a {distribution} distribution.  This is not allowed; please set it (possibly using minimumForOptimizer) to be positive",
        )
    log_lower = math.log(lower) if lower > 0 else lower
    log_upper = math.log(upper) if upper > 0 else upper
    if space.discrete:
        return scope.int(hp.qloguniform(label, log_lower, log_upper, 1))
    return hp.loguniform(label, log_lower, log_upper)
def visitSearchSpaceNumber(self, space: SearchSpaceNumber, path: str, counter=None, useCounter=True):
    """Render a numeric search space as the source text of a hyperopt expression.

    Args:
        space: the numeric search space to render.
        path: location of this space; used to build the label and attached
            to any error raised.
        counter: forwarded to ``self.mk_label``.
        useCounter: forwarded to ``self.mk_label``.

    Returns:
        A string containing a hyperopt expression (for example
        ``"hp.uniform('label', 0, 1)"``).  When ``space.pgo`` is set, the
        PGO data is stored in ``self.pgo_dict[label]`` and the returned
        text refers to it as ``pgo_<label>``.

    Raises:
        SearchSpaceError: if a required bound is missing, if ``integer`` is
            requested for a non-discrete space, if ``loguniform`` is
            requested with a non-positive minimum, or if the distribution
            is not recognised.
    """
    label = self.mk_label(path, counter, useCounter=useCounter)

    if space.pgo is not None:
        # The generated text refers to the PGO data by name, so stash it.
        self.pgo_dict[label] = space.pgo
        return f"scope.pgo_sample(pgo_{label}, hp.quniform('{label}', 0, {len(space.pgo) - 1}, 1))"

    dist = "uniform"
    if space.distribution:
        dist = space.distribution

    if space.maximum is None:
        # The exception used to be constructed here without being raised,
        # letting execution continue with a missing maximum.
        raise SearchSpaceError(
            path, f"maximum not specified for a number with distribution {dist}"
        )
    upper = space.getInclusiveMax()

    # These distributions need only a maximum
    if dist == "integer":
        if not space.discrete:
            raise SearchSpaceError(
                path,
                "integer distribution specified for a non discrete numeric type....",
            )
        return f"hp.randint('{label}', {upper})"

    if space.minimum is None:
        raise SearchSpaceError(
            path, f"minimum not specified for a number with distribution {dist}"
        )
    lower = space.getInclusiveMin()

    if dist == "uniform":
        if space.discrete:
            return f"hp.quniform('{label}', {lower}, {upper}, 1)"
        return f"hp.uniform('{label}', {lower}, {upper})"

    if dist == "loguniform":
        # for log distributions, hyperopt requires that we provide the log of the min/max
        if lower <= 0:
            raise SearchSpaceError(
                path,
                f"minimum of 0 specified with a {dist} distribution.  This is not allowed; please set it (possibly using minimumForOptimizer) to be positive",
            )
        if lower > 0:
            lower = math.log(lower)
        if upper > 0:
            upper = math.log(upper)
        if space.discrete:
            return f"hp.qloguniform('{label}', {lower}, {upper}, 1)"
        return f"hp.loguniform('{label}', {lower}, {upper})"

    raise SearchSpaceError(path, f"Unknown distribution type: {dist}")
def SearchSpaceNumberToGSValues(
    key: str, hp: SearchSpaceNumber, num_samples: Optional[int] = None
) -> List[GSValue]:
    """Return a list of grid-search sample values for a numeric search space.

    Args:
        key: name of the hyperparameter; used only in error messages.
        hp: the numeric search space to sample.
        num_samples: how many values to produce; defaults to
            ``DEFAULT_SAMPLES_PER_DISTRIBUTION`` when ``None``.

    Returns:
        * If ``hp.pgo`` is set, the values drawn from ``hp.pgo.samples``.
        * Otherwise an evenly spaced grid between the inclusive minimum and
          maximum: linear for ``uniform``/``integer``, geometric for
          ``loguniform``.  If ``hp.default()`` is not ``None`` it takes one
          of the sample slots and is appended last.

    Raises:
        ValueError: if a bound is missing, if the distribution is not
            supported, or if ``loguniform`` is requested with a non-positive
            bound.
    """
    samples: int
    if num_samples is None:
        samples = DEFAULT_SAMPLES_PER_DISTRIBUTION
    else:
        samples = num_samples

    # Add preliminary support for PGO
    if hp.pgo is not None:
        return list(hp.pgo.samples(samples))

    # if we are not doing PGO
    dist = "uniform"
    if hp.distribution:
        dist = hp.distribution

    if hp.maximum is None:
        raise ValueError(
            f"maximum not specified for a number with distribution {dist} for {key}"
        )
    upper = hp.getInclusiveMax()
    assert upper is not None

    if hp.minimum is None:
        raise ValueError(
            f"minimum not specified for a number with distribution {dist} for {key}"
        )
    lower = hp.getInclusiveMin()
    assert lower is not None

    dt: np.dtype
    if hp.discrete:
        dt = np.dtype(int)
    else:
        dt = np.dtype(float)

    default = hp.default()
    if default is not None:
        # always use the default as one of the samples
        # TODO: ensure that the default is valid according to the schema
        if samples <= 1:
            return [default]
        samples = samples - 1

    if dist == "uniform" or dist == "integer":
        ret = np.linspace(lower, upper, num=samples, dtype=dt).tolist()
    elif dist == "loguniform":
        if lower <= 0 or upper <= 0:
            raise ValueError(
                f"loguniform distribution requires positive bounds, got [{lower}, {upper}] for {key}"
            )
        # np.logspace interprets its arguments as base-10 exponents, which
        # would sample 10**lower .. 10**upper.  geomspace takes the actual
        # endpoints and spaces the samples evenly on a log scale.
        grid = np.geomspace(lower, upper, num=samples)
        if hp.discrete:
            # Round before the integer cast so float error (e.g. 9.999...)
            # is not truncated to the wrong integer.
            grid = np.rint(grid)
        ret = grid.astype(dt).tolist()
    else:
        raise ValueError(f"unknown/unsupported distribution {dist} for {key}")

    if default is not None:
        ret.append(default)
    return ret
def schemaToSearchSpaceHelper_(
    self,
    longName,
    path: str,
    schema: JsonSchema,
    relevantFields: Optional[Set[str]],
    pgo_freqs: pgo_part = None,
    sub_space: bool = True,
) -> Optional[SearchSpace]:
    """Compile a JSON schema into a search space, dispatching on its type.

    The type is read from ``laleType`` when present, else from ``type``.
    Schemas with an ``enum`` (other than operators) become enumerations;
    otherwise the type selects a boolean, number, array/tuple, object or
    operator search space.  Schemas without a usable type are handled only
    when they are an ``anyOf`` of objects or an ``allOf`` with exactly one
    non-negated member.

    Args:
        longName: passed through to the constructed ``SearchSpaceObject``
            and to recursive calls.
        path: location of ``schema``; used in error and log messages and
            extended for nested items.
        schema: the JSON schema to compile.
        relevantFields: passed through to recursive calls and to
            ``self.JsonSchemaToSearchSpaceHelper``.
        pgo_freqs: PGO data, converted with ``asFreqs`` for leaf spaces.
        sub_space: for object schemas, selects between returning a
            ``SearchSpaceDict`` (truthy) and a ``SearchSpaceObject``.

    Returns:
        The compiled search space, or ``None`` when the schema is a false
        schema, when a tuple-style item cannot be compiled, or when an
        operator schema has no ``enum``.

    Raises:
        OperatorSchemaError: when the schema has a form this helper does
            not know how to compile.
    """
    # TODO: handle degenerate cases
    # right now, this handles only a very fixed form

    if is_false_schema(schema):
        return None

    # laleType, when given, takes precedence over the JSON schema type.
    typ: Optional[str] = None
    typ = schema.get("laleType", None)
    if typ is None:
        typ = schema.get("type", None)
    else:
        typ = typ

    if "enum" in schema and typ != "operator":
        vals = schema["enum"]
        return SearchSpaceEnum(vals, pgo=asFreqs(pgo_freqs), default=get_default(schema))

    if typ is not None:
        if typ == "boolean":
            return SearchSpaceBool(pgo=asFreqs(pgo_freqs), default=get_default(schema))
        elif typ == "number" or typ == "integer":
            # The *ForOptimizer bound wins over the plain bound; the
            # exclusivity flag is read from the same family as the bound.
            exclusive_minimum = False
            minimum = schema.get("minimumForOptimizer", None)
            if minimum is not None:
                exclusive_minimum = schema.get(
                    "exclusiveMinimumForOptimizer", False)
            else:
                minimum = schema.get("minimum", None)
                if minimum is not None:
                    exclusive_minimum = schema.get("exclusiveMinimum", False)

            exclusive_maximum = False
            maximum = schema.get("maximumForOptimizer", None)
            if maximum is not None:
                exclusive_maximum = schema.get(
                    "exclusiveMaximumForOptimizer", False)
            else:
                maximum = schema.get("maximum", None)
                if maximum is not None:
                    exclusive_maximum = schema.get("exclusiveMaximum", False)

            distribution = schema.get("distribution", None)

            laleType = schema.get("laleType", None)
            if laleType is None:
                laleType = typ

            if laleType == "number":
                discrete = False
            elif laleType == "integer":
                discrete = True
            else:
                raise OperatorSchemaError(
                    path,
                    f"specified laleType should be a number or integer, not: {laleType}.",
                )

            # Bare annotation only; no value is ever bound to this name.
            pgo: Freqs

            return SearchSpaceNumber(
                minimum=minimum,
                exclusiveMinimum=exclusive_minimum,
                maximum=maximum,
                exclusiveMaximum=exclusive_maximum,
                discrete=discrete,
                distribution=distribution,
                pgo=asFreqs(pgo_freqs),
                default=get_default(schema),
            )
        elif typ == "array" or typ == "tuple":
            laleType = schema.get("laleType", None)
            if laleType is None:
                laleType = typ

            is_tuple: bool = laleType == "tuple"

            # Item-count bounds and item schemas also prefer the
            # *ForOptimizer variants; a missing minimum defaults to 0.
            min_items = schema.get("minItemsForOptimizer", None)
            if min_items is None:
                min_items = schema.get("minItems", None)
                if min_items is None:
                    min_items = 0
            max_items = schema.get("maxItemsForOptimizer", None)
            if max_items is None:
                max_items = schema.get("maxItems", None)

            items_schema = schema.get("itemsForOptimizer", None)
            if items_schema is None:
                items_schema = schema.get("items", None)
                if items_schema is None:
                    raise OperatorSchemaError(
                        path,
                        f"An array type was found without a provided schema for the items in the schema {schema}. Please provide a schema for the items (consider using itemsForOptimizer)",
                    )

            # we can search an empty list even without schemas
            if max_items == 0:
                if is_tuple:
                    return SearchSpaceConstant([()])
                else:
                    return SearchSpaceConstant([[]])

            prefix: Optional[List[SearchSpace]] = None
            additional: Optional[SearchSpace] = None
            if isinstance(items_schema, list):
                # A list of item schemas gives one schema per position;
                # if any position cannot be compiled, neither can the array.
                prefix = []
                for i, sub_schema in enumerate(items_schema):
                    sub = self.schemaToSearchSpaceHelper_(
                        longName, path + "_" + str(i), sub_schema, relevantFields)
                    if sub is None:
                        return None
                    else:
                        prefix.append(sub)
                prefix_len = len(prefix)
                additional_items_schema = schema.get(
                    "additionalItemsForOptimizer", None)
                if additional_items_schema is None:
                    additional_items_schema = schema.get(
                        "additionalItems", None)
                if additional_items_schema is None:
                    # Nothing describes items beyond the prefix, so the
                    # array must not be allowed to grow past it.
                    if max_items is None or max_items > prefix_len:
                        raise OperatorSchemaError(
                            path,
                            f"An array type was found with provided schemas for {prefix_len} elements, but either an unspecified or too high a maxItems, and no schema for the additionalItems. Please constraing maxItems to <= {prefix_len} (you can set maxItemsForOptimizer), or provide a schema for additionalItems",
                        )
                elif additional_items_schema is False:
                    # Additional items are forbidden: cap at the prefix length.
                    if max_items is None:
                        max_items = prefix_len
                    else:
                        max_items = min(max_items, prefix_len)
                else:
                    additional = self.schemaToSearchSpaceHelper_(
                        longName,
                        path + "-",
                        additional_items_schema,
                        relevantFields,
                    )
                    # if items_schema is None:
                    #     raise ValueError(f"an array type was found without a provided schema for the items in the schema {schema}. Please provide a schema for the items (consider using itemsForOptimizer)")
            else:
                # A single item schema applies to every element.
                additional = self.schemaToSearchSpaceHelper_(
                    longName, path + "-", items_schema, relevantFields)

            if max_items is None:
                raise OperatorSchemaError(
                    path,
                    f"An array type was found without a provided maximum number of items in the schema {schema}, and it is not a list with 'additionalItems' set to False. Please provide a maximum (consider using maxItemsForOptimizer), or, if you are using a list, set additionalItems to False",
                )

            return SearchSpaceArray(
                prefix=prefix,
                minimum=min_items,
                maximum=max_items,
                additional=additional,
                is_tuple=is_tuple,
            )
        elif typ == "object":
            if "properties" not in schema:
                return SearchSpaceObject(longName, [], [])
            o = self.JsonSchemaToSearchSpaceHelper(
                longName,
                path,
                schema,
                relevantFields,
                pgo_freqs=pgo_freqs,
                sub_space=sub_space,
            )
            if sub_space:
                return SearchSpaceDict(o)
            else:
                # Sort the keys so the single choice has a stable order.
                all_keys = list(o.keys())
                all_keys.sort()
                o_choice = tuple([o.get(k, None) for k in all_keys])
                return SearchSpaceObject(longName, all_keys, [o_choice])
        elif typ == "string":
            # No dedicated handling: falls through to the anyOf/allOf
            # checks and the final error below.
            pass
        elif typ == "operator":
            # TODO: If there is a default, we could use it
            vals = schema.get("enum", None)
            if vals is None:
                logger.error(
                    "An operator is required by the schema but was not provided"
                )
                return None

            # Operators are visited; any other enum value is kept as a constant.
            sub_schemas: List[SearchSpace] = [
                accept(op, self)
                if isinstance(op, Operator)
                else SearchSpaceConstant(op)
                for op in vals
            ]
            combined_sub_schema: SearchSpace
            if len(sub_schemas) == 1:
                combined_sub_schema = sub_schemas[0]
                if isinstance(combined_sub_schema, SearchSpaceConstant):
                    return combined_sub_schema
            else:
                combined_sub_schema = SearchSpaceSum(sub_schemas)
                # A sum made only of constants is returned unwrapped.
                if all((isinstance(x, SearchSpaceConstant) for x in sub_schemas)):
                    return combined_sub_schema
            return SearchSpaceOperator(combined_sub_schema)
        elif typ == "Any":
            raise OperatorSchemaError(
                path,
                f"A search space was found with laleType ({typ}), which is not searchable. Please mark the relevant hyperparameter as not relevant for the optimizer. schema: {schema}",
            )
        else:
            raise OperatorSchemaError(
                path, f"An unknown type ({typ}) was found in the schema {schema}"
            )

    if "anyOf" in schema:
        # Only the object-typed alternatives are compiled; others are skipped.
        objs = []
        for s_obj in schema["anyOf"]:
            if "type" in s_obj and s_obj["type"] == "object":
                o = self.JsonSchemaToSearchSpaceHelper(
                    longName,
                    path,
                    s_obj,
                    relevantFields,
                    pgo_freqs=pgo_freqs,
                    sub_space=sub_space,
                )
                if o:
                    objs.append(o)
        if objs:
            # First, gather a list of all the properties
            keys_list = [set(o.keys()) for o in objs]
            # make sure the iterator is deterministic
            all_keys = list(set.union(*keys_list))
            # and we might as well make it sorted
            all_keys.sort()

            def as_str(k, c):
                if c is None:
                    return "None"
                else:
                    return search_space_to_str_for_comparison(
                        c, path + "_" + k)

            # Choices are keyed by their string rendering, so alternatives
            # that render identically collapse into a single entry.
            anys: Dict[str, Any] = {}
            for o in objs:
                o_choice = tuple([o.get(k, None) for k in all_keys])
                k = str([
                    as_str(all_keys[idx], c)
                    for idx, c in enumerate(o_choice)
                ])
                if k in anys:
                    logger.info(
                        f"Ignoring Duplicate SearchSpace entry {k}")
                anys[k] = o_choice
            return SearchSpaceObject(longName, all_keys, anys.values())
        else:
            return SearchSpaceObject(longName, [], [])

    if "allOf" in schema:
        # if all but one are negated constraints, we will just ignore them
        pos_sub_schema: List[JsonSchema] = []
        for sub_schema in schema["allOf"]:
            if "not" not in sub_schema:
                pos_sub_schema.append(sub_schema)

        if len(pos_sub_schema) > 1:
            raise OperatorSchemaError(
                path,
                f"schemaToSearchSpaceHelper does not yet know how to compile the given schema {schema}, because it is an allOf with more than one non-negated schemas ({pos_sub_schema})",
            )
        if len(pos_sub_schema) == 0:
            raise OperatorSchemaError(
                path,
                f"schemaToSearchSpaceHelper does not yet know how to compile the given schema {schema}, because it is an allOf with only negated schemas",
            )

        logger.debug(
            f"[{path}]: schemaToSearchSpaceHelper: ignoring negated schemas in the conjunction {schema}"
        )
        # Compile the one positive member in place of the whole conjunction.
        return self.schemaToSearchSpaceHelper_(
            longName,
            path,
            pos_sub_schema[0],
            relevantFields,
            pgo_freqs=pgo_freqs,
            sub_space=sub_space,
        )
    # TODO: handle degenerate cases
    raise OperatorSchemaError(
        path,
        f"schemaToSearchSpaceHelper does not yet know how to compile the given schema {schema}",
    )