def __eq__(self, other: "Base") -> bool:
    """Compare two objects attribute by attribute.

    Every attribute in ``self.__dict__`` is looked up on both objects, passed
    through ``numpy_type_to_python_type`` and then compared with a strategy
    chosen by the attribute name or by the type of the value.

    Args:
        other: Object to compare against.

    Returns:
        False as soon as one attribute differs in type or in value,
        True otherwise.
    """
    # NOTE(review): `getattr(other, field)` has no default, so comparing with
    # an object that lacks one of these attributes raises AttributeError.
    for field in self.__dict__.keys():
        self_val = getattr(self, field)
        other_val = getattr(other, field)
        self_val = numpy_type_to_python_type(self_val)
        other_val = numpy_type_to_python_type(other_val)
        if type(self_val) != type(other_val):
            return False

        if field == "_experiment":
            # prevent infinite loop when checking equality of Trials
            # Types already match here, so `self_val is None` implies that
            # `other_val` is None as well; reading `.name` on None would raise.
            equal = self_val is None or self_val.name == other_val.name
        elif isinstance(self_val, list):
            equal = same_elements(self_val, other_val)
        elif isinstance(self_val, np.ndarray):
            equal = np.array_equal(self_val, other_val)
        elif isinstance(self_val, datetime):
            equal = datetime_equals(self_val, other_val)
        elif isinstance(self_val, pd.DataFrame):
            equal = self_val.equals(other_val)
        else:
            equal = self_val == other_val

        if not equal:
            return False
    return True
def __eq__(self, other: "Base") -> bool:
    """Return True when every attribute of ``self`` matches the one on ``other``.

    Attribute values are first normalized with ``numpy_type_to_python_type``.
    Values of different types are never equal; otherwise the comparison used
    depends on the attribute name (``_experiment``, ``_model``) or on the
    value type (list, dict, array, datetime, float, DataFrame).
    """
    for attr_name in self.__dict__:
        mine, theirs = getattr(self, attr_name), getattr(other, attr_name)
        mine = numpy_type_to_python_type(mine)
        theirs = numpy_type_to_python_type(theirs)
        if type(mine) != type(theirs):
            return False

        if attr_name == "_experiment":
            # prevent infinite loop when checking equality of Trials
            matches = mine is theirs is None or (mine._name == theirs._name)
        elif attr_name == "_model":  # pragma: no cover (tested in modelbridge)
            # TODO[T52643706]: replace with per-`ModelBridge` method like
            # `equivalent_models`, to compare models more meaningfully.
            if hasattr(mine, "model"):
                # If model bridges have a `model` attribute, the types of the
                # values of those attributes should be equal if the model
                # bridge is the same.
                matches = isinstance(mine.model, type(theirs.model))
            else:
                matches = not hasattr(theirs, "model")
        elif isinstance(mine, list):
            matches = same_elements(mine, theirs)
        elif isinstance(mine, dict):
            # Same key set and same collection of values.
            matches = sorted(mine.keys()) == sorted(
                theirs.keys()
            ) and same_elements(list(mine.values()), list(theirs.values()))
        elif isinstance(mine, np.ndarray):
            matches = np.array_equal(mine, theirs)
        elif isinstance(mine, datetime):
            matches = datetime_equals(mine, theirs)
        elif isinstance(mine, float):
            matches = np.isclose(mine, theirs)
        elif isinstance(mine, pd.DataFrame):
            if mine.empty and theirs.empty:
                matches = True
            else:
                # Column order is ignored; values are compared approximately.
                try:
                    pd.testing.assert_frame_equal(
                        mine.sort_index(axis=1),
                        theirs.sort_index(axis=1),
                        check_exact=False,
                    )
                except AssertionError:
                    matches = False
                else:
                    matches = True
        else:
            matches = mine == theirs

        if not matches:
            return False
    return True
def _attribute_values_match(field: str, one_val: Any, other_val: Any) -> Any:
    """Compare two same-typed attribute values; the result is used for its truthiness."""
    if field == "_experiment":
        # prevent infinite loop when checking equality of Trials
        return one_val is other_val is None or (one_val._name == other_val._name)
    if field == "_model":  # pragma: no cover (tested in modelbridge)
        # TODO[T52643706]: replace with per-`ModelBridge` method like
        # `equivalent_models`, to compare models more meaningfully.
        if hasattr(one_val, "model"):
            # If model bridges have a `model` attribute, the types of the
            # values of those attributes should be equal if the model
            # bridge is the same.
            return isinstance(one_val.model, type(other_val.model))
        return not hasattr(other_val, "model")
    if isinstance(one_val, list):
        return same_elements(one_val, other_val)
    if isinstance(one_val, dict):
        same_keys = sorted(one_val.keys()) == sorted(other_val.keys())
        return same_keys and same_elements(
            list(one_val.values()), list(other_val.values())
        )
    if isinstance(one_val, np.ndarray):
        return np.array_equal(one_val, other_val)
    if isinstance(one_val, datetime):
        return datetime_equals(one_val, other_val)
    if isinstance(one_val, float):
        return np.isclose(one_val, other_val)
    if isinstance(one_val, pd.DataFrame):
        return dataframe_equals(one_val, other_val)
    return one_val == other_val


def object_attribute_dicts_equal(
    one_dict: Dict[str, Any], other_dict: Dict[str, Any]
) -> bool:
    """Check whether every item of ``one_dict`` has an equal item in ``other_dict``.

    Only the keys of ``one_dict`` are visited; a key missing from
    ``other_dict`` is read as ``None``.

    NOTE: Special-cases some Ax object attributes, like "_experiment" or
    "_model", where full equality is hard to check.
    """
    for field, raw_one in one_dict.items():
        raw_other = other_dict.get(field)
        one_val = numpy_type_to_python_type(raw_one)
        other_val = numpy_type_to_python_type(raw_other)
        if type(one_val) != type(other_val):
            return False
        if not _attribute_values_match(field, one_val, other_val):
            return False
    return True
def raw_data_to_evaluation(
    raw_data: TEvaluationOutcome, objective_name: str
) -> TEvaluationOutcome:
    """Normalize raw trial evaluation data for a single arm.

    Dictionaries and lists are returned unchanged. A tuple is keyed by
    ``objective_name``, and a single number is keyed by ``objective_name``
    and paired with ``None`` as its second element.

    Note: this function expects raw_data to be data for a `Trial`, not a
    `BatchedTrial`.

    Raises:
        ValueError: If a dictionary has a dictionary among its values, or if
            ``raw_data`` is of an unsupported type.
    """
    if isinstance(raw_data, dict):
        for outcome in raw_data.values():
            if isinstance(outcome, dict):  # pragma: no cover
                raise ValueError("Raw data is expected to be just for one arm.")
        return raw_data
    if isinstance(raw_data, list):
        return raw_data
    if isinstance(raw_data, tuple):
        return {objective_name: raw_data}
    if isinstance(raw_data, (float, int)):
        return {objective_name: (raw_data, None)}  # pyre-fixme[7]
    if isinstance(raw_data, (np.float32, np.float64, np.int32, np.int64)):
        return {objective_name: (numpy_type_to_python_type(raw_data), None)}
    raise ValueError(
        "Raw data has an invalid type. The data must either be in the form "
        "of a dictionary of metric names to mean, sem tuples, "
        "or a single mean, sem tuple, or a single mean."
    )
def object_to_json(object: Any) -> Any:
    """Recursively convert an Ax object into a JSON-serializable structure.

    The input is first passed through ``numpy_type_to_python_type``. Builtin
    scalars are returned as-is; lists, tuples and dicts are converted element
    by element; a handful of library types (OrderedDict, datetime, DataFrame,
    named tuples, enums, numpy arrays) are wrapped in a dictionary carrying a
    ``"__type"`` tag. Anything else is looked up in ``ENCODER_REGISTRY`` by
    its type, converted to a shallow dictionary by the registered encoder,
    and each value of that dictionary is converted recursively.

    Raises:
        JSONEncodeError: If no encoder is registered for the object's type.
    """
    object = numpy_type_to_python_type(object)
    value_type = type(object)

    if value_type in (str, int, float, bool, type(None)):
        return object
    if value_type is list:
        return [object_to_json(item) for item in object]
    if value_type is tuple:
        return tuple(object_to_json(item) for item in object)
    if value_type is dict:
        return {key: object_to_json(val) for key, val in object.items()}
    if value_type is OrderedDict:
        # Stored as a list of pairs so that the ordering is explicit.
        return {
            "__type": value_type.__name__,
            "value": [(key, object_to_json(val)) for key, val in object.items()],
        }
    if value_type is datetime.datetime:
        return {
            "__type": value_type.__name__,
            "value": object.strftime("%Y-%m-%d %H:%M:%S.%f"),
        }
    if value_type is pd.DataFrame:
        return {"__type": value_type.__name__, "value": object.to_json()}
    if _is_named_tuple(object):
        tagged = {"__type": value_type.__name__}
        for key, val in object._asdict().items():
            tagged[key] = object_to_json(val)
        return tagged
    if issubclass(value_type, enum.Enum):
        return {"__type": value_type.__name__, "name": object.name}
    if issubclass(value_type, np.ndarray):
        return {"__type": value_type.__name__, "value": object.tolist()}
    if isclass(object) and issubclass(object, Transform):
        # There is no other way to check if object is of type Type[Transform].
        value_type = Type[Transform]

    if value_type not in ENCODER_REGISTRY:
        raise JSONEncodeError(
            f"Object {object} passed to `object_to_json` (of type {value_type}) is "
            f"not registered with a corresponding encoder in ENCODER_REGISTRY."
        )
    shallow = ENCODER_REGISTRY[value_type](object)
    return {key: object_to_json(val) for key, val in shallow.items()}
def evaluation_function_outer(
    self, parameterization: TParameterization, weight: Optional[float] = None
) -> Dict[str, Tuple[float, float]]:
    """Call the stored evaluation function and normalize what it returns.

    The evaluation function is called with ``parameterization`` only, or
    with ``parameterization`` and ``weight``, depending on how many
    parameters its signature declares. A dictionary result is returned
    unchanged; a tuple or a single number is keyed by the name of the
    objective metric, a single number being paired with ``0.0``.

    Raises:
        ValueError: If the evaluation function takes neither one nor two
            parameters.
        Exception: If the evaluation function returns an unsupported type.
    """
    arity = len(inspect.signature(self._evaluation_function).parameters)
    if arity == 1:
        # pyre-fixme[20]: Anonymous call expects argument `$1`.
        outcome = self._evaluation_function(parameterization)
    elif arity == 2:
        outcome = self._evaluation_function(parameterization, weight)
    else:
        raise ValueError(  # pragma: no cover
            "Evaluation function must take either one parameter "
            "(parameterization) or two parameters (parameterization and weight)."
        )

    if isinstance(outcome, dict):
        return outcome
    if isinstance(outcome, tuple):
        return {self.optimization_config.objective.metric.name: outcome}
    if isinstance(outcome, (float, int)):
        return {self.optimization_config.objective.metric.name: (outcome, 0.0)}
    if isinstance(outcome, (np.float32, np.float64, np.int32, np.int64)):
        objective_name = self.optimization_config.objective.metric.name
        return {objective_name: (numpy_type_to_python_type(outcome), 0.0)}
    raise Exception(  # pragma: no cover
        "Evaluation function returned an invalid type. The function must "
        "either return a dictionary of metric names to mean, sem tuples "
        "or a single mean, sem tuple, or a single mean."
    )
def raw_data_to_evaluation(
    raw_data: TEvaluationOutcome, objective_name: str
) -> Union[TTrialEvaluation, TFidelityTrialEvaluation]:
    """Normalize raw trial evaluation data for a single arm.

    Dictionaries and lists are returned unchanged. A tuple is keyed by
    ``objective_name``, and a single number is keyed by ``objective_name``
    and paired with ``None`` as its second element.

    Note: this function expects raw_data to be data for a `Trial`, not a
    `BatchedTrial`.

    Raises:
        ValueError: If ``raw_data`` is of an unsupported type.
    """
    # `BatchedTrial` data not expected because it was not needed, to add (if
    # need arises), make raw_data a mapping from arm name to TEvaluationOutcome.
    if isinstance(raw_data, (dict, list)):
        # Already in one of the two supported output shapes.
        return raw_data
    if isinstance(raw_data, tuple):
        return {objective_name: raw_data}
    if isinstance(raw_data, (float, int)):
        return {objective_name: (raw_data, None)}  # pyre-fixme[7]
    if isinstance(raw_data, (np.float32, np.float64, np.int32, np.int64)):
        return {objective_name: (numpy_type_to_python_type(raw_data), None)}
    raise ValueError(
        "Raw data has an invalid type. The data must either be in the form "
        "of a dictionary of metric names to mean, sem tuples, "
        "or a single mean, sem tuple, or a single mean."
    )
def fields_equal(self, other: "SQABase", field: str) -> bool:
    """Tell whether attribute `field` holds equal values on `self` and `other`.

    Both values are normalized with ``numpy_type_to_python_type``; values of
    different types are unequal, and lists, ``SQABase`` instances, datetimes
    and floats each use a dedicated comparison.
    """
    mine, theirs = getattr(self, field), getattr(other, field)
    mine = numpy_type_to_python_type(mine)
    theirs = numpy_type_to_python_type(theirs)

    if type(mine) != type(theirs):
        return False
    if isinstance(mine, list):
        return SQABase.list_equals(mine, theirs)
    if isinstance(mine, SQABase):
        return mine.equals(theirs)
    if isinstance(mine, datetime):
        return datetime_equals(mine, theirs)
    if isinstance(mine, float):
        return np.isclose(mine, theirs)
    return mine == theirs
def _numpy_types_to_python_types(
    parameterization: TParameterization,
) -> TParameterization:
    """Return a new parameterization with each value passed through
    ``numpy_type_to_python_type`` (Numpy int/float to Python int/float,
    if applicable).
    """
    coerced = {}
    for name, value in parameterization.items():
        coerced[name] = numpy_type_to_python_type(value)
    return coerced
def md5hash(parameters: TParameterization) -> str:
    """Return an MD5-based identifier for an arm's parameters.

    Values are passed through ``numpy_type_to_python_type`` so that they can
    be JSON-serialized, and keys are sorted so that the hash does not depend
    on insertion order. The input mapping is left untouched.

    Args:
        parameters: Parameterization; mapping of param name to value.

    Returns:
        Hex digest of the MD5 hash of the JSON-encoded parameters.
    """
    # Coerce into a fresh dict: computing a hash must not rewrite the
    # caller's values as a side effect.
    python_typed = {
        name: numpy_type_to_python_type(value) for name, value in parameters.items()
    }
    parameters_str = json.dumps(python_typed, sort_keys=True)
    return hashlib.md5(parameters_str.encode("utf-8")).hexdigest()
def raw_data_to_evaluation(
    raw_data: TEvaluationOutcome,
    metric_names: List[str],
    start_time: Optional[int] = None,
    end_time: Optional[int] = None,
) -> TEvaluationOutcome:
    """Format the trial evaluation data to a standard `TTrialEvaluation`
    (mapping from metric names to a tuple of mean and SEM) representation, or
    to a TMapTrialEvaluation.

    Note: this function expects raw_data to be data for a `Trial`, not a
    `BatchedTrial`.

    Args:
        raw_data: A dictionary from metric name to either a (mean, SEM) tuple
            or a bare numerical mean; or, when there is a single metric, a
            list (returned unchanged), a (mean, SEM) tuple, or a bare mean.
        metric_names: Names of the metrics; a non-dictionary ``raw_data`` is
            attributed to the first (and only) one.
        start_time: Not used by this function.
        end_time: Not used by this function.

    Returns:
        For dictionary input, a new dictionary in which every bare mean has
        been replaced by ``(float(mean), None)``; the input dictionary is not
        modified. Otherwise the list itself, or a one-item dictionary keyed
        by ``metric_names[0]``.

    Raises:
        ValueError: If the dictionary holds nested dictionaries or
            non-numerical values, if non-dictionary data is given for more
            than one metric, or if ``raw_data`` has an unsupported type.
    """
    numpy_scalar_types = (np.float32, np.float64, np.int32, np.int64)
    if isinstance(raw_data, dict):
        if any(isinstance(x, dict) for x in raw_data.values()):  # pragma: no cover
            raise ValueError("Raw data is expected to be just for one arm.")
        # Build a new mapping rather than rewriting the caller's dictionary.
        evaluation = {}
        for metric_name, dat in raw_data.items():
            if isinstance(dat, tuple):
                evaluation[metric_name] = dat
            elif isinstance(dat, (float, int) + numpy_scalar_types):
                # Numpy scalars are accepted as bare means below, so they are
                # accepted inside a dictionary as well.
                evaluation[metric_name] = (float(dat), None)
            else:
                raise ValueError(
                    "Raw data for an arm is expected to either be a tuple of "
                    "numerical mean and SEM or just a numerical mean. "
                    f"Got: {dat} for metric '{metric_name}'."
                )
        return evaluation
    elif len(metric_names) > 1:
        raise ValueError(
            "Raw data must be a dictionary of metric names to mean "
            "for multi-objective optimizations."
        )
    elif isinstance(raw_data, list):
        return raw_data
    elif isinstance(raw_data, tuple):
        return {metric_names[0]: raw_data}
    elif isinstance(raw_data, (float, int)):
        return {metric_names[0]: (raw_data, None)}
    elif isinstance(raw_data, numpy_scalar_types):
        return {metric_names[0]: (numpy_type_to_python_type(raw_data), None)}
    else:
        raise ValueError(
            "Raw data has an invalid type. The data must either be in the form "
            "of a dictionary of metric names to mean, sem tuples, "
            "or a single mean, sem tuple, or a single mean."
        )
def test_numpy_type_to_python_type(self):
    """Numpy int64/float64 scalars must come back as builtin int/float."""
    int_result = numpy_type_to_python_type(np.int64(2))
    self.assertEqual(type(int_result), int)

    float_result = numpy_type_to_python_type(np.float64(2))
    self.assertEqual(type(float_result), float)
def object_to_json(obj: Any) -> Any:
    """Convert an Ax object to a JSON-serializable dictionary.

    The root node passed to this function should always be an instance of a
    core Ax class or a JSON-compatible python builtin. The sub-fields of the
    input will then be recursively passed to this function.

    e.g. if we pass an instance of Experiment, we will first hit the line
    `obj_dict = ENCODER_REGISTRY[_type](obj)`, which will convert the
    Experiment to a (shallow) dictionary, where each subfield remains
    "unconverted", i.e.:
    {"name": <name: string>, "search_space": <search space: SearchSpace>}.
    We then pass each item of the dictionary back into this function to
    recursively convert the entire object.

    Lookup order: classes (via `CLASS_ENCODER_REGISTRY`), registered instance
    types (via `ENCODER_REGISTRY`), Python builtins, named tuples and
    dataclasses, then a fixed set of library types.

    Raises:
        ValueError: If `obj` is a class that is not a subclass of any key of
            `CLASS_ENCODER_REGISTRY`.
        JSONEncodeError: If none of the branches below handles the type of
            `obj`.
    """
    obj = numpy_type_to_python_type(obj)
    _type = type(obj)

    # Type[MyClass] encoding (encoding of classes, not instances)
    if isclass(obj):
        # The first registry key that `obj` is a subclass of wins.
        for class_type in CLASS_ENCODER_REGISTRY:
            if issubclass(obj, class_type):
                obj_dict = CLASS_ENCODER_REGISTRY[class_type](obj)
                return {k: object_to_json(v) for k, v in obj_dict.items()}

        raise ValueError(
            f"{obj} is a class. Add it to the CLASS_ENCODER_REGISTRY "
            "(and remove it from the ENCODER_REGISTRY if needed).")

    # Registered encoders take precedence over every generic branch below.
    if _type in ENCODER_REGISTRY:
        obj_dict = ENCODER_REGISTRY[_type](obj)
        return {k: object_to_json(v) for k, v in obj_dict.items()}

    # Python built-in types + `typing` module types
    if _type in (str, int, float, bool, type(None)):
        return obj
    elif _type is list:
        return [object_to_json(x) for x in obj]
    elif _type is tuple:
        return tuple(object_to_json(x) for x in obj)
    elif _type is dict:
        return {k: object_to_json(v) for k, v in obj.items()}
    elif _is_named_tuple(obj):
        return {  # pragma: no cover
            "__type": _type.__name__,
            **{k: object_to_json(v) for k, v in obj._asdict().items()},
        }
    elif dataclasses.is_dataclass(obj):
        return {
            "__type": _type.__name__,
            **{
                k: object_to_json(v) for k, v in dataclasses.asdict(obj).items()
            },
        }

    # Types from libraries, commonly used in Ax (e.g., numpy, pandas, torch)
    elif _type is OrderedDict:
        # Items are stored as a list of (key, value) pairs.
        return {
            "__type": _type.__name__,
            "value": [(k, object_to_json(v)) for k, v in obj.items()],
        }
    elif _type is datetime.datetime:
        return {
            "__type": _type.__name__,
            "value": datetime.datetime.strftime(obj, "%Y-%m-%d %H:%M:%S.%f"),
        }
    elif _type is pd.DataFrame:
        return {"__type": _type.__name__, "value": obj.to_json()}
    elif issubclass(_type, enum.Enum):
        # Only the member name is stored, not its value.
        return {"__type": _type.__name__, "name": obj.name}
    elif _type is np.ndarray or issubclass(_type, np.ndarray):
        return {"__type": _type.__name__, "value": obj.tolist()}
    elif _type is torch.Tensor:
        return {
            "__type": _type.__name__,
            # TODO: check size and add warning for large tensors: T69137799
            "value": obj.tolist(),
            "dtype": object_to_json(obj.dtype),
            "device": object_to_json(obj.device),
        }
    elif _type.__module__ == "torch":
        # Torch does not support saving to string, so save to buffer first
        return {
            "__type": f"torch_{_type.__name__}",
            "value": torch_type_to_str(obj)
        }

    # No branch matched: report the unsupported type to the caller.
    err = (f"Object {obj} passed to `object_to_json` (of type {_type}) is "
           f"not registered with a corresponding encoder in ENCODER_REGISTRY.")
    raise JSONEncodeError(err)
def object_attribute_dicts_find_unequal_fields(
    one_dict: Dict[str, Any], other_dict: Dict[str, Any], fast_return: bool = True
) -> Tuple[Dict[str, Tuple[Any, Any]], Dict[str, Tuple[Any, Any]]]:
    """Utility for finding out what attributes of two objects' attribute dicts
    are unequal.

    Only the keys of ``one_dict`` are visited; a key missing from
    ``other_dict`` is read as ``None``. Values are passed through
    ``numpy_type_to_python_type`` before being compared.

    Args:
        one_dict: First object's attribute dict (`obj.__dict__`).
        other_dict: Second object's attribute dict (`obj.__dict__`).
        fast_return: Boolean representing whether to return as soon as a
            single unequal attribute was found or to iterate over all
            attributes and collect all unequal ones.

    Returns:
        Two dictionaries:
            - attribute name to attribute values of unequal type (as a tuple),
            - attribute name to attribute values of unequal value (as a tuple).
        A field whose values differ in type is reported in the first
        dictionary only.
    """
    unequal_type, unequal_value = {}, {}
    for field in one_dict:
        one_val = one_dict.get(field)
        other_val = other_dict.get(field)
        one_val = numpy_type_to_python_type(one_val)
        other_val = numpy_type_to_python_type(other_val)
        if type(one_val) != type(other_val):
            unequal_type[field] = (one_val, other_val)
            if fast_return:
                return unequal_type, unequal_value
            # The comparisons below assume both values share a type (they
            # read `._name`, `.keys()`, ... on both sides), so running them
            # on mismatched types can raise; move on to the next field.
            continue

        if field == "_experiment":
            # prevent infinite loop when checking equality of Trials
            equal = one_val is other_val is None or (one_val._name == other_val._name)
        elif field == "_model":  # pragma: no cover (tested in modelbridge)
            # TODO[T52643706]: replace with per-`ModelBridge` method like
            # `equivalent_models`, to compare models more meaningfully.
            if not hasattr(one_val, "model"):
                equal = not hasattr(other_val, "model")
            else:
                # If model bridges have a `model` attribute, the types of the
                # values of those attributes should be equal if the model
                # bridge is the same.
                equal = isinstance(one_val.model, type(other_val.model))
        elif isinstance(one_val, list):
            equal = same_elements(one_val, other_val)
        elif isinstance(one_val, dict):
            equal = sorted(one_val.keys()) == sorted(other_val.keys())
            equal = equal and same_elements(
                list(one_val.values()), list(other_val.values())
            )
        elif isinstance(one_val, np.ndarray):
            equal = np.array_equal(one_val, other_val)
        elif isinstance(one_val, datetime):
            equal = datetime_equals(one_val, other_val)
        elif isinstance(one_val, float):
            equal = np.isclose(one_val, other_val)
        elif isinstance(one_val, pd.DataFrame):
            equal = dataframe_equals(one_val, other_val)
        else:
            equal = one_val == other_val

        if not equal:
            unequal_value[field] = (one_val, other_val)
            if fast_return:
                return unequal_type, unequal_value
    return unequal_type, unequal_value
def complete_trial(
    self,
    trial_index: int,
    # acceptable `raw_data` argument formats:
    # 1) {metric_name -> (mean, standard error)}
    # 2) (mean, standard error) and we assume metric name == objective name
    # 3) only the mean, and we assume metric name == objective name and
    #    standard error == 0
    raw_data: TEvaluationOutcome,
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """
    Mark a trial as completed, attaching its evaluation data and, optionally,
    run metadata.

    Args:
        trial_index: Index of trial within the experiment.
        raw_data: Evaluation data for the trial. Can be a mapping from
            metric name to a tuple of mean and SEM, just a tuple of mean and
            SEM if only one metric in optimization, or just the mean, in
            which case the SEM is recorded as 0.0.
        metadata: Additional metadata to track about this run.

    Raises:
        NotImplementedError: If the trial at ``trial_index`` is not a
            ``Trial``.
        ValueError: If ``raw_data`` is of an unsupported type.
    """
    assert isinstance(trial_index, int), (
        f"Trial index must be an int, got: {trial_index}."
    )  # pragma: no cover
    trial = self.experiment.trials[trial_index]
    if not isinstance(trial, Trial):
        raise NotImplementedError(
            "Batch trial functionality is not yet available through Service API."
        )

    if metadata is not None:
        trial._run_metadata = metadata

    # Shape the data as {arm name -> {metric name -> (mean, SEM)}}.
    if isinstance(raw_data, dict):
        evaluations = {not_none(trial.arm).name: raw_data}
    elif isinstance(raw_data, tuple):
        arm_name = not_none(trial.arm).name
        objective_name = self.experiment.optimization_config.objective.metric.name
        evaluations = {arm_name: {objective_name: raw_data}}
    elif isinstance(raw_data, (float, int)):
        arm_name = not_none(trial.arm).name
        objective_name = self.experiment.optimization_config.objective.metric.name
        evaluations = {arm_name: {objective_name: (raw_data, 0.0)}}
    elif isinstance(raw_data, (np.float32, np.float64, np.int32, np.int64)):
        arm_name = not_none(trial.arm).name
        objective_name = self.experiment.optimization_config.objective.metric.name
        mean = numpy_type_to_python_type(raw_data)
        evaluations = {arm_name: {objective_name: (mean, 0.0)}}
    else:
        raise ValueError(
            "Raw data has an invalid type. The data must either be in the form "
            "of a dictionary of metric names to mean, sem tuples, "
            "or a single mean, sem tuple, or a single mean."
        )

    data = Data.from_evaluations(evaluations, trial.index)
    trial.mark_completed()
    self.experiment.attach_data(data)
    self._updated_trials.append(trial_index)
    self._save_experiment_if_possible()