def complete_trial(self, trial_index, raw_data, metadata=None):
    """
    This has stricter requirements on the raw_data than the AxClient, which
    simplifies this code.

    @param trial_index (int)
    The index returned by get_next_trial.

    @param raw_data (dict)
    Format: {"metric1": (mean1, sem1), "metric2": (mean2, sem2)}
    If the sem is None, Ax will try to infer it.
    """
    if not isinstance(raw_data, dict) or any(
            isinstance(v, numbers.Number) for v in raw_data.values()):
        # A stricter requirement than the AxClient (intentionally)
        raise ValueError(
            "CoreAxClient requires explicit metric names, means, and SEMs."
            f" You provided: {raw_data}")

    trial = self.experiment.trials.get(trial_index)
    trial._run_metadata = metadata if metadata is not None else {}
    self.experiment.attach_data(data=Data.from_evaluations(
        evaluations={trial.arm.name: raw_data},
        trial_index=trial.index,
    ))
    if self.verbose:
        print(f"Marking Ax trial {trial.index} as completed")
    trial.mark_completed()
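# Hedged usage sketch for complete_trial (not part of the original code).
# "client" is assumed to be a CoreAxClient instance and "evaluate" is a
# hypothetical objective function; only the raw_data format and the
# get_next_trial call are taken from the docstring above.
#
#   parameters, trial_index = client.get_next_trial()
#   mae, mae_sem = evaluate(parameters)
#   client.complete_trial(
#       trial_index,
#       raw_data={"mae": (mae, mae_sem)},  # or (mae, None) to let Ax infer the SEM
#   )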
def create_load_experiment(self):
    """Creates the experiment or loads it from the json file."""
    if path.exists(path.join(self.root, self.name + ".json")):
        exp = load_data(path.join(self.root, self.name), self.objectives)
        data = pass_data_to_exp(path.join(self.root, self.name + ".csv"))
        exp.attach_data(data)
    else:
        exp = self.get_experiment()
        data = Data()
    return exp, data
def fetch_trial_data(self, trial):
    records = []
    for arm_name, arm in trial.arms_by_name.items():
        params = arm.parameters
        mean, sem = jumper(params)
        records.append({
            "arm_name": arm_name,
            "metric_name": self.name,
            "mean": mean,
            "sem": sem,
            "trial_index": trial.index,
        })
    return Data(df=pd.DataFrame.from_records(records))
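# A self-contained sketch (an assumption, not taken from the original repo) of
# how a fetch_trial_data method like the ones in this section plugs into Ax:
# subclass Metric, then reference it from the experiment's OptimizationConfig.
# "BoothMetric" and the Booth objective are hypothetical stand-ins for
# jumper/crabnet_mae; the Experiment/Sobol usage mirrors matbench_fold below.
from ax import (Data, Experiment, Metric, Objective, OptimizationConfig,
                ParameterType, RangeParameter, SearchSpace)
from ax.modelbridge.registry import Models
from ax.runners.synthetic import SyntheticRunner
import pandas as pd


class BoothMetric(Metric):
    def fetch_trial_data(self, trial):
        records = []
        for arm_name, arm in trial.arms_by_name.items():
            x1, x2 = arm.parameters["x1"], arm.parameters["x2"]
            records.append({
                "arm_name": arm_name,
                "metric_name": self.name,
                # Booth function, used here only as a toy objective
                "mean": (x1 + 2 * x2 - 7) ** 2 + (2 * x1 + x2 - 5) ** 2,
                "sem": 0.0,
                "trial_index": trial.index,
            })
        return Data(df=pd.DataFrame.from_records(records))


search_space = SearchSpace(parameters=[
    RangeParameter(name="x1", parameter_type=ParameterType.FLOAT,
                   lower=-10.0, upper=10.0),
    RangeParameter(name="x2", parameter_type=ParameterType.FLOAT,
                   lower=-10.0, upper=10.0),
])
exp = Experiment(
    name="booth_demo",
    search_space=search_space,
    optimization_config=OptimizationConfig(
        objective=Objective(metric=BoothMetric(name="booth"), minimize=True)),
    runner=SyntheticRunner(),
)
sobol = Models.SOBOL(exp.search_space)
for _ in range(5):
    trial = exp.new_trial(generator_run=sobol.gen(1))
    trial.run()
    trial.mark_completed()
print(exp.fetch_data().df)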
def fetch_trial_data(self, trial):
    """Retrieve the trial's data for this metric."""
    records = []
    for arm_name, arm in trial.arms_by_name.items():
        self.parametrization = arm.parameters
        records.append({
            "arm_name": arm_name,
            "metric_name": self.name,
            "mean": self.net_weighting(),
            "sem": 0.0,
            "trial_index": trial.index,
        })
    return Data(df=DataFrame.from_records(records))
def fetch_trial_data(self, trial):
    """Retrieve the trial's data for this metric."""
    records = []
    for arm_name, arm in trial.arms_by_name.items():
        self.parametrization = arm.parameters
        records.append({
            "arm_name": arm_name,
            "metric_name": self.name,
            "mean": self.latency_measure(),
            "sem": 0.0,
            "trial_index": trial.index,
            # TODO: add time spent in each trial
        })
    return Data(df=DataFrame.from_records(records))
def fetch_trial_data(self, trial):
    # Cache per-arm records so repeated fetches of the same trial do not
    # re-run evaluation_func.
    records = []
    if str(trial.index) not in self.trial_cache:
        self.trial_cache[str(trial.index)] = {}
    for arm_name, arm in trial.arms_by_name.items():
        if arm_name not in self.trial_cache[str(trial.index)]:
            params = arm.parameters
            record = {
                "arm_name": arm_name,
                "metric_name": self.name,
                "mean": evaluation_func(params),
                "sem": 0.0,
                "trial_index": trial.index,
            }
            self.trial_cache[str(trial.index)][str(arm_name)] = record
        else:
            record = self.trial_cache[str(trial.index)][str(arm_name)]
        records.append(record)
    return Data(df=DataFrame.from_records(records))
def fetch_trial_data(self, trial):
    records = []
    for arm_name, arm in trial.arms_by_name.items():
        params = arm.parameters
        # TODO: add timing info as optional parameter and as outcome metric
        # TODO: maybe add interval score calculation as outcome metric
        mean = crabnet_mae(params, self.train_val_df, n_splits=self.n_splits)
        records.append({
            "arm_name": arm_name,
            "metric_name": self.name,
            "trial_index": trial.index,
            "mean": mean,
            "sem": None,
        })
    return Data(df=pd.DataFrame.from_records(records))
def matbench_fold(fold):
    t0 = time()
    train_inputs, train_outputs = task.get_train_and_val_data(fold)
    train_val_df = pd.DataFrame({
        "formula": train_inputs.values,
        "target": train_outputs.values,
    })
    if dummy:
        train_val_df = train_val_df[:25]

    optimization_config = OptimizationConfig(
        objective=Objective(
            metric=CrabNetMetric(
                name=metric, train_val_df=train_val_df, n_splits=n_splits),
            minimize=True,
        ),
    )
    # TODO: use status_quo (Arm) as default CrabNet parameters
    exp = Experiment(
        name="nested_crabnet_mae_saas",
        search_space=search_space,
        optimization_config=optimization_config,
        runner=SyntheticRunner(),
    )

    # Quasi-random initialization with Sobol points
    sobol = Models.SOBOL(exp.search_space)
    print("evaluating SOBOL points")
    for _ in range(n_sobol):
        print(_)
        trial = exp.new_trial(generator_run=sobol.gen(1))
        trial.run()
        trial.mark_completed()

    data = exp.fetch_data()

    # Bayesian optimization loop with the fully Bayesian (SAAS) model
    j = -1
    new_value = np.nan
    best_so_far = np.nan
    for j in range(n_saas):
        saas = Models.FULLYBAYESIAN(
            experiment=exp,
            data=exp.fetch_data(),
            num_samples=num_samples,  # Increasing this may result in better model fits
            warmup_steps=warmup_steps,  # Increasing this may result in better model fits
            gp_kernel="rbf",  # "rbf" is the default in the paper, but we also support "matern"
            torch_device=tkwargs["device"],
            torch_dtype=tkwargs["dtype"],
            verbose=False,  # Set to True to print stats from MCMC
            disable_progbar=True,  # Set to False to print a progress bar from MCMC
        )
        generator_run = saas.gen(1)
        best_arm, _ = generator_run.best_arm_predictions
        trial = exp.new_trial(generator_run=generator_run)
        trial.run()
        trial.mark_completed()
        data = Data.from_multiple_data([data, trial.fetch_data()])

        new_value = trial.fetch_data().df["mean"].min()
        best_so_far = data.df["mean"].min()
        tf = time()
        print(
            f"iter{j}, BestInIter:{new_value:.3f}, BestSoFar:{best_so_far:.3f} elapsed time: {tf - t0}"
        )

    exp.fetch_data()
    best_parameters = best_arm.parameters

    experiment_fpath = join(experiment_dir, "experiment" + str(fold) + ".json")
    save_experiment(exp, experiment_fpath)

    test_pred, default_mae, test_mae, best_parameterization = get_test_results(
        task, fold, best_parameters, train_val_df)
    print(f"default_mae: {default_mae}")
    print(f"test_mae: {test_mae}")
    # maes.append(test_mae)  # [0.32241879861870626, ...]
    # task.record(fold, test_pred, params=best_parameterization)
    return test_pred, best_parameterization
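# "tkwargs" is referenced in matbench_fold but not defined in this snippet; a
# typical definition (an assumption, not recovered from the original source)
# is a dict carrying the torch device and dtype passed to Models.FULLYBAYESIAN:
import torch

tkwargs = {
    "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    "dtype": torch.double,
}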
def pass_data_to_exp(csv):
    """Loads the values from each of the evaluations to be further passed
    to an experiment."""
    dataframe = read_csv(csv, index_col=0)
    return Data(df=dataframe)
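# A minimal round-trip sketch (assumed, not from the original repo) of the CSV
# layout pass_data_to_exp expects: the same columns the fetch_trial_data
# methods above emit, written with the index so read_csv(..., index_col=0)
# recovers it. The metric name "latency" and the file name are illustrative.
import pandas as pd

records = [
    {"arm_name": "0_0", "metric_name": "latency", "mean": 1.23,
     "sem": 0.0, "trial_index": 0},
    {"arm_name": "1_0", "metric_name": "latency", "mean": 0.98,
     "sem": 0.0, "trial_index": 1},
]
pd.DataFrame.from_records(records).to_csv("example_evaluations.csv")
data = pass_data_to_exp("example_evaluations.csv")  # returns an Ax Data object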