Example #1
    def complete_trial(self, trial_index, raw_data, metadata=None):
        """
        This has stricter requirements on raw_data than the AxClient does, which
        simplifies this code.

        @param trial_index (int)
        The index returned by get_next_trial.

        @param raw_data (dict)
        Format: {"metric1": (mean1, sem1),
                 "metric2": (mean2, sem2)}
        If the sem is None, Ax will try to infer it.
        """
        if not isinstance(raw_data, dict) or any(
                isinstance(v, numbers.Number) for v in raw_data.values()):
            # A more strict requirement than the AxClient (intentionally)
            raise ValueError(
                "CoreAxClient requires explicit metric names, means, and SEMs."
                f" You provided: {raw_data}")

        trial = self.experiment.trials.get(trial_index)
        trial._run_metadata = metadata if metadata is not None else {}
        self.experiment.attach_data(data=Data.from_evaluations(
            evaluations={trial.arm.name: raw_data},
            trial_index=trial.index,
        ))
        if self.verbose:
            print(f"Marking Ax trial {trial.index} as completed")
        trial.mark_completed()
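A hedged sketch of the call pattern this method expects; the CoreAxClient-style object and the `evaluate` objective are not part of the snippet, so both are passed in as assumptions here:

def run_one_trial(client, evaluate):
    # `client` is assumed to expose get_next_trial() returning (parameters, trial_index).
    parameters, trial_index = client.get_next_trial()
    mean = evaluate(parameters)
    # Values must be explicit (mean, sem) tuples; a bare number triggers the ValueError above.
    client.complete_trial(
        trial_index,
        raw_data={"loss": (mean, None)},  # sem=None lets Ax try to infer the noise
        metadata={"worker": "local"},
    )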
Example #2
 def create_load_experiment(self):
     """ Creates the experiment or loads it from the json file"""
     if path.exists(path.join(self.root, self.name + ".json")):
         exp = load_data(path.join(self.root, self.name), self.objectives)
         data = pass_data_to_exp(path.join(self.root, self.name + ".csv"))
         exp.attach_data(data)
     else:
         exp = self.get_experiment()
         data = Data()
     return exp, data
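The JSON and CSV files this method looks for can be produced with Ax's JSON storage helper (used the same way in Example #8) and the Data object's DataFrame. A rough sketch of the matching save step, with `root` and `name` standing in for `self.root` and `self.name`:

from os import path

from ax.storage.json_store.save import save_experiment

def save_experiment_state(exp, data, root, name):
    """Hypothetical counterpart to create_load_experiment (not in the original snippet)."""
    save_experiment(exp, path.join(root, name + ".json"))
    # Written so the CSV round-trips through pass_data_to_exp (Example #9).
    data.df.to_csv(path.join(root, name + ".csv"))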
Example #3
 def fetch_trial_data(self, trial):
     records = []
     for arm_name, arm in trial.arms_by_name.items():
         params = arm.parameters
         mean, sem = jumper(params)
         records.append({
             "arm_name": arm_name,
             "metric_name": self.name,
             "mean": mean,
             "sem": sem,
             "trial_index": trial.index,
         })
     return Data(df=pd.DataFrame.from_records(records))
Example #4
 def fetch_trial_data(self, trial):
     """
     Function to retrieve the trials data for this metric
     """
     records = []
     for arm_name, arm in trial.arms_by_name.items():
         self.parametrization = arm.parameters
         records.append({
             "arm_name": arm_name,
             "metric_name": self.name,
             "mean": self.net_weighting(),
             "sem": 0.0,
             "trial_index": trial.index,
         })
     return Data(df=DataFrame.from_records(records))
Example #5
 def fetch_trial_data(self, trial):
     """
     Function to retrieve the trials data for this metric
     """
     records = []
     for arm_name, arm in trial.arms_by_name.items():
         self.parametrization = arm.parameters
         records.append({
             "arm_name": arm_name,
             "metric_name": self.name,
             "mean": self.latency_measure(),
             "sem": 0.0,
             "trial_index": trial.index,
             # TODO: add time spent in each trial
         })
     return Data(df=DataFrame.from_records(records))
Example #6
 def fetch_trial_data(self, trial):
     records = []
     if str(trial.index) not in self.trial_cache:
         self.trial_cache[str(trial.index)] = {}
     for arm_name, arm in trial.arms_by_name.items():
         if arm_name not in self.trial_cache[str(trial.index)]:
             params = arm.parameters
             record = {
                 "arm_name": arm_name,
                 "metric_name": self.name,
                 "mean": evaluation_func(params),
                 "sem": 0.0,
                 "trial_index": trial.index,
             }
             self.trial_cache[str(trial.index)][str(arm_name)] = record
         else:
             record = self.trial_cache[str(trial.index)][str(arm_name)]
         records.append(record)
     return Data(df=DataFrame.from_records(records))
Example #7
    def fetch_trial_data(self, trial):
        records = []
        for arm_name, arm in trial.arms_by_name.items():
            params = arm.parameters

            # TODO: add timing info as optional parameter and as outcome metric
            # TODO: maybe add interval score calculation as outcome metric
            mean = crabnet_mae(params,
                               self.train_val_df,
                               n_splits=self.n_splits)

            records.append({
                "arm_name": arm_name,
                "metric_name": self.name,
                "trial_index": trial.index,
                "mean": mean,
                "sem": None,
            })
        return Data(df=pd.DataFrame.from_records(records))
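Examples #3 through #7 all override fetch_trial_data on a custom Ax Metric subclass, but the class definitions themselves are not included. A hedged sketch of what the enclosing class for this snippet might look like, so that Example #8 can construct `CrabNetMetric(name=..., train_val_df=..., n_splits=...)`; the constructor signature is an assumption inferred from that call:

from ax import Metric

class CrabNetMetric(Metric):
    # Assumed constructor, inferred from the keyword arguments used in Example #8.
    def __init__(self, name, train_val_df, n_splits, lower_is_better=True):
        super().__init__(name=name, lower_is_better=lower_is_better)
        self.train_val_df = train_val_df
        self.n_splits = n_splits

    # fetch_trial_data from this example is defined here, returning an Ax Data object.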
Example #8
def matbench_fold(fold):
    t0 = time()
    train_inputs, train_outputs = task.get_train_and_val_data(fold)
    train_val_df = pd.DataFrame({
        "formula": train_inputs.values,
        "target": train_outputs.values
    })
    if dummy:
        train_val_df = train_val_df[:25]

    optimization_config = OptimizationConfig(
        objective=Objective(
            metric=CrabNetMetric(name=metric,
                                 train_val_df=train_val_df,
                                 n_splits=n_splits),
            minimize=True,
        ),
    )
    # TODO: use status_quo (Arm) as default CrabNet parameters
    exp = Experiment(
        name="nested_crabnet_mae_saas",
        search_space=search_space,
        optimization_config=optimization_config,
        runner=SyntheticRunner(),
    )

    sobol = Models.SOBOL(exp.search_space)
    print("evaluating SOBOL points")
    for i in range(n_sobol):
        print(f"Sobol trial {i + 1}/{n_sobol}")
        trial = exp.new_trial(generator_run=sobol.gen(1))
        trial.run()
        trial.mark_completed()

    data = exp.fetch_data()
    j = -1
    new_value = np.nan
    best_so_far = np.nan
    for j in range(n_saas):
        saas = Models.FULLYBAYESIAN(
            experiment=exp,
            data=exp.fetch_data(),
            num_samples=num_samples,  # Increasing this may result in better model fits
            warmup_steps=warmup_steps,  # Increasing this may result in better model fits
            gp_kernel="rbf",  # "rbf" is the default in the paper, but we also support "matern"
            torch_device=tkwargs["device"],
            torch_dtype=tkwargs["dtype"],
            verbose=False,  # Set to True to print stats from MCMC
            disable_progbar=True,  # Set to False to print a progress bar from MCMC
        )
        generator_run = saas.gen(1)
        best_arm, _ = generator_run.best_arm_predictions
        trial = exp.new_trial(generator_run=generator_run)
        trial.run()
        trial.mark_completed()
        data = Data.from_multiple_data([data, trial.fetch_data()])
        new_value = trial.fetch_data().df["mean"].min()
        best_so_far = data.df["mean"].min()
        tf = time()
        print(
            f"iter {j}, BestInIter: {new_value:.3f}, BestSoFar: {best_so_far:.3f}, elapsed time: {tf - t0:.1f} s"
        )

    exp.fetch_data()
    best_parameters = best_arm.parameters

    experiment_fpath = join(experiment_dir, "experiment" + str(fold) + ".json")
    save_experiment(exp, experiment_fpath)

    test_pred, default_mae, test_mae, best_parameterization = get_test_results(
        task, fold, best_parameters, train_val_df)
    print(f"default_mae: {default_mae}")
    print(f"test_mae: {test_mae}")
    # maes.append(test_mae)  # [0.32241879861870626, ...]

    # task.record(fold, test_pred, params=best_parameterization)

    return test_pred, best_parameterization
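The outer driver is not shown; with the Matbench API this per-fold function would typically be called in a loop over `task.folds`, roughly as below (the benchmark subset name and output filename are assumptions):

from matbench.bench import MatbenchBenchmark

mb = MatbenchBenchmark(autoload=False, subset=["matbench_expt_gap"])  # subset is an assumption
for task in mb.tasks:
    task.load()
    for fold in task.folds:
        test_pred, best_parameterization = matbench_fold(fold)
        # Mirrors the commented-out task.record call above.
        task.record(fold, test_pred, params=best_parameterization)
mb.to_file("expt_gap_benchmark.json.gz")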
Example #9
def pass_data_to_exp(csv):
    """Loads the values from each of the evaluations to be further
    passed to a experiment"""
    dataframe = read_csv(csv, index_col=0)
    return Data(df=dataframe)
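For the round-trip in Example #2 to work, the CSV read here needs the same columns the fetch_trial_data snippets produce (arm_name, metric_name, mean, sem, trial_index). A hedged sketch of the matching dump step; `dump_exp_data` is a hypothetical helper, not part of the snippets:

from ax.core.data import Data

def dump_exp_data(data: Data, csv: str) -> None:
    """Hypothetical inverse of pass_data_to_exp (not in the original snippets)."""
    # Data.df is a pandas DataFrame; write it with the index so that
    # read_csv(csv, index_col=0) above restores it unchanged.
    data.df.to_csv(csv)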