Example #1
 def make_dataset(
     self,
     dag: FugueWorkflow,
     dataset: Any,
     df: Any = None,
     df_name: str = TUNE_DATASET_DF_DEFAULT_NAME,
     test_df: Any = None,
     test_df_name: str = TUNE_DATASET_VALIDATION_DF_DEFAULT_NAME,
     partition_keys: Optional[List[str]] = None,
     temp_path: str = "",
 ) -> TuneDataset:
     assert_or_throw(dataset is not None,
                     TuneCompileError("dataset can't be None"))
     if isinstance(dataset, TuneDataset):
         assert_or_throw(
             df is None,
             TuneCompileError("can't set df when dataset is TuneDataset"))
         return dataset
     if isinstance(dataset, Space):
         path = self.get_path_or_temp(temp_path)
         builder = TuneDatasetBuilder(dataset, path)
         if df is not None:
             wdf = dag.df(df)
             if partition_keys is not None and len(partition_keys) > 0:
                 wdf = wdf.partition_by(*partition_keys)
             builder.add_df(df_name, wdf)
         if test_df is not None:
             wdf = dag.df(test_df)
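         # test_df cross joins every trial by default; with partition
         # keys it is partitioned and inner joined on those keys instead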
             how = "cross"
             if partition_keys is not None and len(partition_keys) > 0:
                 wdf = wdf.partition_by(*partition_keys)
                 how = "inner"
             builder.add_df(test_df_name, wdf, how=how)
         return builder.build(dag, batch_size=1, shuffle=True)
     raise TuneCompileError(f"{dataset} can't be converted to TuneDataset")
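For orientation, a hedged usage sketch of this factory method (the import paths and the Grid parameter expression are assumptions based on how these names are used elsewhere in this listing, not a verified API):

import pandas as pd
from fugue import FugueWorkflow
from tune import TUNE_OBJECT_FACTORY, Grid, Space  # assumed import paths

dag = FugueWorkflow()
space = Space(alpha=Grid(0.1, 0.2))  # two candidate configurations
train = pd.DataFrame({"key": [0, 0, 1], "x": [1.0, 2.0, 3.0]})

# partition_keys makes each "key" group a separate tuning partition
dataset = TUNE_OBJECT_FACTORY.make_dataset(
    dag,
    space,
    df=train,
    partition_keys=["key"],
    temp_path="/tmp/tune",
)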
Example #2
 def _object_to_iterative_objective(self,
                                    obj: Any) -> IterativeObjectiveFunc:
     assert_or_throw(obj is not None,
                     TuneCompileError("objective can't be None"))
     if isinstance(obj, IterativeObjectiveFunc):
         return obj
     raise TuneCompileError(
         f"{obj} can't be converted to iterative objective function")
Example #3
 def make_noniterative_objective(self,
                                 obj: Any) -> NonIterativeObjectiveFunc:
     assert_or_throw(obj is not None,
                     TuneCompileError("objective can't be None"))
     if isinstance(obj, NonIterativeObjectiveFunc):
         return obj
     return self._noniterative_objective_converter(obj)
Example #4
 def _object_to_stopper(self, obj: Any) -> Optional[NonIterativeStopper]:
     if isinstance(obj, NonIterativeStopper):
         return obj
     if obj is None:
         return None
     raise TuneCompileError(
         f"{obj} can't be converted to NonIterativeStopper")
Example #5
 def _object_to_noniterative_local_optimizer(
         self, obj: Any) -> NonIterativeObjectiveLocalOptimizer:
     if isinstance(obj, NonIterativeObjectiveLocalOptimizer):
         return obj
     if obj is None:
         return NonIterativeObjectiveLocalOptimizer()
     raise TuneCompileError(
         f"{obj} can't be converted to non iterative objective optimizer")
Example #6
    def add_df(self,
               name: str,
               df: WorkflowDataFrame,
               how: str = "") -> "TuneDatasetBuilder":
        """Add a dataframe to the dataset

        :param name: name of the dataframe, it will also create a
          ``__tune_df__<name>`` column in the dataset dataframe
        :param df: the dataframe to add.
        :param how: join type, can accept ``semi``, ``left_semi``,
          ``anti``, ``left_anti``, ``inner``, ``left_outer``,
          ``right_outer``, ``full_outer``, ``cross``
        :returns: the builder itself

        .. note::

            For the first dataframe you add, ``how`` must be empty.
            For every subsequent dataframe, ``how`` must be set.

        .. note::

            If ``df`` is prepartitioned, the partition key will be used to
            join with the added dataframes. Read
            :ref:`TuneDataset Tutorial </notebooks/tune_dataset.ipynb>`
            for more details.
        """
        assert_or_throw(
            not any(r[0] == name for r in self._dfs_spec),
            TuneCompileError(name + " already exists"),
        )
        if len(self._dfs_spec) == 0:
            assert_or_throw(
                how == "",
                TuneCompileError("first dataframe can't specify how to join"))
        else:
            assert_or_throw(
                how != "",
                TuneCompileError(
                    "must specify how to join after first dataframe"),
            )
        self._dfs_spec.append((name, df, how))
        return self
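A short usage sketch of the rule the docstring states: the first add_df call leaves how empty, every later call must pick a join type. The names and dataframes are illustrative; the build call mirrors Example #1:

builder = TuneDatasetBuilder(space, "/tmp/tune")
builder.add_df("train", dag.df(train_pdf))             # first df: how stays ""
builder.add_df("test", dag.df(test_pdf), how="cross")  # later dfs must set how
data = builder.build(dag, batch_size=1, shuffle=True)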
Example #7
 def _get_distributed(self, distributed: Optional[bool]) -> bool:
     if distributed is None:
         return self._optimizer.distributable
     if distributed:
         assert_or_throw(
             self._optimizer.distributable,
             TuneCompileError(
                 f"can't distribute non-distributable optimizer {self._optimizer}"
             ),
         )
         return True
     return False
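The method resolves the caller's distributed flag against the optimizer's capability in three ways: None defers to the optimizer, True is validated against it, False always wins. A minimal standalone sketch of the same logic with a hypothetical stub optimizer (not part of the library):

from typing import Optional

class _StubOptimizer:
    distributable = False  # this stand-in can only run locally

def resolve_distributed(optimizer, distributed: Optional[bool]) -> bool:
    if distributed is None:  # defer to the optimizer's own capability
        return optimizer.distributable
    if distributed and not optimizer.distributable:
        raise ValueError(f"can't distribute non-distributable optimizer {optimizer}")
    return distributed  # True (validated above) or False

assert resolve_distributed(_StubOptimizer(), None) is False
assert resolve_distributed(_StubOptimizer(), False) is False
# resolve_distributed(_StubOptimizer(), True) raises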
Example #8
def suggest_by_hyperband(
    objective: Any,
    space: Space,
    plans: List[List[Tuple[float, int]]],
    train_df: Any = None,
    temp_path: str = "",
    partition_keys: Optional[List[str]] = None,
    top_n: int = 1,
    monitor: Any = None,
    distributed: Optional[bool] = None,
    execution_engine: Any = None,
    execution_engine_conf: Any = None,
) -> List[TrialReport]:
    assert_or_throw(
        not space.has_random_parameter,
        TuneCompileError("space can't contain random parameters, "
                         "use sample method before calling this function"),
    )
    dag = FugueWorkflow()
    dataset = TUNE_OBJECT_FACTORY.make_dataset(
        dag,
        space,
        df=train_df,
        partition_keys=partition_keys,
        temp_path=temp_path,
    )
    study = optimize_by_hyperband(
        objective=objective,
        dataset=dataset,
        plans=plans,
        checkpoint_path=temp_path,
        distributed=distributed,
        monitor=monitor,
    )
    study.result(top_n).yield_dataframe_as("result")

    rows = list(
        dag.run(
            execution_engine,
            conf=execution_engine_conf,
        )["result"].as_dict_iterable())
    return [
        TrialReport.from_jsondict(json.loads(r[TUNE_REPORT]))
        for r in sorted(rows, key=lambda r: r[TUNE_REPORT_METRIC])
    ]
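A hedged call sketch. The assertion above requires sampling the space first; Rand, the sample signature, and reading each (float, int) pair as (budget, number of survivors) are assumptions, and my_objective stands for anything the factory can convert:

from tune import Rand, Space  # assumed import paths

space = Space(lr=Rand(0.001, 0.1)).sample(16, seed=0)  # materialize random params
reports = suggest_by_hyperband(
    objective=my_objective,
    space=space,
    plans=[[(1.0, 8), (2.0, 4), (4.0, 2)]],  # one bracket with three rungs
    temp_path="/tmp/tune",
    top_n=3,
)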
Example #9
def to_noniterative_objective(
    obj: Any,
    min_better: bool = True,
    global_vars: Optional[Dict[str, Any]] = None,
    local_vars: Optional[Dict[str, Any]] = None,
) -> NonIterativeObjectiveFunc:
    if isinstance(obj, NonIterativeObjectiveFunc):
        return copy.copy(obj)
    global_vars, local_vars = get_caller_global_local_vars(global_vars, local_vars)
    try:
        f = to_function(obj, global_vars=global_vars, local_vars=local_vars)
        # a string expression may resolve to an already decorated function
        if isinstance(f, NonIterativeObjectiveFunc):
            return copy.copy(f)
        # plain functions without the decorator are wrapped
        return _NonIterativeObjectiveFuncWrapper.from_func(f, min_better)
    except Exception as e:
        exp = e
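    # raising outside the except block avoids chaining onto the caught traceback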
    raise TuneCompileError(f"{obj} is not a valid tunable function", exp)
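A usage sketch for the undecorated path: a plain function falls through to _NonIterativeObjectiveFuncWrapper.from_func, and min_better flips the ranking direction. The objective below is hypothetical:

def objective(a: float, b: float) -> float:
    return a ** 2 + b ** 2  # parameters are filled in from the search space

func = to_noniterative_objective(objective)  # minimize by default
func_max = to_noniterative_objective(objective, min_better=False)  # maximize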
Example #10
def suggest_by_sha(
    objective: Any,
    space: Space,
    plan: List[Tuple[float, int]],
    train_df: Any = None,
    temp_path: str = "",
    partition_keys: Optional[List[str]] = None,
    top_n: int = 1,
    monitor: Any = None,
    distributed: Optional[bool] = None,
    execution_engine: Any = None,
    execution_engine_conf: Any = None,
) -> List[TrialReport]:
    assert_or_throw(
        not space.has_stochastic,
        TuneCompileError("space can't contain random parameters, "
                         "use sample method before calling this function"),
    )
    dag = FugueWorkflow()
    dataset = TUNE_OBJECT_FACTORY.make_dataset(
        dag,
        space,
        df=train_df,
        partition_keys=partition_keys,
        temp_path=temp_path,
    )
    study = optimize_by_sha(
        objective=objective,
        dataset=dataset,
        plan=plan,
        checkpoint_path=temp_path,
        distributed=distributed,
        monitor=monitor,
    )
    study.result(top_n).yield_dataframe_as("result")

    return _run(
        dag=dag,
        execution_engine=execution_engine,
        execution_engine_conf=execution_engine_conf,
    )
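Compared with suggest_by_hyperband, this function takes a single rung sequence rather than a list of brackets; otherwise the call shape is the same. A hedged sketch with illustrative numbers, reusing the assumptions from the previous sketch:

reports = suggest_by_sha(
    objective=my_objective,
    space=space.sample(16, seed=0),
    plan=[(1.0, 8), (2.0, 4), (4.0, 2)],  # one plan, not a list of plans
    temp_path="/tmp/tune",
    top_n=1,
)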
Example #11
 def _object_to_monitor(self, obj: Any) -> Optional[Monitor]:
     if obj is None:
         return None
     raise TuneCompileError(f"{obj} can't be converted to Monitor")
Example #12
 def _object_to_noniterative_objective_runner(
         self, obj: Any) -> NonIterativeObjectiveRunner:
     if obj is None:
         return NonIterativeObjectiveRunner()
     raise TuneCompileError(
         f"{obj} can't be converted to non iterative objective runner")
Example #13
 def _object_to_noniterative_objective(
         self, obj: Any) -> NonIterativeObjectiveFunc:
     raise TuneCompileError(
         f"{obj} can't be converted to non iterative objective function")
Example #14
 def get_path_or_temp(self, path: str) -> str:
     if path is None or path == "":
         path = self._tmp
     assert_or_throw(path != "",
                     TuneCompileError("path or temp path must be set"))
     return path