示例#1
0
    def concat(
            self,
            other: "TuningParametersTemplate") -> "TuningParametersTemplate":
        """Build a new template holding the content of this template and ``other``.

        .. note::

            ``ValueError`` is raised when a key of ``other`` is already present
            in this template.

        :param other: the template to append to this one
        :return: the merged template
        """
        merged = TuningParametersTemplate({})
        merged._units = [unit.copy() for unit in self._units]
        merged._has_grid = self._has_grid | other._has_grid
        merged._has_stochastic = self._has_stochastic | other._has_stochastic
        merged._template = dict(self._template)
        merged._func_positions = self._func_positions + other._func_positions
        for key, value in other._template.items():
            assert_or_throw(
                key not in merged._template,
                ValueError(f"{key} already exists in the original template"),
            )
            merged._template[key] = value
        if other.empty:
            return merged
        # Units are matched by the identity of their expression object: a unit
        # of ``other`` sharing an expression with an existing unit only
        # contributes its positions, any other unit is copied over.
        by_expr = {id(unit.expr): unit for unit in merged._units}
        for unit in other._units:
            known = by_expr.get(id(unit.expr))
            if known is None:
                merged._units.append(unit.copy())
            else:
                known.positions += unit.positions
        return merged
示例#2
0
 def make_noniterative_objective(self,
                                 obj: Any) -> NonIterativeObjectiveFunc:
     """Return ``obj`` as a ``NonIterativeObjectiveFunc``.

     Instances of ``NonIterativeObjectiveFunc`` are returned unchanged; any
     other non-``None`` object is handed to
     ``self._noniterative_objective_converter``.

     :param obj: the objective-like object, must not be ``None``
     :return: the objective function
     """
     assert_or_throw(obj is not None,
                     TuneCompileError("objective can't be None"))
     if not isinstance(obj, NonIterativeObjectiveFunc):
         return self._noniterative_objective_converter(obj)
     return obj
示例#3
0
    def latest(self) -> FSBase:
        """Open the folder of the most recent checkpoint.

        :raises AssertionError: if there was no checkpoint
        :return: the result of ``self._fs.opendir`` on the last recorded iteration
        """
        assert_or_throw(len(self) > 0, "checkpoint history is empty")
        newest = self._iterations[-1]
        return self._fs.opendir(newest)
示例#4
0
 def __init__(  # noqa: C901
         self,
         df: Any = None,
         schema: Any = None,
         metadata: Any = None):
     """Wrap an iterable of dataframes and resolve the schema to use.

     The schema is taken from the first wrapped dataframe when the input is
     not empty, otherwise from ``schema``. When both are available they must
     be equal.

     :param df: an ``Iterable`` input; anything else is rejected
     :param schema: schema-like object or ``None``
     :param metadata: forwarded to the parent constructor
     :raises FugueDataFrameInitError: if no schema can be determined, or
         wrapping any other non-``FugueDataFrameError`` failure
     """
     try:
         if not isinstance(df, Iterable):
             raise ValueError(
                 f"{df} is incompatible with LocalDataFrameIterableDataFrame"
             )
         self._native = make_empty_aware(self._dfs_wrapper(df))
         orig_schema: Optional[Schema] = None
         if not self._native.empty:
             orig_schema = self._native.peek().schema
         if orig_schema is None:
             # Empty input: the caller-provided schema is the only source.
             if schema is None:
                 raise FugueDataFrameInitError(
                     "schema is not provided and the input is empty")
         elif schema is None:
             schema = orig_schema
         else:
             if not isinstance(schema, Schema):
                 schema = Schema(schema)
             assert_or_throw(
                 orig_schema == schema,
                 lambda:
                 f"iterable schema {orig_schema} is different from {schema}",
             )
         super().__init__(schema, metadata)
     except FugueDataFrameError:
         raise
     except Exception as e:
         raise FugueDataFrameInitError from e
示例#5
0
 def make_dataset(
     self,
     dag: FugueWorkflow,
     dataset: Any,
     df: Any = None,
     df_name: str = TUNE_DATASET_DF_DEFAULT_NAME,
     test_df: Any = None,
     test_df_name: str = TUNE_DATASET_VALIDATION_DF_DEFAULT_NAME,
     partition_keys: Optional[List[str]] = None,
     temp_path: str = "",
 ) -> TuneDataset:
     """Turn ``dataset`` into a ``TuneDataset``.

     A ``TuneDataset`` is returned as is (``df`` must then be ``None``). A
     ``Space`` is fed to a ``TuneDatasetBuilder`` together with the optional
     ``df`` and ``test_df``, both partitioned by ``partition_keys`` when keys
     are given. ``test_df`` is added with ``how="inner"`` when keys are given
     and ``how="cross"`` otherwise.

     :raises TuneCompileError: if ``dataset`` is ``None`` or of an
         unsupported type, or if ``df`` is set together with a ``TuneDataset``
     """
     assert_or_throw(dataset is not None,
                     TuneCompileError("dataset can't be None"))
     if isinstance(dataset, TuneDataset):
         assert_or_throw(
             df is None,
             TuneCompileError("can't set df when dataset is TuneDataset"))
         return dataset
     if not isinstance(dataset, Space):
         raise TuneCompileError(
             f"{dataset} can't be converted to TuneDataset")

     def has_keys() -> bool:
         # Evaluated lazily, only when a dataframe is actually added.
         return partition_keys is not None and len(partition_keys) > 0

     builder = TuneDatasetBuilder(dataset, self.get_path_or_temp(temp_path))
     if df is not None:
         train = dag.df(df)
         if has_keys():
             train = train.partition_by(*partition_keys)
         builder.add_df(df_name, train)
     if test_df is not None:
         validation = dag.df(test_df)
         if has_keys():
             validation = validation.partition_by(*partition_keys)
             join_type = "inner"
         else:
             join_type = "cross"
         builder.add_df(test_df_name, validation, how=join_type)
     return builder.build(dag, batch_size=1, shuffle=True)
示例#6
0
 def deco(func: Callable) -> "_FuncAsTunable":
     # Wrap a plain function as a tunable; class methods are rejected.
     # ``distributable`` comes from the enclosing scope.
     on_class_method = is_class_method(func)
     assert_or_throw(
         not on_class_method,
         NotImplementedError(
             "tunable decorator can't be used on class methods"),
     )
     wrapped = _FuncAsTunable.from_func(func, distributable=distributable)
     return wrapped
示例#7
0
def _to_tunable(
    obj: Any,
    global_vars: Optional[Dict[str, Any]] = None,
    local_vars: Optional[Dict[str, Any]] = None,
    distributable: Optional[bool] = None,
) -> Tunable:
    """Convert ``obj`` into a ``Tunable``.

    A ``Tunable`` instance is shallow-copied. Anything else is resolved with
    ``to_function``; if the result is already a ``Tunable`` it is
    shallow-copied, otherwise it is wrapped with ``_FuncAsTunable.from_func``.

    :param obj: a ``Tunable`` or an object ``to_function`` can resolve
    :param global_vars: passed to ``get_caller_global_local_vars``
    :param local_vars: passed to ``get_caller_global_local_vars``
    :param distributable: when true, the resulting tunable must be
        distributable; ``None`` or false imposes no requirement
    :raises FugueTuneCompileError: if ``obj`` can't be converted, or if
        ``distributable`` is true but the tunable is not distributable
    :return: the tunable
    """
    global_vars, local_vars = get_caller_global_local_vars(
        global_vars, local_vars)

    def get_tunable() -> Tunable:
        if isinstance(obj, Tunable):
            return copy.copy(obj)
        try:
            f = to_function(obj,
                            global_vars=global_vars,
                            local_vars=local_vars)
            # this is for string expression of function with decorator
            if isinstance(f, Tunable):
                return copy.copy(f)
            # this is for functions without decorator
            return _FuncAsTunable.from_func(f, distributable)
        except Exception as e:
            # The original error stays in the args for callers that read it
            # there, and is also chained as the cause for the traceback.
            raise FugueTuneCompileError(
                f"{obj} is not a valid tunable function", e) from e

    t = get_tunable()
    if distributable:
        assert_or_throw(t.distributable,
                        FugueTuneCompileError(f"{t} is not distributable"))
    return t
示例#8
0
    def sample(self,
               n: int,
               seed: Any = None) -> Iterable["TuningParametersTemplate"]:
        """Draw values for every stochastic expression of this template.

        :param n: number of samples, must be a positive integer
        :param seed: random seed, defaults to None. When it is not None it is
          passed to ``np.random.seed`` before the values are generated.

        :yield: this template itself when it has no stochastic expression,
          otherwise the templates produced by ``self._partial_fill`` from the
          generated values

        .. code-block:: python

            # two templates are produced; ``a`` is sampled, ``b`` is untouched
            list(to_template(dict(a=Rand(1,2),b=Grid(0,1))).sample(2,0))
        """
        assert_or_throw(n > 0, ValueError("sample count must be positive"))
        if not self.has_stochastic:
            yield self
            return
        if seed is not None:
            np.random.seed(seed)
        unit_indices: List[int] = []
        generated: List[List[Any]] = []
        for idx, unit in enumerate(self._units):
            if isinstance(unit.expr, StochasticExpression):
                unit_indices.append(idx)
                generated.append(unit.expr.generate_many(n))
        # zip(*generated) regroups the per-unit value lists into one tuple
        # per sample.
        yield from self._partial_fill(unit_indices, zip(*generated))
示例#9
0
 def __init__(self, *args, **kwargs: Any):
     """Build the list of templates from a single positional argument or
     from keyword arguments.

     The positional argument may be a dict, a ``TuningParametersTemplate``
     or an iterable of ``TuningParametersTemplate``. It can't be combined
     with keyword arguments or other positional arguments.

     :raises ValueError: on an invalid argument combination or type
     """
     if len(args) == 0:
         self._templates = [TuningParametersTemplate(kwargs)]
         return
     assert_or_throw(
         len(args) == 1 and len(kwargs) == 0,
         ValueError(
             "when the first argument is a template or dict, "
             "it must be the only argument of the constructor"
         ),
     )
     first = args[0]
     if isinstance(first, dict):
         self._templates = [TuningParametersTemplate(first)]
     elif isinstance(first, TuningParametersTemplate):
         self._templates = [first]
     elif isinstance(first, Iterable):
         self._templates = list(first)
         only_templates = all(
             isinstance(x, TuningParametersTemplate) for x in self._templates
         )
         assert_or_throw(only_templates, ValueError("not a list of templates"))
     else:
         raise ValueError("invalid argument type " + str(type(first)))
示例#10
0
def to_sk_model(obj: Any) -> Type:
    """Resolve ``obj`` to a type accepted by ``is_classifier`` or
    ``is_regressor``.

    :param obj: a type, or a string resolved with ``to_type``
    :raises TypeError: if the resolved object passes neither check
    :return: the resolved object
    """
    model = to_type(obj) if isinstance(obj, str) else obj
    assert_or_throw(
        is_classifier(model) or is_regressor(model),
        TypeError(f"{model} is neither a sklearn classifier or regressor"),
    )
    return model
示例#11
0
 def __init__(self, func: Callable):
     """Store ``func`` and compute its uuid.

     :param func: must be callable, otherwise ``ValueError`` is raised
     """
     super().__init__()
     assert_or_throw(callable(func), lambda: ValueError(func))
     self._func = func
     # NOTE(review): if ``LambdaType`` here is ``types.LambdaType``, it is the
     # same object as ``types.FunctionType``, so every plain Python function
     # (not only lambdas) takes the "lambda" branch -- confirm this is intended.
     uuid_source = (
         "lambda" if isinstance(func, LambdaType)
         else get_full_type_path(func)
     )
     self._uuid = to_uuid(uuid_source)
示例#12
0
文件: sql.py 项目: gityow/fugue
 def _where() -> str:
     # Render the optional WHERE clause; with no filter the clause is empty.
     # ``where`` and ``self`` come from the enclosing scope.
     if where is not None:
         assert_or_throw(
             not is_agg(where),
             lambda: ValueError(f"{where} has aggregation functions"),
         )
         condition = self.generate(where.alias(""))
         return " WHERE " + condition
     return ""
示例#13
0
 def _object_to_iterative_objective(self,
                                    obj: Any) -> IterativeObjectiveFunc:
     """Return ``obj`` if it is an ``IterativeObjectiveFunc``.

     :raises TuneCompileError: if ``obj`` is ``None`` or of any other type
     """
     assert_or_throw(obj is not None,
                     TuneCompileError("objective can't be None"))
     if not isinstance(obj, IterativeObjectiveFunc):
         raise TuneCompileError(
             f"{obj} can't be converted to iterative objective function")
     return obj
示例#14
0
 def __init__(self, value: Any):
     """Store a literal value.

     :param value: ``None`` or an instance of one of
         ``_LiteralColumnExpr._VALID_TYPES``
     :raises NotImplementedError: for any other value
     """
     supported = value is None or isinstance(
         value, _LiteralColumnExpr._VALID_TYPES)
     assert_or_throw(
         supported,
         lambda: NotImplementedError(f"{value}, type: {type(value)}"),
     )
     self._value = value
     super().__init__()
示例#15
0
 def run(self, cursor: PartitionCursor, df: LocalDataFrame) -> LocalDataFrame:
     """Expand the single row of ``df`` with ``self._get_dfs`` and call
     ``self.func`` on the result.

     :raises FugueBug: if ``df`` does not contain exactly one row
     """
     rows = df.as_array(type_safe=True)
     assert_or_throw(
         len(rows) == 1,
         FugueBug("each comap partition can have one and only one row"),
     )
     inputs = list(self._get_dfs(rows[0]))
     return self.func(cursor, DataFrames(inputs))
示例#16
0
 def deco(func: Callable) -> NonIterativeObjectiveFunc:
     # Wrap a plain function as a non-iterative objective; class methods are
     # rejected. ``min_better`` comes from the enclosing scope.
     on_class_method = is_class_method(func)
     assert_or_throw(
         not on_class_method,
         NotImplementedError(
             "non_iterative_objective decorator can't be used on class methods"
         ),
     )
     wrapped = _NonIterativeObjectiveFuncWrapper.from_func(func, min_better)
     return wrapped
示例#17
0
 def __init__(
     self,
     q: Optional[float] = None,
     log: bool = False,
 ):
     """Store ``q`` and ``log``.

     :param q: ``None`` or a positive number
     :param log: stored as is
     """
     if q is not None:
         assert_or_throw(q > 0, f"{q} <= 0")
     self.q, self.log = q, log
示例#18
0
def serialize_dfs(dfs: WorkflowDataFrames,
                  how: str = "inner",
                  path="") -> WorkflowDataFrame:
    """Serialize every named dataframe with ``serialize_df`` and join the
    results.

    :param dfs: named dataframes (``dfs.has_key`` must be true)
    :param how: join type passed to the workflow's ``join``
    :param path: passed to ``serialize_df``
    :return: the joined dataframe
    """
    assert_or_throw(dfs.has_key, "all datarames must be named")
    by_name = {}
    for name, frame in dfs.items():
        by_name[name] = serialize_df(frame, name, path)
    serialized = WorkflowDataFrames(by_name)
    # The workflow is taken from the first input dataframe.
    wf: FugueWorkflow = dfs.get_value_by_index(0).workflow
    return wf.join(serialized, how=how)
示例#19
0
 def add_dfs(self,
             dfs: WorkflowDataFrames,
             how: str = "") -> "TuneDatasetBuilder":
     """Add every named dataframe of ``dfs`` through ``self.add_df``.

     ``how`` is only forwarded when ``self._dfs_spec`` is not empty at the
     time a dataframe is added.

     :return: the builder itself
     """
     assert_or_throw(dfs.has_key, "all datarames must be named")
     for name, frame in dfs.items():
         if len(self._dfs_spec) > 0:
             self.add_df(name, frame, how=how)
         else:
             self.add_df(name, frame)
     return self
示例#20
0
 def distribution_func(self, seed: Any) -> float:
     """Return one value between ``self.low`` and ``self.high``.

     When both bounds differ, the value is drawn with ``np.random.uniform``,
     after calling ``np.random.seed(seed)`` if ``seed`` is not ``None``. When
     they are equal, ``self.low`` is returned and ``self.include_high`` must
     be true.
     """
     if self.low != self.high:
         if seed is not None:
             np.random.seed(seed)
         return np.random.uniform(self.low, self.high)
     assert_or_throw(
         self.include_high,
         f"high {self.high} equals low but include_high = False",
     )
     return self.low
示例#21
0
 def simple_value(self) -> Dict[str, Any]:
     """Return the parameters dictionary of a template that contains no
     tuning expression; ``ValueError`` is raised otherwise.

     When ``self._func_positions`` is empty the internal dictionary is
     returned directly, otherwise ``self._fill_funcs`` is applied to a deep
     copy of it.
     """
     assert_or_throw(self.empty,
                     ValueError("template contains tuning expressions"))
     has_funcs = len(self._func_positions) > 0
     if has_funcs:
         return self._fill_funcs(deepcopy(self._template))
     return self._template
示例#22
0
 def _get_df(self) -> WorkflowDataFrame:
     """Return ``self._df`` as a dataframe of ``self._workflow``.

     A ``WorkflowDataFrame`` that is not a ``Yielded`` is returned as is and
     must belong to ``self._workflow``; everything else goes through
     ``self._workflow.df``.

     :raises FugueSQLError: if the dataframe is from another workflow
     """
     already_workflow_df = (
         not isinstance(self._df, Yielded)
         and isinstance(self._df, WorkflowDataFrame)
     )
     if not already_workflow_df:
         return self._workflow.df(self._df)
     assert_or_throw(
         self._df.workflow is self._workflow,
         FugueSQLError(f"{self._key}, {self._df} is from another workflow"),
     )
     return self._df
示例#23
0
def _to_model(obj: Any) -> Any:
    """Resolve ``obj`` to an object accepted by ``is_classifier`` or
    ``is_regressor``.

    A string is resolved with ``to_type``; when it contains a dot, everything
    before the last dot is imported as a module first.

    :raises TypeError: if the resolved object passes neither check
    """
    model = obj
    if isinstance(obj, str):
        if "." in obj:
            import_module(obj.rpartition(".")[0])
        model = to_type(obj)
    assert_or_throw(
        is_classifier(model) or is_regressor(model),
        TypeError(f"{model} is neither a sklearn classifier or regressor"),
    )
    return model
示例#24
0
    def register(self, handler: Any) -> str:
        """Register the handler into the server

        The handler is converted with ``to_rpc_handler``, started, and stored
        under a newly generated key while ``self._rpchandler_lock`` is held.

        :param handler: |RPCHandlerLikeObject|
        :return: the unique key of the handler
        """
        with self._rpchandler_lock:
            # The key is "_" followed by the last dash-separated group of a
            # random uuid.
            last_group = str(uuid4()).rsplit("-", 1)[-1]
            key = "_" + last_group
            assert_or_throw(key not in self._handlers, f"{key} already exists")
            started = to_rpc_handler(handler).start()
            self._handlers[key] = started
            return key
示例#25
0
 def __init__(
     self,
     loc: float,
     scale: float,
     q: Optional[float] = None,
     log: bool = False,
 ):
     """Store ``loc`` and ``scale`` and forward ``q`` and ``log`` to the
     parent constructor.

     :param scale: must be greater than 0
     """
     assert_or_throw(scale > 0, f"{scale}<=0")
     self.loc, self.scale = loc, scale
     super().__init__(q, log)
示例#26
0
文件: sql.py 项目: gityow/fugue
 def _on_common_binary(self, expr: _BinaryOpExpr, bracket: bool) -> Iterable[str]:
     """Yield the pieces of a binary expression: left operand, operator
     text from ``_SUPPORTED_OPERATORS``, right operand, wrapped in
     parentheses when ``bracket`` is true.

     :raises NotImplementedError: if ``expr.op`` is not a supported operator
     """
     assert_or_throw(expr.op in _SUPPORTED_OPERATORS, NotImplementedError(expr))

     def render(operand):
         # Operands are always generated in bracketed form.
         return self._generate(operand, bracket=True)

     if bracket:
         yield "("
     if expr.is_distinct:  # pragma: no cover
         raise FugueBug(f"impossible case {expr}")
     yield from render(expr.left)
     yield _SUPPORTED_OPERATORS[expr.op]
     yield from render(expr.right)
     if bracket:
         yield ")"
示例#27
0
    def aggregate(
        self,
        df: DataFrame,
        partition_spec: Optional[PartitionSpec],
        agg_cols: List[ColumnExpr],
        metadata: Any = None,
    ):
        """Aggregate on dataframe

        The partition keys of ``partition_spec`` (if any) followed by
        ``agg_cols`` are wrapped in a ``SelectColumns`` and passed to
        ``self.select``.

        :param df: the dataframe to aggregate on
        :param partition_spec: PartitionSpec to specify partition keys
        :param agg_cols: aggregation expressions, can't be empty and every
            item must be an aggregation function
        :param metadata: dict-like object passed on to ``self.select``,
            defaults to None
        :return: the aggregated result as a dataframe

        .. admonition:: New Since
            :class: hint

            **0.6.0**

        .. seealso::

            Please find more expression examples in :mod:`fugue.column.sql` and
            :mod:`fugue.column.functions`

        .. admonition:: Examples

            .. code-block:: python

                import fugue.column.functions as f

                # SELECT MAX(b) AS b FROM df
                engine.aggregate(
                    df,
                    partition_spec=None,
                    agg_cols=[f.max(col("b"))])

                # SELECT a, MAX(b) AS x FROM df GROUP BY a
                engine.aggregate(
                    df,
                    partition_spec=PartitionSpec(by=["a"]),
                    agg_cols=[f.max(col("b")).alias("x")])
        """
        assert_or_throw(len(agg_cols) > 0, ValueError("agg_cols can't be empty"))
        assert_or_throw(
            all(is_agg(x) for x in agg_cols),
            ValueError("all agg_cols must be aggregation functions"),
        )
        has_keys = (
            partition_spec is not None and len(partition_spec.partition_by) > 0
        )
        group_keys: List[ColumnExpr] = (
            [col(name) for name in partition_spec.partition_by]
            if has_keys else []
        )
        selection = SelectColumns(*group_keys, *agg_cols)
        return self.select(df, cols=selection, metadata=metadata)
示例#28
0
 def __init__(
     self,
     mu: float,
     sigma: float,
     q: Optional[float] = None,
 ):
     """Store ``mu`` and ``sigma`` and forward ``q`` to the parent
     constructor.

     :param sigma: must be greater than 0
     :param q: ``None`` or a number greater than 0
     :raises ValueError: if ``sigma`` or ``q`` is invalid
     """
     assert_or_throw(sigma > 0, ValueError(sigma))
     assert_or_throw(q is None or q > 0, ValueError(q))
     self.mu, self.sigma = mu, sigma
     super().__init__(q)
示例#29
0
 def _get_distributed(self, distributed: Optional[bool]) -> bool:
     """Resolve the effective ``distributed`` flag.

     ``None`` falls back to ``self._optimizer.distributable``. A true value
     requires the optimizer to be distributable.

     :raises TuneCompileError: if distribution is requested for a
         non-distributable optimizer
     """
     if distributed is None:
         return self._optimizer.distributable
     if not distributed:
         return False
     assert_or_throw(
         self._optimizer.distributable,
         TuneCompileError(
             f"can't distribute non-distributable optimizer {self._optimizer}"
         ),
     )
     return True
示例#30
0
 def __init__(
     self,
     mu: int,
     sigma: float,
     q: int = 1,
 ):
     """Store ``mu``, ``sigma`` and ``q`` and forward ``q`` to the parent
     constructor.

     :param sigma: must be greater than 0
     :param q: must be greater than 0, defaults to 1
     :raises ValueError: if ``sigma`` or ``q`` is invalid
     """
     assert_or_throw(sigma > 0, ValueError(sigma))
     assert_or_throw(q > 0, ValueError(q))
     self.mu, self.sigma, self.q = mu, sigma, q
     super().__init__(q)