def concat(self, other: "TuningParametersTemplate") -> "TuningParametersTemplate":
    """Concatenate with another template and generate a new template.

    .. note::

        The other template must not have any key that already exists in
        this template, otherwise ``ValueError`` will be raised.

    :return: the merged template
    """
    res = TuningParametersTemplate({})
    res._units = [x.copy() for x in self._units]
    res._has_grid = self._has_grid | other._has_grid
    res._has_stochastic = self._has_stochastic | other._has_stochastic
    res._template = dict(self._template)
    res._func_positions = self._func_positions + other._func_positions
    for k, v in other._template.items():
        assert_or_throw(
            k not in res._template,
            ValueError(f"{k} already exists in the original template"),
        )
        res._template[k] = v
    if not other.empty:
        temp_map = {id(x.expr): x for x in res._units}
        for u in other._units:
            if id(u.expr) in temp_map:
                temp_map[id(u.expr)].positions += u.positions
            else:
                res._units.append(u.copy())
    return res
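# A minimal usage sketch of ``concat``; the import path and the ``Grid``
# expression are assumptions and may differ by package version.
from tune.concepts.space.parameters import Grid, TuningParametersTemplate

t1 = TuningParametersTemplate(dict(a=Grid(0, 1)))
t2 = TuningParametersTemplate(dict(b=Grid(2, 3)))
merged = t1.concat(t2)  # keys are disjoint, so the merge succeeds

try:
    # overlapping key "a" violates the precondition in the note above
    t1.concat(TuningParametersTemplate(dict(a=Grid(5, 6))))
except ValueError:
    pass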
def make_noniterative_objective(self, obj: Any) -> NonIterativeObjectiveFunc:
    assert_or_throw(obj is not None, TuneCompileError("objective can't be None"))
    if isinstance(obj, NonIterativeObjectiveFunc):
        return obj
    return self._noniterative_objective_converter(obj)
def latest(self) -> FSBase:
    """latest checkpoint folder

    :raises AssertionError: if there was no checkpoint
    """
    assert_or_throw(len(self) > 0, "checkpoint history is empty")
    return self._fs.opendir(self._iterations[-1])
def __init__(  # noqa: C901
    self, df: Any = None, schema: Any = None, metadata: Any = None
):
    try:
        if isinstance(df, Iterable):
            self._native = make_empty_aware(self._dfs_wrapper(df))
            orig_schema: Optional[Schema] = None
            if not self._native.empty:
                orig_schema = self._native.peek().schema
        else:
            raise ValueError(
                f"{df} is incompatible with LocalDataFrameIterableDataFrame"
            )
        if orig_schema is None and schema is None:
            raise FugueDataFrameInitError(
                "schema is not provided and the input is empty"
            )
        elif orig_schema is None and schema is not None:
            pass
        elif orig_schema is not None and schema is None:
            schema = orig_schema
        else:
            schema = Schema(schema) if not isinstance(schema, Schema) else schema
            assert_or_throw(
                orig_schema == schema,
                lambda: f"iterable schema {orig_schema} is different from {schema}",
            )
        super().__init__(schema, metadata)
    except FugueDataFrameError:
        raise
    except Exception as e:
        raise FugueDataFrameInitError from e
def make_dataset(
    self,
    dag: FugueWorkflow,
    dataset: Any,
    df: Any = None,
    df_name: str = TUNE_DATASET_DF_DEFAULT_NAME,
    test_df: Any = None,
    test_df_name: str = TUNE_DATASET_VALIDATION_DF_DEFAULT_NAME,
    partition_keys: Optional[List[str]] = None,
    temp_path: str = "",
) -> TuneDataset:
    assert_or_throw(dataset is not None, TuneCompileError("dataset can't be None"))
    if isinstance(dataset, TuneDataset):
        assert_or_throw(
            df is None,
            TuneCompileError("can't set df when dataset is TuneDataset"),
        )
        return dataset
    if isinstance(dataset, Space):
        path = self.get_path_or_temp(temp_path)
        builder = TuneDatasetBuilder(dataset, path)
        if df is not None:
            wdf = dag.df(df)
            if partition_keys is not None and len(partition_keys) > 0:
                wdf = wdf.partition_by(*partition_keys)
            builder.add_df(df_name, wdf)
        if test_df is not None:
            wdf = dag.df(test_df)
            how = "cross"
            if partition_keys is not None and len(partition_keys) > 0:
                wdf = wdf.partition_by(*partition_keys)
                how = "inner"
            builder.add_df(test_df_name, wdf, how=how)
        return builder.build(dag, batch_size=1, shuffle=True)
    raise TuneCompileError(f"{dataset} can't be converted to TuneDataset")
def deco(func: Callable) -> "_FuncAsTunable":
    assert_or_throw(
        not is_class_method(func),
        NotImplementedError("tunable decorator can't be used on class methods"),
    )
    return _FuncAsTunable.from_func(func, distributable=distributable)
def _to_tunable(
    obj: Any,
    global_vars: Optional[Dict[str, Any]] = None,
    local_vars: Optional[Dict[str, Any]] = None,
    distributable: Optional[bool] = None,
) -> Tunable:
    global_vars, local_vars = get_caller_global_local_vars(global_vars, local_vars)

    def get_tunable() -> Tunable:
        if isinstance(obj, Tunable):
            return copy.copy(obj)
        try:
            f = to_function(obj, global_vars=global_vars, local_vars=local_vars)
            # this is for string expression of function with decorator
            if isinstance(f, Tunable):
                return copy.copy(f)
            # this is for functions without decorator
            return _FuncAsTunable.from_func(f, distributable)
        except Exception as e:
            exp = e
        raise FugueTuneCompileError(f"{obj} is not a valid tunable function", exp)

    t = get_tunable()
    if distributable is None:
        distributable = t.distributable
    elif distributable:
        assert_or_throw(
            t.distributable, FugueTuneCompileError(f"{t} is not distributable")
        )
    return t
def sample(self, n: int, seed: Any = None) -> Iterable["TuningParametersTemplate"]:
    """sample all stochastic parameters

    :param n: number of samples, must be a positive integer
    :param seed: random seed defaulting to None. It will take effect
        only if it is not None.
    :yield: new templates with the stochastic parameters filled

    .. code-block:: python

        assert [dict(a=1.1,b=Grid(0,1)), dict(a=1.5,b=Grid(0,1))] == \
            list(to_template(dict(a=Rand(1,2),b=Grid(0,1))).sample(2,0))
    """
    assert_or_throw(n > 0, ValueError("sample count must be positive"))
    if not self.has_stochastic:
        yield self
    else:
        if seed is not None:
            np.random.seed(seed)
        gu: List[Tuple[int, List[Any]]] = [
            (i, u.expr.generate_many(n))
            for i, u in enumerate(self._units)
            if isinstance(u.expr, StochasticExpression)
        ]
        yield from self._partial_fill(
            [x[0] for x in gu], zip(*[data for _, data in gu])
        )
def __init__(self, *args, **kwargs: Any):
    if len(args) > 0:
        assert_or_throw(
            len(args) == 1 and len(kwargs) == 0,
            ValueError(
                "when the first argument is a template or dict, "
                "it must be the only argument of the constructor"
            ),
        )
        if isinstance(args[0], dict):
            self._templates = [TuningParametersTemplate(args[0])]
        elif isinstance(args[0], TuningParametersTemplate):
            self._templates = [args[0]]
        elif isinstance(args[0], Iterable):
            self._templates = list(args[0])
            assert_or_throw(
                all(
                    isinstance(x, TuningParametersTemplate)
                    for x in self._templates
                ),
                ValueError("not a list of templates"),
            )
        else:
            raise ValueError("invalid argument type " + str(type(args[0])))
    else:
        self._templates = [TuningParametersTemplate(kwargs)]
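# A construction sketch for the constructor above; ``Space`` and ``Grid``
# are assumed to be importable from the package's top level.
from tune import Grid, Space

s1 = Space(a=1, b=Grid(2, 3))        # keyword form: one template from kwargs
s2 = Space(dict(a=1, b=Grid(2, 3)))  # dict form: must be the only argument

try:
    Space(dict(a=1), b=2)  # mixing a positional template with kwargs fails
except ValueError:
    pass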
def to_sk_model(obj: Any) -> Type:
    if isinstance(obj, str):
        obj = to_type(obj)
    assert_or_throw(
        is_classifier(obj) or is_regressor(obj),
        TypeError(f"{obj} is neither a sklearn classifier nor a regressor"),
    )
    return obj
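# A behavior sketch for ``to_sk_model``; ``RandomForestRegressor`` is only
# an illustration, and string resolution is assumed to work once the
# module has been imported.
from sklearn.ensemble import RandomForestRegressor

assert to_sk_model(RandomForestRegressor) is RandomForestRegressor
assert to_sk_model("sklearn.ensemble.RandomForestRegressor") is RandomForestRegressor

try:
    to_sk_model("sklearn.preprocessing.StandardScaler")  # not a classifier/regressor
except TypeError:
    pass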
def __init__(self, func: Callable):
    super().__init__()
    assert_or_throw(callable(func), lambda: ValueError(func))
    self._func = func
    if isinstance(func, LambdaType):
        self._uuid = to_uuid("lambda")
    else:
        self._uuid = to_uuid(get_full_type_path(func))
def _where() -> str:
    if where is None:
        return ""
    assert_or_throw(
        not is_agg(where),
        lambda: ValueError(f"{where} has aggregation functions"),
    )
    return " WHERE " + self.generate(where.alias(""))
def _object_to_iterative_objective(self, obj: Any) -> IterativeObjectiveFunc:
    assert_or_throw(obj is not None, TuneCompileError("objective can't be None"))
    if isinstance(obj, IterativeObjectiveFunc):
        return obj
    raise TuneCompileError(
        f"{obj} can't be converted to iterative objective function"
    )
def __init__(self, value: Any):
    assert_or_throw(
        value is None or isinstance(value, _LiteralColumnExpr._VALID_TYPES),
        lambda: NotImplementedError(f"{value}, type: {type(value)}"),
    )
    self._value = value
    super().__init__()
def run(self, cursor: PartitionCursor, df: LocalDataFrame) -> LocalDataFrame:
    data = df.as_array(type_safe=True)
    assert_or_throw(
        len(data) == 1,
        FugueBug("each comap partition can have one and only one row"),
    )
    dfs = DataFrames(list(self._get_dfs(data[0])))
    return self.func(cursor, dfs)
def deco(func: Callable) -> NonIterativeObjectiveFunc:
    assert_or_throw(
        not is_class_method(func),
        NotImplementedError(
            "non_iterative_objective decorator can't be used on class methods"
        ),
    )
    return _NonIterativeObjectiveFuncWrapper.from_func(func, min_better)
def __init__(
    self,
    q: Optional[float] = None,
    log: bool = False,
):
    if q is not None:
        assert_or_throw(q > 0, f"{q} <= 0")
    self.q = q
    self.log = log
def serialize_dfs(
    dfs: WorkflowDataFrames, how: str = "inner", path=""
) -> WorkflowDataFrame:
    assert_or_throw(dfs.has_key, "all dataframes must be named")
    serialized = WorkflowDataFrames(
        {k: serialize_df(v, k, path) for k, v in dfs.items()}
    )
    wf: FugueWorkflow = dfs.get_value_by_index(0).workflow
    return wf.join(serialized, how=how)
def add_dfs(self, dfs: WorkflowDataFrames, how: str = "") -> "TuneDatasetBuilder":
    assert_or_throw(dfs.has_key, "all dataframes must be named")
    for k, v in dfs.items():
        if len(self._dfs_spec) == 0:
            self.add_df(k, v)
        else:
            self.add_df(k, v, how=how)
    return self
def distribution_func(self, seed: Any) -> float:
    if self.low == self.high:
        assert_or_throw(
            self.include_high,
            f"high {self.high} equals low but include_high = False",
        )
        return self.low
    if seed is not None:
        np.random.seed(seed)
    return np.random.uniform(self.low, self.high)
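# A sketch of the degenerate-bounds branch above; the
# ``Rand(low, high, include_high=...)`` constructor is an assumption.
from tune import Rand

r = Rand(1.0, 1.0, include_high=True)
assert r.distribution_func(seed=0) == 1.0  # collapsed range returns low

try:
    # low == high with include_high=False is contradictory
    Rand(1.0, 1.0, include_high=False).distribution_func(seed=0)
except AssertionError:
    pass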
def simple_value(self) -> Dict[str, Any]:
    """If the template contains no tuning expressions, it is simple,
    and this returns the parameters as a dictionary; otherwise
    ``ValueError`` will be raised
    """
    assert_or_throw(self.empty, ValueError("template contains tuning expressions"))
    if len(self._func_positions) == 0:
        return self._template
    return self._fill_funcs(deepcopy(self._template))
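# A minimal sketch of ``simple_value``; written as a plain call here,
# although in the actual class it may be exposed as a property.
t = TuningParametersTemplate(dict(a=1, b="x"))  # no tuning expressions
assert dict(a=1, b="x") == t.simple_value()

try:
    # Grid is a tuning expression, so this template is not simple
    TuningParametersTemplate(dict(a=Grid(0, 1))).simple_value()
except ValueError:
    pass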
def _get_df(self) -> WorkflowDataFrame:
    if isinstance(self._df, Yielded):
        return self._workflow.df(self._df)
    if isinstance(self._df, WorkflowDataFrame):
        assert_or_throw(
            self._df.workflow is self._workflow,
            FugueSQLError(f"{self._key}, {self._df} is from another workflow"),
        )
        return self._df
    return self._workflow.df(self._df)
def _to_model(obj: Any) -> Any:
    if isinstance(obj, str):
        parts = obj.split(".")
        if len(parts) > 1:
            import_module(".".join(parts[:-1]))
        obj = to_type(obj)
    assert_or_throw(
        is_classifier(obj) or is_regressor(obj),
        TypeError(f"{obj} is neither a sklearn classifier nor a regressor"),
    )
    return obj
def register(self, handler: Any) -> str:
    """Register the handler into the server

    :param handler: |RPCHandlerLikeObject|
    :return: the unique key of the handler
    """
    with self._rpchandler_lock:
        key = "_" + str(uuid4()).split("-")[-1]
        assert_or_throw(key not in self._handlers, f"{key} already exists")
        self._handlers[key] = to_rpc_handler(handler).start()
        return key
def __init__(
    self,
    loc: float,
    scale: float,
    q: Optional[float] = None,
    log: bool = False,
):
    assert_or_throw(scale > 0, f"{scale} <= 0")
    self.loc = loc
    self.scale = scale
    super().__init__(q, log)
def _on_common_binary(self, expr: _BinaryOpExpr, bracket: bool) -> Iterable[str]:
    assert_or_throw(expr.op in _SUPPORTED_OPERATORS, NotImplementedError(expr))
    if bracket:
        yield "("
    if expr.is_distinct:  # pragma: no cover
        raise FugueBug(f"impossible case {expr}")
    yield from self._generate(expr.left, bracket=True)
    yield _SUPPORTED_OPERATORS[expr.op]
    yield from self._generate(expr.right, bracket=True)
    if bracket:
        yield ")"
def aggregate(
    self,
    df: DataFrame,
    partition_spec: Optional[PartitionSpec],
    agg_cols: List[ColumnExpr],
    metadata: Any = None,
):
    """Aggregate on dataframe

    :param df: the dataframe to aggregate on
    :param partition_spec: PartitionSpec to specify partition keys
    :param agg_cols: aggregation expressions
    :param metadata: dict-like object to add to the result dataframe,
        defaults to None. It's currently not used

    :return: the aggregated result as a dataframe

    .. admonition:: New Since
        :class: hint

        **0.6.0**

    .. seealso::

        Please find more expression examples in :mod:`fugue.column.sql` and
        :mod:`fugue.column.functions`

    .. admonition:: Examples

        .. code-block:: python

            import fugue.column.functions as f

            # SELECT MAX(b) AS b FROM df
            engine.aggregate(
                df,
                partition_spec=None,
                agg_cols=[f.max(col("b"))])

            # SELECT a, MAX(b) AS x FROM df GROUP BY a
            engine.aggregate(
                df,
                partition_spec=PartitionSpec(by=["a"]),
                agg_cols=[f.max(col("b")).alias("x")])
    """
    assert_or_throw(len(agg_cols) > 0, ValueError("agg_cols can't be empty"))
    assert_or_throw(
        all(is_agg(x) for x in agg_cols),
        ValueError("all agg_cols must be aggregation functions"),
    )
    keys: List[ColumnExpr] = []
    if partition_spec is not None and len(partition_spec.partition_by) > 0:
        keys = [col(y) for y in partition_spec.partition_by]
    cols = SelectColumns(*keys, *agg_cols)
    return self.select(df, cols=cols, metadata=metadata)
def __init__(
    self,
    mu: float,
    sigma: float,
    q: Optional[float] = None,
):
    assert_or_throw(sigma > 0, ValueError(sigma))
    assert_or_throw(q is None or q > 0, ValueError(q))
    self.mu = mu
    self.sigma = sigma
    super().__init__(q)
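# A quick validation sketch for the constructor above; ``NormalRand`` is
# the assumed class name for this normal-distribution expression.
NormalRand(0.0, 1.0)  # valid: sigma > 0, q unset

try:
    NormalRand(0.0, -1.0)  # sigma must be positive
except ValueError:
    pass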
def _get_distributed(self, distributed: Optional[bool]) -> bool:
    if distributed is None:
        return self._optimizer.distributable
    if distributed:
        assert_or_throw(
            self._optimizer.distributable,
            TuneCompileError(
                f"can't distribute non-distributable optimizer {self._optimizer}"
            ),
        )
        return True
    return False
def __init__(
    self,
    mu: int,
    sigma: float,
    q: int = 1,
):
    assert_or_throw(sigma > 0, ValueError(sigma))
    assert_or_throw(q > 0, ValueError(q))
    self.mu = mu
    self.sigma = sigma
    self.q = q
    super().__init__(q)