示例#1
0
 def jsondict(self) -> ParamDict:
     """Return a JSON-serializable copy of ``paramdict``.

     Class/type values are replaced by their fully qualified path strings
     so the result can be serialized; all other values pass through as-is.
     """
     converted = ParamDict()
     for key, value in self.paramdict.items():
         converted[key] = (
             get_full_type_path(value) if isinstance(value, type) else value
         )
     return converted
示例#2
0
 def __init__(self, func: Callable):
     """Wrap *func* and derive a deterministic uuid for it.

     Lambdas have no stable import path, so every lambda shares the uuid
     of the literal string ``"lambda"``; any other callable is identified
     by its fully qualified type path.
     """
     super().__init__()
     assert_or_throw(callable(func), lambda: ValueError(func))
     self._func = func
     seed = "lambda" if isinstance(func, LambdaType) else get_full_type_path(func)
     self._uuid = to_uuid(seed)
示例#3
0
def function_to_taskspec(
    func: Callable,
    is_config: Callable[[List[Dict[str, Any]]], List[bool]],
    deterministic: bool = True,
    lazy: bool = False,
) -> TaskSpec:
    """Build a :class:`TaskSpec` from a plain python function.

    :param func: the function to wrap; must not use varargs, kwargs or
        keyword-only arguments
    :param is_config: given the list of parsed parameter descriptors,
        returns one bool per parameter — True means the parameter is a
        config, False means it is an input dependency
    :param deterministic: whether the resulting task is deterministic
    :param lazy: whether the resulting task is lazily evaluated
    :return: the constructed task spec
    :raises Exception: (via ``assert_or_throw``) when the signature is
        unsupported or a dependency parameter has a default value
    """
    specs = inspect.getfullargspec(func)
    sig = inspect.signature(func)
    annotations = get_type_hints(func)
    assert_or_throw(
        specs.varargs is None and specs.varkw is None and len(specs.kwonlyargs) == 0,
        "Function can't have varargs or kwargs",
    )
    inputs: List[InputSpec] = []
    configs: List[ConfigSpec] = []
    outputs: List[OutputSpec] = []
    arr: List[Dict[str, Any]] = []
    for k, w in sig.parameters.items():
        # prefer resolved type hints (handles string annotations) over raw ones
        a = _parse_annotation(annotations.get(k, w.annotation))
        a["name"] = k
        # identity check against the sentinel: `==` can raise or return a
        # non-bool when the default value overloads equality (e.g. arrays)
        if w.default is inspect.Parameter.empty:
            a["required"] = True
        else:
            a["required"] = False
            a["default_value"] = w.default
        arr.append(a)
    for is_cfg, desc in zip(is_config(arr), arr):
        if is_cfg:
            configs.append(ConfigSpec(**desc))
        else:
            assert_or_throw(
                desc["required"], f"{desc}: dependency must not have default value"
            )
            inputs.append(InputSpec(**desc))
    anno = annotations.get("return", sig.return_annotation)
    is_multiple = _is_tuple(anno)
    n = 0
    # a tuple return annotation produces one output per tuple member;
    # missing/None annotations produce no outputs
    for x in list(anno.__args__) if is_multiple else [anno]:
        if x is inspect.Parameter.empty or x is type(None):
            continue
        a = _parse_annotation(x)
        a["name"] = f"_{n}"
        outputs.append(OutputSpec(**a))
        n += 1
    metadata = dict(__interfaceless_func=get_full_type_path(func))
    return TaskSpec(
        configs,
        inputs,
        outputs,
        _interfaceless_wrapper,
        metadata,
        deterministic=deterministic,
        lazy=lazy,
    )
示例#4
0
 def __uuid__(self) -> str:
     """Deterministic id derived from every defining attribute of this spec."""
     parts = [
         self.configs,
         self.inputs,
         self.outputs,
         get_full_type_path(self.func),
         self.metadata,
         self.deterministic,
         self.lazy,
         self._node_spec,
     ]
     return to_uuid(*parts)
示例#5
0
def test_start_stop():
    """Repeated start/stop calls within a round collapse to one each,
    and the engine can be restarted for a second round."""
    conf = {"fugue.rpc.server": get_full_type_path(_MockRPC)}
    engine = _MockExecutionEngine(conf=conf)
    # first round: duplicate calls must be idempotent
    for _ in range(2):
        engine.start()
    for _ in range(2):
        engine.stop()
    # second round
    engine.start()
    engine.stop()
    assert engine._start == 2
    assert engine._stop == 2
    assert _MockRPC._start == 2
    assert _MockRPC._stop == 2
示例#6
0
 def jsondict(self) -> ParamDict:
     """Return a JSON-serializable representation of this task spec."""
     data = dict(
         configs=[c.jsondict for c in self.configs.values()],
         inputs=[c.jsondict for c in self.inputs.values()],
         outputs=[c.jsondict for c in self.outputs.values()],
         func=get_full_type_path(self.func),
         metadata=self.metadata,
         deterministic=self.deterministic,
         lazy=self.lazy,
     )
     res = ParamDict(data)
     # node_spec is optional; include it only when present
     if self._node_spec is not None:
         res["node_spec"] = self.node_spec.jsondict
     return res
示例#7
0
        def test_select(self):
            """SQL select against dag dataframes: implicit SELECT keyword,
            inner joins, explicit sql engine (as a class and as a full type
            path string), constant-only queries, and select after transform.
            """
            with self.dag() as dag:
                a = dag.df([[1, 10], [2, 20], [3, 30]], "x:long,y:long")
                b = dag.df([[2, 20, 40], [3, 30, 90]], "x:long,y:long,z:long")
                # fragments and dataframes are interleaved into one statement
                dag.select("* FROM", a).assert_eq(a)
                dag.select("SELECT *,x*y AS z FROM", a,
                           "WHERE x>=2").assert_eq(b)

                c = dag.df([[2, 20, 40], [3, 30, 90]], "x:long,y:long,zb:long")
                # surrounding whitespace in fragments must be tolerated
                dag.select(
                    "  SELECT t1.*,z AS zb FROM ",
                    a,
                    "AS t1 INNER JOIN",
                    b,
                    "AS t2 ON t1.x=t2.x  ",
                ).assert_eq(c)

                # no select
                dag.select("t1.*,z AS zb FROM ", a, "AS t1 INNER JOIN", b,
                           "AS t2 ON t1.x=t2.x").assert_eq(c)

                # specify sql engine
                dag.select(
                    "SELECT t1.*,z AS zb FROM ",
                    a,
                    "AS t1 INNER JOIN",
                    b,
                    "AS t2 ON t1.x=t2.x",
                    sql_engine=SqliteEngine,
                ).assert_eq(c)

                # specify sql engine
                dag.select(
                    "SELECT t1.*,z AS zb FROM ",
                    a,
                    "AS t1 INNER JOIN",
                    b,
                    "AS t2 ON t1.x=t2.x",
                    sql_engine=get_full_type_path(SqliteEngine),
                ).assert_eq(c)

                # no input
                dag.select("9223372036854775807 AS a").assert_eq(
                    dag.df([[9223372036854775807]], "a:long"))

                # make sure transform -> select works
                b = a.transform(mock_tf1)
                a = a.transform(mock_tf1)
                aa = dag.select("* FROM", a)
                dag.select("* FROM", b).assert_eq(aa)
示例#8
0
 def __uuid__(self) -> str:
     """Deterministic id from the wrapped function's path, params and return type."""
     func_path = get_full_type_path(self._func)
     return to_uuid(func_path, self._params, self._rt)
示例#9
0
def test_get_full_type_path():
    """get_full_type_path yields module-qualified names for functions,
    classes and builtins, resolves instances to their type, and rejects
    lambdas and None with TypeError."""
    # functions and classes resolve to their defining module path
    assert get_full_type_path(dummy_for_test) == (
        "tests.utils.test_convert.dummy_for_test")
    assert get_full_type_path(__Dummy__) == (
        "tests.utils.test_convert.__Dummy__")
    assert get_full_type_path(SubClassSame) == (
        "tests.utils.convert_examples.SubClass")
    # lambdas and None have no importable path
    raises(TypeError, lambda: get_full_type_path(lambda x: x + 1))
    raises(TypeError, lambda: get_full_type_path(None))
    # builtin types
    assert get_full_type_path(int) == "builtins.int"
    assert get_full_type_path(dict) == "builtins.dict"
    assert get_full_type_path(Exception) == "builtins.Exception"
    # instances resolve to the path of their type
    assert get_full_type_path(123) == "builtins.int"
    assert get_full_type_path("ad") == "builtins.str"
    assert get_full_type_path(__Dummy__()) == (
        "tests.utils.test_convert.__Dummy__")
示例#10
0
def _to_model_str(model: Any) -> Any:
    """Normalize *model* (expression string or class) to its full type path."""
    resolved = _to_model(model) if isinstance(model, str) else model
    return get_full_type_path(resolved)
示例#11
0
def _sk_stack_cv(
    _sk__model: str,
    _sk__estimators: str,
    _sk__train_df: pd.DataFrame,
    _sk__scoring: Any,
    _sk__stack_cv: int = 2,
    _sk__method: str = "auto",
    _sk__passthrough: bool = False,
    _sk__cv: int = 5,
    _sk__feature_prefix: str = "",
    _sk__label_col: str = "label",
    _sk__save_path: str = "",
    **kwargs: Any,
) -> Dict[str, Any]:
    """Cross-validate a sklearn stacking ensemble and report its score.

    ``_sk__model`` is the expression of the final (meta) estimator and
    ``_sk__estimators`` is a JSON array of descriptors, each holding a
    ``_sk__model`` key plus that estimator's constructor kwargs. Extra
    ``kwargs`` are forwarded to the final estimator's constructor.

    Returns a dict with:
      * ``error`` — negative mean cross-validation score
      * ``hp`` — the hyperparameters that reproduce this ensemble
      * ``metadata`` — model type path, per-fold scores and, when
        ``_sk__save_path`` is set, the path of the pickled fitted model
    """
    final_estimator = _to_model(_sk__model)(**kwargs)
    estimators: List[Tuple[str, Any]] = []
    for i, d in enumerate(json.loads(_sk__estimators)):
        key = f"_{i}"
        # each descriptor carries its model expression plus its own kwargs
        m = _to_model(d.pop("_sk__model"))
        estimators.append((key, m(**d)))
    # choose classifier vs regressor stacking based on the meta estimator
    if is_classifier(final_estimator):
        model = StackingClassifier(
            estimators,
            final_estimator,
            cv=_sk__stack_cv,
            stack_method=_sk__method,
            passthrough=_sk__passthrough,
            n_jobs=kwargs.get("n_jobs", 1),
        )
    else:
        model = StackingRegressor(
            estimators,
            final_estimator,
            cv=_sk__stack_cv,
            passthrough=_sk__passthrough,
            n_jobs=kwargs.get("n_jobs", 1),
        )
    # deterministic shuffle so CV folds are stable across runs
    train_df = _sk__train_df.sample(frac=1,
                                    random_state=0).reset_index(drop=True)

    # keep only feature columns matching the prefix; label goes to train_y
    train_x = train_df.drop([_sk__label_col], axis=1)
    cols = [x for x in train_x.columns if x.startswith(_sk__feature_prefix)]
    train_x = train_x[cols]
    train_y = train_df[_sk__label_col]

    s = cross_val_score(model,
                        train_x,
                        train_y,
                        cv=_sk__cv,
                        scoring=_sk__scoring)
    metadata = dict(sk_model=get_full_type_path(model),
                    cv_scores=[float(x) for x in s])
    if _sk__save_path != "":
        # refit on the full training set and persist under a random name
        model.fit(train_x, train_y)
        fp = os.path.join(_sk__save_path, str(uuid4()) + ".pkl")
        with FileSystem().openbin(fp, mode="wb") as f:
            pickle.dump(model, f)
        metadata["model_path"] = fp
    # hp reproduces the ensemble: base estimator descriptors keyed by
    # position plus the stacking (meta) estimator's own expression/kwargs
    return dict(
        error=-np.mean(s),
        hp=dict(
            _sk__model=get_full_type_path(model),
            _sk__estimators=dict(
                **{
                    f"_{i}": d
                    for i, d in enumerate(json.loads(_sk__estimators))
                },
                stacking=dict(_sk__model=_sk__model, **kwargs),
            ),
            _sk__stack_cv=_sk__stack_cv,
            _sk__method=_sk__method,
            _sk__passthrough=_sk__passthrough,
        ),
        metadata=metadata,
    )
示例#12
0
 def __uuid__(self) -> str:
     """Identity depends only on this object's concrete type path."""
     type_path = get_full_type_path(self)
     return to_uuid(type_path)
示例#13
0
 def __uuid__(self) -> str:
     """Unique id for this expression."""
     return to_uuid(
         get_full_type_path(self._func),
         self._args,
         self._kwargs,
     )
示例#14
0
def to_keras_spec_expr(spec: Any) -> str:
    """Normalize *spec* (expression string or class) to its full type path."""
    resolved = to_keras_spec(spec) if isinstance(spec, str) else spec
    return get_full_type_path(resolved)
示例#15
0
def to_sk_model_expr(model: Any) -> Any:
    """Normalize *model* (expression string or class) to its full type path."""
    resolved = to_sk_model(model) if isinstance(model, str) else model
    return get_full_type_path(resolved)