def test_from_imperative():
    """``from_imperative`` on a list of lazy lists gives a Bag of all items."""
    from dask.imperative import value

    parts = [value([1, 2, 3]), value([4, 5, 6]), value([7, 8, 9])]
    bag = from_imperative(parts)

    assert isinstance(bag, Bag)
    assert list(bag) == [1, 2, 3, 4, 5, 6, 7, 8, 9]
def test_kwargs():
    """Lazy values inside keyword arguments are resolved by ``do`` wrappers.

    The same check is run for the default wrapper and for ``pure=True``.
    """
    def mysum(a, b, c=(), **kwargs):
        return a + b + sum(c) + sum(kwargs.values())

    for options in ({}, {'pure': True}):
        lazy_sum = do(mysum, **options)
        total = lazy_sum(1, 2, c=[value(3), 0], four=lazy_sum(2, 2))
        assert total.compute() == 10
def test_kwargs():
    """``do``-wrapped functions accept lazy positional and keyword arguments."""
    def mysum(a, b, c=(), **kwargs):
        return a + b + sum(c) + sum(kwargs.values())

    def check(wrapped):
        # 1 + 2 + (3 + 0) + (2 + 2) == 10
        result = wrapped(1, 2, c=[value(3), 0], four=wrapped(2, 2))
        assert result.compute() == 10

    check(do(mysum))
    check(do(mysum, pure=True))
def test_operators():
    """Indexing and the common binary operators work on lazy values."""
    seq = value([1, 2, 3])
    assert seq[0].compute() == 1
    assert (seq + seq).compute() == [1, 2, 3, 1, 2, 3]

    ten = value(10)
    assert (ten + 1).compute() == 11
    assert (1 + ten).compute() == 11
    assert (ten >> 1).compute() == 5
    assert (ten > 2).compute()
    assert (ten**2).compute() == 100
def test_operators():
    """Operators applied to a lazy value compute to the eager result."""
    items = value([1, 2, 3])
    first = items[0]
    doubled = items + items
    assert first.compute() == 1
    assert doubled.compute() == [1, 2, 3, 1, 2, 3]

    number = value(10)
    # Both operand orders are covered for addition.
    assert (number + 1).compute() == 11
    assert (1 + number).compute() == 11
    assert (number >> 1).compute() == 5
    # The comparison is only checked for truthiness.
    assert (number > 2).compute()
    assert (number ** 2).compute() == 100
def test_from_imperative():
    """``from_imperative`` gives a stably named Bag; ``Item`` wraps one value."""
    from dask.imperative import value, do

    first = value([1, 2, 3])
    second = value([4, 5, 6])
    third = value([7, 8, 9])

    bag = from_imperative([first, second, third])
    # Building from the same inputs again must give the same name.
    assert bag.name == from_imperative([first, second, third]).name
    assert isinstance(bag, Bag)
    assert list(bag) == [1, 2, 3, 4, 5, 6, 7, 8, 9]

    lazy_total = do(lambda X: sum(X))(first)
    item_total = db.Item.from_imperative(lazy_total)
    assert lazy_total.compute() == item_total.compute() == 6
def fit(self, X, y=None):
    """Fit every step in order and return a new Pipeline of the results.

    ``X`` (and ``y`` when given) are wrapped with ``value``.  Each step is
    passed through ``do(fit)``, and its output through ``do(transform)``
    becomes the input of the next step.
    """
    X = value(X)
    if y is not None:
        y = value(y)

    fitted_steps = []
    for name, est in self.steps:
        fitted = do(fit)(est, X, y)
        X = do(transform)(fitted, X)
        fitted_steps.append((name, fitted))
    return Pipeline(fitted_steps)
def test_iterators():
    """Iterators of lazy values are passed to the function as iterators."""
    one, two = value(1), value(2)

    total = do(sum)(iter([one, two]))
    assert total.compute() == 3

    def checked_sum(seq):
        # The wrapped function must receive an iterator, not a list.
        assert isinstance(seq, Iterator)
        return sum(seq)

    total = do(checked_sum)(iter([one, two]))
    assert total.compute() == 3
def test_to_task_dasks():
    """``to_task_dasks`` rewrites lists and dicts of lazy values into tasks."""
    a = value(1, 'a')
    b = value(2, 'b')

    def check_graphs(graphs):
        assert len(graphs) == 2
        assert a.dask in graphs
        assert b.dask in graphs

    task, dasks = to_task_dasks([a, b, 3])
    assert task == (list, ['a', 'b', 3])
    check_graphs(dasks)

    task, dasks = to_task_dasks({a: 1, b: 2})
    # Dict ordering is not fixed, so either item order is accepted.
    accepted = [
        (dict, (list, [(list, ['b', 2]), (list, ['a', 1])])),
        (dict, (list, [(list, ['a', 1]), (list, ['b', 2])])),
    ]
    assert any(task == option for option in accepted)
    check_graphs(dasks)
def test_compute():
    """``compute`` returns a tuple and looks inside list arguments."""
    base = value(1) + 5
    plus_one = base + 1
    plus_two = base + 2

    assert compute(plus_one, plus_two) == (7, 8)
    # A single argument still comes back as a 1-tuple.
    assert compute(plus_one) == (7,)
    assert compute([base, plus_one], plus_two) == ([6, 7], 8)
def test_compute():
    """Several lazy values computed together come back as one tuple."""
    six = value(1) + 5
    seven = six + 1
    eight = six + 2

    pair = compute(seven, eight)
    single = compute(seven)
    nested = compute([six, seven], eight)

    assert pair == (7, 8)
    assert single == (7,)
    assert nested == ([6, 7], 8)
def test_do():
    """``do`` wrappers compose, and keep their inputs' keys in the graph."""
    lazy_add = do(add)

    assert lazy_add(1, 2).compute() == 3
    assert (lazy_add(1, 2) + 3).compute() == 6
    assert lazy_add(lazy_add(1, 2), 3).compute() == 6

    leaf = value(1)
    nested = lazy_add(lazy_add(leaf, 2), 3)
    assert leaf.key in nested.dask
def test_sync_compute(loop):
    """``Executor.compute(..., sync=True)`` returns concrete results."""
    with cluster() as (c, [a, b]), Executor(('127.0.0.1', c['port'])) as e:
        from dask.imperative import do, value

        one = value(1)
        incremented = do(inc)(one)
        decremented = do(dec)(one)

        results = e.compute(incremented, decremented, sync=True)
        yy, zz = results
        assert (yy, zz) == (2, 0)
def test_sync_compute(loop):
    """Synchronous compute on an Executor yields plain values, not futures."""
    with cluster() as (s, [a, b]):
        address = ('127.0.0.1', s['port'])
        with Executor(address) as e:
            from dask.imperative import do, value

            x = value(1)
            up, down = do(inc)(x), do(dec)(x)
            yy, zz = e.compute(up, down, sync=True)
            assert (yy, zz) == (2, 0)
def best_parameters(estimator, cv, X, y, parameter_iterable, scorer,
                    fit_params, iid):
    """ Lazily apply fit-and-score to data on all parameters / folds

    This function does little of the input checking and it doesn't trigger
    computation.

    ``X``, ``y`` (when not None) and every train/test pair in ``cv`` are
    wrapped with ``value``.  One ``_fit_and_score`` call is built per
    (parameters, fold) pair, with parameters as the outer loop.

    Returns a lazy value object.  This should return almost immediately
    """
    X = value(X)
    if y is not None:
        y = value(y)

    # Materialise the folds: they are iterated once per parameter setting
    # and their count is needed below.
    cv = [(value(train), value(test)) for train, test in cv]

    out = [_fit_and_score(estimator, X, y, scorer, train, test,
                          parameters, fit_params)
           for parameters in parameter_iterable
           for train, test in cv]

    return do(pick_best_parameters)(out, len(cv), iid)
def test_to_task_dasks():
    """``to_task_dasks`` handles lists, dicts and plain namedtuples."""
    a = value(1, 'a')
    b = value(2, 'b')

    def check_graphs(graphs):
        assert len(graphs) == 2
        assert a.dask in graphs
        assert b.dask in graphs

    task, dasks = to_task_dasks([a, b, 3])
    assert task == ['a', 'b', 3]
    check_graphs(dasks)

    task, dasks = to_task_dasks({a: 1, b: 2})
    # Dict ordering is not fixed, so either item order is accepted.
    accepted = [
        (dict, [['b', 2], ['a', 1]]),
        (dict, [['a', 1], ['b', 2]]),
    ]
    assert any(task == option for option in accepted)
    check_graphs(dasks)

    # A namedtuple holding no lazy values is returned unchanged.
    record_type = namedtuple('f', ['x', 'y'])
    record = record_type(1, 2)
    task, dasks = to_task_dasks(record)
    assert task == record
    assert dasks == []
def test_value_errors():
    """Unsupported operations on a lazy value raise the expected errors."""
    a = value([1, 2, 3])

    forbidden = [
        # Immutable
        (TypeError, lambda: setattr(a, 'foo', 1)),
        (TypeError, lambda: setattr(a, '_key', 'test')),
        # Can't iterate, or check if contains
        (TypeError, lambda: 1 in a),
        (TypeError, lambda: list(a)),
        # No dynamic generation of magic methods
        (AttributeError, lambda: a.__len__()),
        # Truth of values forbidden
        (TypeError, lambda: bool(a)),
    ]
    for exc_type, action in forbidden:
        assert raises(exc_type, action)
def best_parameters(estimator, cv, X, y, parameter_iterable, scorer, fit_params, iid):
    """ Lazily apply fit-and-score to data on all parameters / folds

    This function does little of the input checking and it doesn't trigger
    computation.

    The inputs ``X`` and ``y`` (unless ``y`` is None) and each train/test
    split of ``cv`` are wrapped with ``value`` before being handed to
    ``_fit_and_score``; the resulting list, the number of folds and ``iid``
    are then passed to ``do(pick_best_parameters)``.

    Returns a lazy value object.  This should return almost immediately
    """
    X = value(X)
    y = None if y is None else value(y)

    # A list, so the folds can be re-iterated for every parameter setting
    # and counted with len().
    folds = [(value(train), value(test)) for train, test in cv]

    out = [
        _fit_and_score(estimator, X, y, scorer, train, test, parameters, fit_params)
        for parameters in parameter_iterable
        for train, test in folds
    ]
    return do(pick_best_parameters)(out, len(folds), iid)
def test_to_task_dasks():
    """Containers become ``(type, ...)`` tasks; namedtuples pass through."""
    a = value(1, "a")
    b = value(2, "b")
    expected_graphs = (a.dask, b.dask)

    task, dasks = to_task_dasks([a, b, 3])
    assert task == (list, ["a", "b", 3])
    assert len(dasks) == 2
    for graph in expected_graphs:
        assert graph in dasks

    task, dasks = to_task_dasks({a: 1, b: 2})
    # Either ordering of the dict items is acceptable.
    b_first = (dict, (list, [(list, ["b", 2]), (list, ["a", 1])]))
    a_first = (dict, (list, [(list, ["a", 1]), (list, ["b", 2])]))
    assert task == b_first or task == a_first
    assert len(dasks) == 2
    for graph in expected_graphs:
        assert graph in dasks

    f = namedtuple("f", ["x", "y"])
    plain = f(1, 2)
    task, dasks = to_task_dasks(plain)
    assert task == plain
    assert dasks == []
def test_literates():
    """Tuples, sets and dicts containing lazy values can be wrapped whole."""
    a = value(1)
    b = a + 1

    as_tuple = (a, b, 3)
    assert value(as_tuple).compute() == (1, 2, 3)

    as_set = set(as_tuple)
    assert value(as_set).compute() == set((1, 2, 3))

    # Lazy values used as dict keys.
    lazy_keys = {a: 'a', b: 'b', 3: 'c'}
    assert value(lazy_keys).compute() == {1: 'a', 2: 'b', 3: 'c'}
    assert value(lazy_keys)[a].compute() == 'a'

    # Lazy values used as dict values.
    lazy_values = {'a': a, 'b': b, 'c': 3}
    assert value(lazy_values).compute() == {'a': 1, 'b': 2, 'c': 3}
    assert value(lazy_values)['a'].compute() == 1
def test_literates():
    """``value`` accepts literal containers that hold other lazy values."""
    one = value(1)
    two = one + 1

    wrapped_tuple = value((one, two, 3))
    assert wrapped_tuple.compute() == (1, 2, 3)

    wrapped_set = value(set((one, two, 3)))
    assert wrapped_set.compute() == set((1, 2, 3))

    keyed_by_lazy = {one: "a", two: "b", 3: "c"}
    assert value(keyed_by_lazy).compute() == {1: "a", 2: "b", 3: "c"}
    assert value(keyed_by_lazy)[one].compute() == "a"

    keyed_by_name = {"a": one, "b": two, "c": 3}
    assert value(keyed_by_name).compute() == {"a": 1, "b": 2, "c": 3}
    assert value(keyed_by_name)["a"].compute() == 1
def _pre_transform(self, X, y=None, **fit_params):
    """Run ``X`` through every step except the last, using lazy transformers.

    ``fit_params`` keys of the form ``<step>__<param>`` are split on the
    first ``'__'`` and routed to the named step.

    Returns the transformed data together with the fit parameters that
    belong to the final step.
    """
    fit_params_steps = {step: {} for step, _ in self.steps}
    for pname, pval in six.iteritems(fit_params):
        step, param = pname.split('__', 1)
        fit_params_steps[step][param] = pval

    Xt = X
    for name, transform in self.steps[:-1]:
        lazy_transform = value(transform)
        step_params = fit_params_steps[name]
        # The capability check is made on the concrete transformer, not on
        # its lazy wrapper.
        if hasattr(transform, "fit_transform"):
            Xt = lazy_transform.fit_transform(Xt, y, **step_params)
        else:
            fitted = lazy_transform.fit(Xt, y, **step_params)
            Xt = fitted.transform(Xt)

    final_step_name = self.steps[-1][0]
    return Xt, fit_params_steps[final_step_name]
def test_value_errors():
    """Mutation, iteration, ``len`` and truth-testing of a lazy value fail."""
    a = value([1, 2, 3])

    def setitem(a, ind, val):
        a[ind] = val

    forbidden = [
        # Immutable
        (TypeError, lambda: setattr(a, 'foo', 1)),
        (TypeError, lambda: setattr(a, '_key', 'test')),
        (TypeError, lambda: setitem(a, 1, 0)),
        # Can't iterate, or check if contains
        (TypeError, lambda: 1 in a),
        (TypeError, lambda: list(a)),
        # No dynamic generation of magic methods
        (AttributeError, lambda: a.__len__()),
        # Truth of values forbidden
        (TypeError, lambda: bool(a)),
    ]
    for exc_type, action in forbidden:
        assert raises(exc_type, action)
def test_value_errors():
    """A lazy value rejects mutation, iteration, ``len`` and ``bool``."""
    lazy = value([1, 2, 3])

    def setitem(a, ind, val):
        a[ind] = val

    def fails_with_type_error(action):
        return raises(TypeError, action)

    # Immutable
    assert fails_with_type_error(lambda: setattr(lazy, "foo", 1))
    assert fails_with_type_error(lambda: setattr(lazy, "_key", "test"))
    assert fails_with_type_error(lambda: setitem(lazy, 1, 0))

    # Can't iterate, or check if contains
    assert fails_with_type_error(lambda: 1 in lazy)
    assert fails_with_type_error(lambda: list(lazy))

    # No dynamic generation of magic methods
    assert raises(AttributeError, lambda: lazy.__len__())

    # Truth of values forbidden
    assert fails_with_type_error(lambda: bool(lazy))
def test_async_compute(s, a, b):
    """``compute(..., sync=False)`` gives Futures and passes plain values through."""
    from dask.imperative import do, value

    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    one = value(1)
    incremented = do(inc)(one)
    decremented = do(dec)(one)

    yy, zz, aa = e.compute(incremented, decremented, 3, sync=False)
    for future in (yy, zz):
        assert isinstance(future, Future)
    # The non-lazy argument comes back unchanged.
    assert aa == 3

    result = yield e._gather([yy, zz])
    assert result == [2, 0]

    yield e._shutdown()
def f(c, a, b):
    """Coroutine body: asynchronous compute returns Futures for lazy inputs."""
    from dask.imperative import do, value

    e = Executor((c.ip, c.port), start=False, loop=loop)
    yield e._start()

    source = value(1)
    up = do(inc)(source)
    down = do(dec)(source)

    yy, zz, aa = e.compute(up, down, 3, sync=False)
    assert isinstance(yy, Future)
    assert isinstance(zz, Future)
    # The plain integer is returned as is.
    assert aa == 3

    result = yield e._gather([yy, zz])
    assert result == [2, 0]

    yield e._shutdown()
def test_lists():
    """A list of lazy values can be passed straight to a ``do`` wrapper."""
    one, two = value(1), value(2)
    total = do(sum)([one, two])
    assert total.compute() == 3
def test_common_subexpressions():
    """A repeated sub-expression appears only once in the resulting graph."""
    a = value([1, 2, 3])
    res = a[0] + a[0]

    graph = res.dask
    assert a[0].key in graph
    assert a.key in graph
    # Three entries are expected: the list, the indexing, and the sum.
    assert len(graph) == 3
def test_named_value():
    """An explicit ``name`` is used as the key in the value's graph."""
    named = value(1, name="X")
    assert "X" in named.dask
def test_attributes():
    """Attribute access on a lazy value is itself lazy and computable."""
    z = value(2 + 1j)
    real_part = z.real
    imag_part = z.imag
    assert real_part.compute() == 2
    assert imag_part.compute() == 1
def test_value_picklable():
    """A lazy value keeps its graph and key through a pickle round trip."""
    original = value(1)
    payload = pickle.dumps(original)
    restored = pickle.loads(payload)

    assert original.dask == restored.dask
    assert original._key == restored._key
def test_imperative_compute_forward_kwargs():
    """``compute`` accepts an extra keyword argument without raising."""
    lazy = value(1) + 2
    # No assertion: the test passes as long as this call does not raise.
    lazy.compute(bogus_keyword=10)
def test_named_value():
    """The graph of a named value contains that name as a key."""
    graph = value(1, name='X').dask
    assert 'X' in graph
def test_methods():
    """Method calls on lazy values are lazy, chainable, and honour ``pure``."""
    text = value("a b c d e")

    assert text.split(' ').compute() == ['a', 'b', 'c', 'd', 'e']

    count_of_a = text.upper().replace('B', 'A').split().count('A')
    assert count_of_a.compute() == 2

    # With pure=True the same call must produce the same key.
    first = text.split(' ', pure=True)
    second = text.split(' ', pure=True)
    assert first.key == second.key
def score(self, X, y):
    """Return ``do(accuracy_score)`` applied to the predictions for ``X`` and ``y``.

    Both inputs are wrapped with ``value`` before ``self.predict`` is called.
    """
    lazy_X, lazy_y = value(X), value(y)
    predictions = self.predict(lazy_X)
    return do(accuracy_score)(predictions, lazy_y)
def test_key_names_include_type_names():
    """The generated key for an int value starts with ``'int'``."""
    key = value(1).key
    assert key.startswith('int')
def test_value():
    """A wrapped constant computes to itself and is stored in the graph."""
    wrapped = value(1)
    assert wrapped.compute() == 1
    graph_values = wrapped.dask.values()
    assert 1 in graph_values
("svm", LinearSVC())]) # X, y = make_blobs() categories = [ 'alt.atheism', 'talk.religion.misc', ] data_train = fetch_20newsgroups(subset='train', categories=categories) data_test = fetch_20newsgroups(subset='test', categories=categories) X_train, y_train = data_train.data, data_train.target X_test, y_test = data_test.data, data_test.target for fdr in [0.05, 0.01, 0.1, 0.2]: for C in np.logspace(-3, 2, 3): pipeline.set_params(select_fdr__alpha=fdr, svm__C=C) pipeline.fit(X_train, y_train) results.append(pipeline.score(X_test, y_test)) """ from dask.diagnostics import ProgressBar ProgressBar().register() """ from dask.imperative import compute, value value(results).visualize('dask.pdf') results2 = compute(results, get=get_sync) print results2
def test_lists_are_concrete():
    """Nested lists reach the wrapped function as real lists of values."""
    a = value(1)
    b = value(2)

    pairs = [[a, 10], [b, 20]]
    # ``max`` keys on the first element, so that element must be concrete.
    largest = do(max)(pairs, key=lambda x: x[0])
    c = largest[1]
    assert c.compute() == 20
def test_methods():
    """String methods can be called, and chained, on a lazy string."""
    text = value("a b c d e")

    words = text.split(' ')
    assert words.compute() == ['a', 'b', 'c', 'd', 'e']

    count_of_a = text.upper().replace('B', 'A').split().count('A')
    assert count_of_a.compute() == 2
def test_methods():
    """Chained method calls on a lazy string compute to the eager result."""
    sentence = value("a b c d e")
    assert sentence.split(" ").compute() == ["a", "b", "c", "d", "e"]

    # Upper-casing and turning "B" into "A" leaves two "A" tokens.
    upper = sentence.upper()
    replaced = upper.replace("B", "A")
    tokens = replaced.split()
    assert tokens.count("A").compute() == 2
def _fit(self, X, y, parameter_iterable):
    """Actual fitting, performing the search over parameters.

    One ``_fit_and_score`` call is built for every (parameters, fold) pair,
    with parameters as the outer loop.  The list is wrapped with ``value``,
    kept on ``self._dask_value``, and computed.  Scores are then aggregated
    per parameter setting, and the best setting is optionally refit on the
    full data.

    Sets ``scorer_``, ``_dask_value``, ``grid_scores_``, ``best_params_``,
    ``best_score_`` and, when ``self.refit`` is true, ``best_estimator_``.

    Returns ``self``.
    """
    cv = self.cv
    self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

    X, y = indexable(X, y)
    cv = check_cv(cv, X, y, classifier=is_classifier(self.estimator))

    base_estimator = clone(self.estimator)
    out = [_fit_and_score(clone(base_estimator), X, y, self.scorer_,
                          train, test, self.verbose, parameters,
                          self.fit_params, return_parameters=True,
                          error_score=self.error_score)
           for parameters in parameter_iterable
           for train, test in cv]

    # Wrap the list once and compute that same object, so the graph stored
    # on the instance is the one that was actually run.
    self._dask_value = value(out)
    out, = compute(self._dask_value)

    n_fits = len(out)
    n_folds = len(cv)

    grid_scores = []
    # ``out`` is ordered parameters-major, so each consecutive run of
    # ``n_folds`` entries belongs to one parameter setting.
    for grid_start in range(0, n_fits, n_folds):
        n_test_samples = 0
        score = 0
        all_scores = []
        for this_score, this_n_test_samples, _, parameters in \
                out[grid_start:grid_start + n_folds]:
            all_scores.append(this_score)
            if self.iid:
                # Weight each fold by its number of test samples.
                this_score *= this_n_test_samples
                n_test_samples += this_n_test_samples
            score += this_score
        if self.iid:
            score /= float(n_test_samples)
        else:
            score /= float(n_folds)
        # TODO: shall we also store the test_fold_sizes?
        grid_scores.append(_CVScoreTuple(
            parameters,
            score,
            np.array(all_scores)))
    # Store the computed scores
    self.grid_scores_ = grid_scores

    # Find the best parameters by comparing on the mean validation score:
    # note that `sorted` is deterministic in the way it breaks ties
    best = sorted(grid_scores, key=lambda x: x.mean_validation_score,
                  reverse=True)[0]
    self.best_params_ = best.parameters
    self.best_score_ = best.mean_validation_score

    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(
            **best.parameters)
        if y is not None:
            best_estimator.fit(X, y, **self.fit_params)
        else:
            best_estimator.fit(X, **self.fit_params)
        self.best_estimator_ = best_estimator
    return self