示例#1
0
文件: test_bag.py 项目: jrenner/dask
def test_from_imperative():
    """Check that ``from_imperative`` turns a list of lazy values into a Bag."""
    from dask.imperative import value

    pieces = [value(chunk) for chunk in ([1, 2, 3], [4, 5, 6], [7, 8, 9])]
    bag = from_imperative(pieces)

    assert isinstance(bag, Bag)
    # Iterating the bag yields the elements of every chunk, in order.
    assert list(bag) == [1, 2, 3, 4, 5, 6, 7, 8, 9]
示例#2
0
def test_kwargs():
    """Check that ``do``-wrapped functions accept lazy keyword arguments."""
    def mysum(a, b, c=(), **kwargs):
        extras = sum(kwargs.values())
        return a + b + sum(c) + extras

    # Exercise both the default wrapper and the ``pure=True`` variant.
    for lazy_sum in (do(mysum), do(mysum, pure=True)):
        nested = lazy_sum(2, 2)
        total = lazy_sum(1, 2, c=[value(3), 0], four=nested)
        assert total.compute() == 10
示例#3
0
def test_kwargs():
    """Check keyword handling of ``do`` with and without ``pure=True``."""
    def mysum(a, b, c=(), **kwargs):
        positional = a + b
        return positional + sum(c) + sum(kwargs.values())

    def build_ten(wrapped):
        # 1 + 2 + (3 + 0) + (2 + 2) == 10; ``c`` and ``four`` hold lazy values.
        return wrapped(1, 2, c=[value(3), 0], four=wrapped(2, 2))

    assert build_ten(do(mysum)).compute() == 10
    assert build_ten(do(mysum, pure=True)).compute() == 10
示例#4
0
def test_operators():
    """Check that Python operators applied to lazy values compute correctly."""
    items = value([1, 2, 3])
    assert items[0].compute() == 1
    assert (items + items).compute() == [1, 2, 3, 1, 2, 3]

    ten = value(10)
    expectations = [
        (ten + 1, 11),
        (1 + ten, 11),
        (ten >> 1, 5),
        (ten**2, 100),
    ]
    for lazy, expected in expectations:
        assert lazy.compute() == expected
    # Comparison results are lazy too; the computed result must be truthy.
    assert (ten > 2).compute()
示例#5
0
def test_operators():
    """Check indexing, arithmetic, shift, comparison and power on lazy values."""
    seq = value([1, 2, 3])
    head = seq[0]
    assert head.compute() == 1
    doubled = seq + seq
    assert doubled.compute() == [1, 2, 3, 1, 2, 3]

    num = value(10)
    right_add = num + 1
    assert right_add.compute() == 11
    left_add = 1 + num
    assert left_add.compute() == 11
    shifted = num >> 1
    assert shifted.compute() == 5
    bigger = num > 2
    assert bigger.compute()
    squared = num ** 2
    assert squared.compute() == 100
示例#6
0
def test_from_imperative():
    """Check Bag and Item construction from lazy values."""
    from dask.imperative import value, do

    parts = [value([1, 2, 3]), value([4, 5, 6]), value([7, 8, 9])]
    bag = from_imperative(parts)
    # Building a second bag from the same values must give the same name.
    assert bag.name == from_imperative(parts).name

    assert isinstance(bag, Bag)
    assert list(bag) == [1, 2, 3, 4, 5, 6, 7, 8, 9]

    lazy_total = do(lambda X: sum(X))(parts[0])
    item = db.Item.from_imperative(lazy_total)
    assert lazy_total.compute() == item.compute() == 6
示例#7
0
def test_from_imperative():
    """Check that bags and items built from lazy values compute as expected."""
    from dask.imperative import value, do

    first = value([1, 2, 3])
    second = value([4, 5, 6])
    third = value([7, 8, 9])

    bag = from_imperative([first, second, third])
    rebuilt = from_imperative([first, second, third])
    # Two bags built from the same lazy values carry the same name.
    assert bag.name == rebuilt.name

    assert isinstance(bag, Bag)
    assert list(bag) == [1, 2, 3, 4, 5, 6, 7, 8, 9]

    summed = do(lambda X: sum(X))(first)
    as_item = db.Item.from_imperative(summed)
    from_value = summed.compute()
    from_item = as_item.compute()
    assert from_value == from_item == 6
示例#8
0
    def fit(self, X, y=None):
        """Return a new Pipeline whose steps are lazily fitted estimators.

        ``X`` (and ``y`` when it is not None) is wrapped with ``value``.  For
        each step a lazy ``fit`` of the estimator is built, followed by a
        lazy ``transform`` of ``X`` by that fitted estimator; the transformed
        ``X`` feeds the next step.  Nothing in this method calls ``compute``.
        """
        X = value(X)
        y = None if y is None else value(y)

        fitted = []
        for _, est in self.steps:
            fitted_est = do(fit)(est, X, y)
            fitted.append(fitted_est)
            X = do(transform)(fitted_est, X)

        # Pair every original step name with its lazily fitted estimator.
        names = [name for name, _ in self.steps]
        return Pipeline(list(zip(names, fitted)))
示例#9
0
def test_iterators():
    """Check that iterators of lazy values can be passed to wrapped functions."""
    def f(seq):
        # The wrapped function must be handed an Iterator instance.
        assert isinstance(seq, Iterator)
        return sum(seq)

    one, two = value(1), value(2)
    for func in (sum, f):
        total = do(func)(iter([one, two]))
        assert total.compute() == 3
示例#10
0
def test_iterators():
    """Check that an iterator argument reaches the wrapped function intact."""
    first = value(1)
    second = value(2)

    plain_total = do(sum)(iter([first, second]))
    assert plain_total.compute() == 3

    def checked_sum(seq):
        # Fails the test if the argument is not an Iterator instance.
        assert isinstance(seq, Iterator)
        return sum(seq)

    checked_total = do(checked_sum)(iter([first, second]))
    assert checked_total.compute() == 3
示例#11
0
def test_to_task_dasks():
    """Check the task and graphs ``to_task_dasks`` returns for a list and a dict."""
    a = value(1, 'a')
    b = value(2, 'b')

    def check_graphs(dasks):
        # Both lazy inputs must contribute their graph.
        assert len(dasks) == 2
        assert a.dask in dasks
        assert b.dask in dasks

    task, dasks = to_task_dasks([a, b, 3])
    assert task == (list, ['a', 'b', 3])
    check_graphs(dasks)

    task, dasks = to_task_dasks({a: 1, b: 2})
    pair_a = (list, ['a', 1])
    pair_b = (list, ['b', 2])
    # Either ordering of the key/value pairs is accepted.
    assert (task == (dict, (list, [pair_b, pair_a]))
            or task == (dict, (list, [pair_a, pair_b])))
    check_graphs(dasks)
示例#12
0
def test_to_task_dasks():
    """Check ``to_task_dasks`` on a list and on a dict keyed by lazy values."""
    lazy_a = value(1, 'a')
    lazy_b = value(2, 'b')

    list_task, list_graphs = to_task_dasks([lazy_a, lazy_b, 3])
    assert list_task == (list, ['a', 'b', 3])
    assert len(list_graphs) == 2
    for lazy in (lazy_a, lazy_b):
        assert lazy.dask in list_graphs

    dict_task, dict_graphs = to_task_dasks({lazy_a: 1, lazy_b: 2})
    # Either ordering of the key/value pairs is accepted.
    accepted = [
        (dict, (list, [(list, ['b', 2]), (list, ['a', 1])])),
        (dict, (list, [(list, ['a', 1]), (list, ['b', 2])])),
    ]
    assert any(dict_task == option for option in accepted)
    assert len(dict_graphs) == 2
    for lazy in (lazy_a, lazy_b):
        assert lazy.dask in dict_graphs
示例#13
0
def test_compute():
    """Check ``compute`` with several values, one value, and a nested list."""
    six = value(1) + 5
    seven = six + 1
    eight = six + 2

    pair = compute(seven, eight)
    assert pair == (7, 8)
    # A single argument still comes back wrapped in a tuple.
    single = compute(seven)
    assert single == (7, )
    nested = compute([six, seven], eight)
    assert nested == ([6, 7], 8)
示例#14
0
def test_compute():
    """Check that ``compute`` returns one result per positional argument."""
    base = value(1) + 5
    plus_one = base + 1
    plus_two = base + 2

    # Each case is (positional arguments, expected tuple of results).
    cases = [
        ((plus_one, plus_two), (7, 8)),
        ((plus_one,), (7,)),
        (([base, plus_one], plus_two), ([6, 7], 8)),
    ]
    for args, expected in cases:
        assert compute(*args) == expected
示例#15
0
def test_do():
    """Check that ``do`` builds lazy calls that nest and combine correctly."""
    lazy_add = do(add)

    three = lazy_add(1, 2)
    assert three.compute() == 3
    assert (lazy_add(1, 2) + 3).compute() == 6
    assert lazy_add(lazy_add(1, 2), 3).compute() == 6

    # The graph of a nested call contains the key of the innermost value.
    leaf = value(1)
    inner = lazy_add(leaf, 2)
    outer = lazy_add(inner, 3)
    assert leaf.key in outer.dask
示例#16
0
def test_do():
    """Check basic, operator-combined and nested calls made through ``do``."""
    add2 = do(add)

    simple = add2(1, 2).compute()
    assert simple == 3
    with_operator = (add2(1, 2) + 3).compute()
    assert with_operator == 6
    nested = add2(add2(1, 2), 3).compute()
    assert nested == 6

    source = value(1)
    chained = add2(add2(source, 2), 3)
    # The key of the lazy input is present in the combined graph.
    assert source.key in chained.dask
示例#17
0
def test_sync_compute(loop):
    """Check ``Executor.compute(..., sync=True)`` returns concrete results."""
    with cluster() as (sched, [_worker_a, _worker_b]):
        address = ('127.0.0.1', sched['port'])
        with Executor(address) as executor:
            from dask.imperative import do, value
            one = value(1)
            incremented = do(inc)(one)
            decremented = do(dec)(one)

            results = executor.compute(incremented, decremented, sync=True)
            plus, minus = results
            assert (plus, minus) == (2, 0)
示例#18
0
def test_sync_compute(loop):
    """Check that a synchronous ``compute`` on the executor yields values."""
    with cluster() as (s, [a, b]), \
            Executor(('127.0.0.1', s['port'])) as e:
        from dask.imperative import do, value
        base = value(1)
        up = do(inc)(base)
        down = do(dec)(base)

        # With ``sync=True`` the results are compared directly to plain ints.
        up_result, down_result = e.compute(up, down, sync=True)
        assert (up_result, down_result) == (2, 0)
示例#19
0
def best_parameters(estimator, cv, X, y, parameter_iterable, scorer,
                    fit_params, iid):
    """ Lazily apply fit-and-score to data on all parameters / folds

    This function performs little input checking and does not trigger
    computation itself.

    ``X``, ``y`` (when not None) and every train/test split in ``cv`` are
    wrapped with ``value``; ``_fit_and_score`` is then called once per
    (parameters, split) pair, with parameters as the outer loop.

    Returns a lazy value object.  This should return almost immediately
    """
    X = value(X)
    y = y if y is None else value(y)
    # Materialise the splits so they can be iterated once per parameter
    # setting and counted with ``len`` below.
    cv = [(value(train), value(test)) for train, test in cv]

    out = [_fit_and_score(estimator, X, y, scorer, train,
                          test, parameters, fit_params)
           for parameters in parameter_iterable
           for train, test in cv]

    return do(pick_best_parameters)(out, len(cv), iid)
示例#20
0
def test_to_task_dasks():
    """Check ``to_task_dasks`` on a list, a dict and a namedtuple."""
    a = value(1, 'a')
    b = value(2, 'b')

    def check_graphs(dasks):
        # Both lazy inputs must contribute their graph.
        assert len(dasks) == 2
        assert a.dask in dasks
        assert b.dask in dasks

    task, dasks = to_task_dasks([a, b, 3])
    assert task == ['a', 'b', 3]
    check_graphs(dasks)

    task, dasks = to_task_dasks({a: 1, b: 2})
    pair_a, pair_b = ['a', 1], ['b', 2]
    # Either ordering of the key/value pairs is accepted.
    assert (task == (dict, [pair_b, pair_a])
            or task == (dict, [pair_a, pair_b]))
    check_graphs(dasks)

    # A namedtuple without lazy fields comes back unchanged, with no graphs.
    record_type = namedtuple('f', ['x', 'y'])
    record = record_type(1, 2)
    task, dasks = to_task_dasks(record)
    assert task == record
    assert dasks == []
示例#21
0
def test_to_task_dasks():
    """Check the tasks and graphs produced for list, dict and namedtuple input."""
    lazy_a = value(1, 'a')
    lazy_b = value(2, 'b')

    list_task, list_graphs = to_task_dasks([lazy_a, lazy_b, 3])
    assert list_task == ['a', 'b', 3]
    assert len(list_graphs) == 2
    for lazy in (lazy_a, lazy_b):
        assert lazy.dask in list_graphs

    dict_task, dict_graphs = to_task_dasks({lazy_a: 1, lazy_b: 2})
    # Either ordering of the key/value pairs is accepted.
    accepted = [
        (dict, [['b', 2], ['a', 1]]),
        (dict, [['a', 1], ['b', 2]]),
    ]
    assert any(dict_task == option for option in accepted)
    assert len(dict_graphs) == 2
    for lazy in (lazy_a, lazy_b):
        assert lazy.dask in dict_graphs

    Pair = namedtuple('f', ['x', 'y'])
    plain = Pair(1, 2)
    tuple_task, tuple_graphs = to_task_dasks(plain)
    # Nothing lazy inside: the task equals the input and no graphs come back.
    assert tuple_task == plain
    assert tuple_graphs == []
示例#22
0
def test_value_errors():
    """Check the operations that a lazy value refuses to perform."""
    lazy = value([1, 2, 3])

    # Attribute assignment raises TypeError.
    for attr, new in (('foo', 1), ('_key', 'test')):
        assert raises(TypeError, lambda: setattr(lazy, attr, new))

    # Membership tests and iteration raise TypeError.
    assert raises(TypeError, lambda: 1 in lazy)
    assert raises(TypeError, lambda: list(lazy))

    # Magic methods are not generated dynamically.
    assert raises(AttributeError, lambda: lazy.__len__())

    # Truth-testing a lazy value raises TypeError.
    assert raises(TypeError, lambda: bool(lazy))
示例#23
0
def best_parameters(estimator, cv, X, y, parameter_iterable, scorer,
                    fit_params, iid):
    """ Lazily apply fit-and-score to data on all parameters / folds

    This function performs little input checking and does not trigger
    computation itself.

    The data and every train/test split are wrapped with ``value`` before
    being handed to ``_fit_and_score``, which is called once per
    (parameters, split) pair with parameters as the outer loop.

    Returns a lazy value object.  This should return almost immediately
    """
    X = value(X)
    y = y if y is None else value(y)
    # Turn ``cv`` into a list: it is iterated once per parameter setting
    # and its length is passed on below.
    cv = [(value(train), value(test)) for train, test in cv]

    out = [
        _fit_and_score(estimator, X, y, scorer, train, test, parameters,
                       fit_params) for parameters in parameter_iterable
        for train, test in cv
    ]

    return do(pick_best_parameters)(out, len(cv), iid)
示例#24
0
def test_to_task_dasks():
    """Check ``to_task_dasks`` output for list, dict and namedtuple arguments."""
    a = value(1, "a")
    b = value(2, "b")

    def check_graphs(dasks):
        # Both lazy inputs must contribute their graph.
        assert len(dasks) == 2
        assert a.dask in dasks
        assert b.dask in dasks

    task, dasks = to_task_dasks([a, b, 3])
    assert task == (list, ["a", "b", 3])
    check_graphs(dasks)

    task, dasks = to_task_dasks({a: 1, b: 2})
    pair_a = (list, ["a", 1])
    pair_b = (list, ["b", 2])
    b_first = (dict, (list, [pair_b, pair_a]))
    a_first = (dict, (list, [pair_a, pair_b]))
    # Either ordering of the key/value pairs is accepted.
    assert task == b_first or task == a_first
    check_graphs(dasks)

    # A namedtuple without lazy fields comes back unchanged, with no graphs.
    record_type = namedtuple("f", ["x", "y"])
    record = record_type(1, 2)
    task, dasks = to_task_dasks(record)
    assert task == record
    assert dasks == []
示例#25
0
def test_literates():
    """Check that tuples, sets and dicts holding lazy values are resolved."""
    one = value(1)
    two = one + 1

    as_tuple = value((one, two, 3))
    assert as_tuple.compute() == (1, 2, 3)

    as_set = value(set((one, two, 3)))
    assert as_set.compute() == set((1, 2, 3))

    # Lazy values used as dict keys.
    keyed = {one: 'a', two: 'b', 3: 'c'}
    assert value(keyed).compute() == {1: 'a', 2: 'b', 3: 'c'}
    assert value(keyed)[one].compute() == 'a'

    # Lazy values used as dict values.
    valued = {'a': one, 'b': two, 'c': 3}
    assert value(valued).compute() == {'a': 1, 'b': 2, 'c': 3}
    assert value(valued)['a'].compute() == 1
示例#26
0
def test_literates():
    """Check ``value`` on container literals that mix lazy and plain items."""
    a = value(1)
    b = a + 1

    # Tuple and set containers: (container, expected computed result).
    for container, expected in (
            ((a, b, 3), (1, 2, 3)),
            (set((a, b, 3)), set((1, 2, 3))),
    ):
        assert value(container).compute() == expected

    by_key = {a: 'a', b: 'b', 3: 'c'}
    assert value(by_key).compute() == {1: 'a', 2: 'b', 3: 'c'}
    # Indexing the lazy dict with a lazy key resolves to the stored item.
    assert value(by_key)[a].compute() == 'a'

    by_value = {'a': a, 'b': b, 'c': 3}
    assert value(by_value).compute() == {'a': 1, 'b': 2, 'c': 3}
    assert value(by_value)['a'].compute() == 1
示例#27
0
def test_literates():
    """Check that lazy values nested in literals are computed along with them."""
    base = value(1)
    successor = base + 1

    tuple_result = value((base, successor, 3)).compute()
    assert tuple_result == (1, 2, 3)

    set_result = value(set((base, successor, 3))).compute()
    assert set_result == set((1, 2, 3))

    # Dict whose keys are lazy.
    lazy_keys = {base: "a", successor: "b", 3: "c"}
    assert value(lazy_keys).compute() == {1: "a", 2: "b", 3: "c"}
    assert value(lazy_keys)[base].compute() == "a"

    # Dict whose values are lazy.
    lazy_values = {"a": base, "b": successor, "c": 3}
    assert value(lazy_values).compute() == {"a": 1, "b": 2, "c": 3}
    assert value(lazy_values)["a"].compute() == 1
示例#28
0
 def _pre_transform(self, X, y=None, **fit_params):
     """Chain every step but the last over ``X`` using lazy transformers.

     Keyword arguments named ``<step>__<param>`` are split on the first
     ``'__'`` and grouped per step.  Each non-final step is wrapped with
     ``value`` and applied via ``fit_transform`` when the underlying object
     has that attribute, otherwise via ``fit`` followed by ``transform``.

     Returns a tuple of the transformed data and the fit parameters
     collected for the final step.
     """
     params_by_step = dict((step_name, {}) for step_name, _ in self.steps)
     for qualified, setting in six.iteritems(fit_params):
         step_name, option = qualified.split('__', 1)
         params_by_step[step_name][option] = setting

     data = X
     for step_name, transformer in self.steps[:-1]:
         lazy = value(transformer)
         step_kwargs = params_by_step[step_name]
         if not hasattr(transformer, "fit_transform"):
             fitted = lazy.fit(data, y, **step_kwargs)
             data = fitted.transform(data)
         else:
             data = lazy.fit_transform(data, y, **step_kwargs)

     final_name = self.steps[-1][0]
     return data, params_by_step[final_name]
示例#29
0
def test_value_errors():
    """Check the operations that a lazy value refuses to perform."""
    lazy = value([1, 2, 3])

    def assign_item(target, index, new):
        target[index] = new

    # Attribute and item assignment raise TypeError.
    for attr, new in (('foo', 1), ('_key', 'test')):
        assert raises(TypeError, lambda: setattr(lazy, attr, new))
    assert raises(TypeError, lambda: assign_item(lazy, 1, 0))

    # Membership tests and iteration raise TypeError.
    assert raises(TypeError, lambda: 1 in lazy)
    assert raises(TypeError, lambda: list(lazy))

    # Magic methods are not generated dynamically.
    assert raises(AttributeError, lambda: lazy.__len__())

    # Truth-testing a lazy value raises TypeError.
    assert raises(TypeError, lambda: bool(lazy))
示例#30
0
def test_value_errors():
    """Check that mutation, iteration, ``len`` and ``bool`` all raise."""
    target = value([1, 2, 3])

    def store(container, position, item):
        container[position] = item

    # (expected exception, operation) pairs, in the order they are checked.
    forbidden = [
        # Immutable
        (TypeError, lambda: setattr(target, "foo", 1)),
        (TypeError, lambda: setattr(target, "_key", "test")),
        (TypeError, lambda: store(target, 1, 0)),
        # Can't iterate, or check if contains
        (TypeError, lambda: 1 in target),
        (TypeError, lambda: list(target)),
        # No dynamic generation of magic methods
        (AttributeError, lambda: target.__len__()),
        # Truth of values forbidden
        (TypeError, lambda: bool(target)),
    ]
    for error, operation in forbidden:
        assert raises(error, operation)
示例#31
0
def test_async_compute(s, a, b):
    """Check ``Executor.compute(..., sync=False)`` on lazy and plain inputs."""
    executor = Executor((s.ip, s.port), start=False)
    yield executor._start()

    from dask.imperative import do, value
    one = value(1)
    lazy_inc = do(inc)(one)
    lazy_dec = do(dec)(one)

    # Lazy arguments come back as futures; the plain ``3`` comes back as is.
    submitted = executor.compute(lazy_inc, lazy_dec, 3, sync=False)
    fut_inc, fut_dec, passthrough = submitted
    for fut in (fut_inc, fut_dec):
        assert isinstance(fut, Future)
    assert passthrough == 3

    gathered = yield executor._gather([fut_inc, fut_dec])
    assert gathered == [2, 0]

    yield executor._shutdown()
示例#32
0
    def f(c, a, b):
        """Run an asynchronous ``compute`` against the scheduler ``c``."""
        client = Executor((c.ip, c.port), start=False, loop=loop)
        yield client._start()

        from dask.imperative import do, value
        seed = value(1)
        up = do(inc)(seed)
        down = do(dec)(seed)

        # Lazy arguments come back as futures; the plain ``3`` comes back
        # as is.
        up_future, down_future, literal = client.compute(up, down, 3,
                                                         sync=False)
        assert isinstance(up_future, Future)
        assert isinstance(down_future, Future)
        assert literal == 3

        futures = [up_future, down_future]
        gathered = yield client._gather(futures)
        assert gathered == [2, 0]

        yield client._shutdown()
示例#33
0
def test_lists():
    """Check that a list of lazy values can be passed to a wrapped function."""
    operands = [value(1), value(2)]
    total = do(sum)(operands)
    assert total.compute() == 3
示例#34
0
def test_common_subexpressions():
    """Check the graph built from adding two separately created ``a[0]``."""
    seq = value([1, 2, 3])
    # Index twice on purpose: each ``seq[0]`` is built by its own call.
    total = seq[0] + seq[0]
    graph = total.dask
    assert seq[0].key in graph
    assert seq.key in graph
    # Exactly three entries are expected in the combined graph.
    assert len(graph) == 3
示例#35
0
def test_named_value():
    """Check that an explicit ``name`` is used as a key in the graph."""
    named = value(1, name="X")
    assert "X" in named.dask
示例#36
0
def test_attributes():
    """Check that attribute access on a lazy value is itself lazy."""
    number = value(2 + 1j)
    real_part, imag_part = number.real, number.imag
    assert real_part.compute() == 2
    assert imag_part.compute() == 1
示例#37
0
def test_value_picklable():
    """Check that a lazy value survives a pickle round trip."""
    original = value(1)
    payload = pickle.dumps(original)
    restored = pickle.loads(payload)
    # Both the graph and the private key must be preserved.
    assert original.dask == restored.dask
    assert original._key == restored._key
示例#38
0
def test_imperative_compute_forward_kwargs():
    """Check that ``compute`` accepts an arbitrary keyword without raising."""
    three = value(1) + 2
    # No assertion: the call only has to complete.
    three.compute(bogus_keyword=10)
示例#39
0
def test_named_value():
    """Check that the graph of a named value contains that name."""
    graph = value(1, name='X').dask
    assert 'X' in graph
示例#40
0
def test_attributes():
    """Check ``real`` and ``imag`` access on a lazy complex number."""
    z = value(2 + 1j)
    # (attribute name, expected computed result)
    for attr, expected in (('real', 2), ('imag', 1)):
        assert getattr(z, attr).compute() == expected
示例#41
0
def test_common_subexpressions():
    """Check graph contents when the same lazy index appears twice."""
    items = value([1, 2, 3])
    # The two operands are produced by separate ``items[0]`` calls.
    summed = items[0] + items[0]
    expected_keys = (items[0].key, items.key)
    for key in expected_keys:
        assert key in summed.dask
    assert len(summed.dask) == 3
示例#42
0
def test_methods():
    """Check lazy method calls, method chaining and ``pure=True`` keys."""
    text = value("a b c d e")

    words = text.split(' ')
    assert words.compute() == ['a', 'b', 'c', 'd', 'e']

    count_a = text.upper().replace('B', 'A').split().count('A')
    assert count_a.compute() == 2

    # Two independent ``pure=True`` calls must yield the same key.
    first_key = text.split(' ', pure=True).key
    second_key = text.split(' ', pure=True).key
    assert first_key == second_key
示例#43
0
 def score(self, X, y):
     """Return a lazy ``accuracy_score`` of ``self.predict(X)`` against ``y``.

     Both inputs are wrapped with ``value`` first; the score is built with
     ``do`` and is not computed here.
     """
     lazy_X, lazy_y = value(X), value(y)
     predicted = self.predict(lazy_X)
     lazy_accuracy = do(accuracy_score)
     return lazy_accuracy(predicted, lazy_y)
示例#44
0
def test_key_names_include_type_names():
    """Check that the key of a lazy int starts with ``'int'``."""
    key = value(1).key
    assert key.startswith('int')
示例#45
0
def test_value():
    """Check that ``value`` stores its argument in the graph and computes it."""
    lazy = value(1)
    assert lazy.compute() == 1
    stored = lazy.dask.values()
    assert 1 in stored
示例#46
0
                         ("svm", LinearSVC())])

# X, y = make_blobs()
# Restrict the 20 newsgroups data to two categories.
categories = [
    'alt.atheism',
    'talk.religion.misc',
]

data_train = fetch_20newsgroups(subset='train', categories=categories)
data_test = fetch_20newsgroups(subset='test', categories=categories)
X_train, y_train = data_train.data, data_train.target
X_test, y_test = data_test.data, data_test.target

# Fit and score the pipeline for every (alpha, C) combination.
# NOTE(review): ``pipeline`` and ``results`` are defined before this excerpt;
# the scores appended here are presumably lazy, since they are only
# evaluated by ``compute`` below -- confirm.
for fdr in [0.05, 0.01, 0.1, 0.2]:
    for C in np.logspace(-3, 2, 3):
        pipeline.set_params(select_fdr__alpha=fdr, svm__C=C)
        pipeline.fit(X_train, y_train)
        results.append(pipeline.score(X_test, y_test))


"""
from dask.diagnostics import ProgressBar
ProgressBar().register()
"""

from dask.imperative import compute, value
# Render the combined task graph to a file, then evaluate every score with
# the ``get_sync`` scheduler.
value(results).visualize('dask.pdf')
results2 = compute(results, get=get_sync)
# Function-call form: prints the same on Python 2 and is valid on Python 3.
print(results2)

示例#47
0
def test_lists_are_concrete():
    """Check that nested lists reach the wrapped function as real lists."""
    first = value(1)
    second = value(2)
    pairs = [[first, 10], [second, 20]]

    # ``max`` compares on the first element of each pair, so the key
    # function needs concrete numbers by the time it runs.
    largest = do(max)(pairs, key=lambda pair: pair[0])
    payload = largest[1]

    assert payload.compute() == 20
示例#48
0
def test_methods():
    """Check lazy method calls and method chaining on a lazy string."""
    text = value("a b c d e")

    words = text.split(' ')
    assert words.compute() == ['a', 'b', 'c', 'd', 'e']

    count_a = text.upper().replace('B', 'A').split().count('A')
    assert count_a.compute() == 2
示例#49
0
def test_imperative_compute_forward_kwargs():
    """Check that an unknown keyword passed to ``compute`` does not raise."""
    lazy_sum = value(1) + 2
    extra_options = {'bogus_keyword': 10}
    # No assertion: the call only has to complete.
    lazy_sum.compute(**extra_options)
示例#50
0
def test_named_value():
    """Check that a value created with ``name='X'`` has ``'X'`` in its graph."""
    lazy = value(1, name='X')
    graph = lazy.dask
    assert 'X' in graph
示例#51
0
def test_methods():
    """Check that string methods can be called and chained on a lazy string."""
    sentence = value("a b c d e")

    split_result = sentence.split(" ").compute()
    assert split_result == ["a", "b", "c", "d", "e"]

    # Upper-casing then replacing "B" gives two "A" tokens after splitting.
    upper = sentence.upper()
    replaced = upper.replace("B", "A")
    occurrences = replaced.split().count("A")
    assert occurrences.compute() == 2
示例#52
0
def test_value():
    """Check that a wrapped literal computes to itself and sits in the graph."""
    wrapped = value(1)
    result = wrapped.compute()
    assert result == 1
    assert 1 in wrapped.dask.values()
示例#53
0
    def _fit(self, X, y, parameter_iterable):
        """Actual fitting, performing the search over parameters.

        One ``_fit_and_score`` result is collected per (parameters, fold)
        pair, with parameters as the outer loop.  The list is wrapped with
        ``value``, stored on ``self._dask_value`` and evaluated by a single
        ``compute`` call.  Per-fold scores are then averaged per parameter
        setting and the best setting is selected.

        Parameters
        ----------
        X, y
            Data passed through ``indexable`` and on to ``check_cv`` and
            ``_fit_and_score``.
        parameter_iterable
            Iterable of parameter settings to evaluate.

        Returns
        -------
        self
            With ``scorer_``, ``_dask_value``, ``grid_scores_``,
            ``best_params_`` and ``best_score_`` set, plus
            ``best_estimator_`` when ``self.refit`` is true.
        """

        cv = self.cv
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

        X, y = indexable(X, y)

        cv = check_cv(cv, X, y, classifier=is_classifier(self.estimator))

        base_estimator = clone(self.estimator)
        out = [_fit_and_score(clone(base_estimator), X, y, self.scorer_, train,
                              test, self.verbose, parameters, self.fit_params,
                              return_parameters=True,
                              error_score=self.error_score)
               for parameters in parameter_iterable
               for train, test in cv]
        # Wrap the results once, keep the wrapper on the instance, and
        # evaluate that same object instead of wrapping the list again.
        self._dask_value = value(out)

        out, = compute(self._dask_value)
        n_fits = len(out)
        n_folds = len(cv)

        grid_scores = []
        # Results are grouped by parameter setting: each run of ``n_folds``
        # consecutive entries belongs to one setting.
        for grid_start in range(0, n_fits, n_folds):
            n_test_samples = 0
            score = 0
            all_scores = []
            for this_score, this_n_test_samples, _, parameters in \
                    out[grid_start:grid_start + n_folds]:
                all_scores.append(this_score)
                if self.iid:
                    # Weight each fold by its number of test samples.
                    this_score *= this_n_test_samples
                    n_test_samples += this_n_test_samples
                score += this_score
            if self.iid:
                score /= float(n_test_samples)
            else:
                score /= float(n_folds)
            # TODO: shall we also store the test_fold_sizes?
            grid_scores.append(_CVScoreTuple(
                parameters,
                score,
                np.array(all_scores)))
        # Store the computed scores
        self.grid_scores_ = grid_scores

        # Find the best parameters by comparing on the mean validation score:
        # note that `sorted` is deterministic in the way it breaks ties
        best = sorted(grid_scores, key=lambda x: x.mean_validation_score,
                      reverse=True)[0]
        self.best_params_ = best.parameters
        self.best_score_ = best.mean_validation_score

        if self.refit:
            # fit the best estimator using the entire dataset
            # clone first to work around broken estimators
            best_estimator = clone(base_estimator).set_params(
                **best.parameters)
            if y is not None:
                best_estimator.fit(X, y, **self.fit_params)
            else:
                best_estimator.fit(X, **self.fit_params)
            self.best_estimator_ = best_estimator
        return self
示例#54
0
def test_lists():
    """Check that ``do(sum)`` adds up a list of lazy values."""
    one, two = value(1), value(2)
    lazy_total = do(sum)([one, two])
    result = lazy_total.compute()
    assert result == 3
示例#55
0
def test_lists_are_concrete():
    """Check that lists nested in arguments are usable by a ``key`` function."""
    low, high = value(1), value(2)

    def first_item(pair):
        return pair[0]

    # Pick the pair with the largest first element, then take its second.
    winner = do(max)([[low, 10], [high, 20]], key=first_item)
    chosen = winner[1]

    assert chosen.compute() == 20
示例#56
0
def test_value_picklable():
    """Check that pickling and unpickling keeps a lazy value's graph and key."""
    before = value(1)
    after = pickle.loads(pickle.dumps(before))
    # Compare the graph and the private key attribute of both objects.
    for attr in ('dask', '_key'):
        assert getattr(before, attr) == getattr(after, attr)