示例#1
0
def test_dask_pipeline_with_parameters_2_context():
    """Check that ``DelayedParameter.context`` overrides are scoped to the ``with`` block.

    The same task graph is computed three times: with the initial parameter
    values (expected 8.5), inside a context overriding both parameters
    (expected 9.5), and once more after the context has exited (expected
    8.5 again).
    """
    clear_cache(CACHE_DIR)

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_1(ts: dt.datetime):
        assert ts > dt.datetime(2019, 1, 1)
        return 5

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_2(fix: float):
        return 3 + fix

    @delayed()
    @cached(folder=CACHE_DIR)
    def add(x, y):
        return x + y

    ts_param = DelayedParameter("ts", value=dt.datetime(2020, 1, 1))
    fix_param = DelayedParameter("fix", value=0.5)

    # Build the graph once; every computation below reuses it.
    total = add(load_data_1(ts_param()), load_data_2(fix_param()))

    # Initial values: 5 + (3 + 0.5).
    [baseline] = delayed_compute((total,))
    assert abs(baseline - 8.5) < EPS

    # Overridden values: 5 + (3 + 1.5).
    with ts_param.context(dt.datetime(2020, 2, 1)):
        with fix_param.context(1.5):
            [overridden] = delayed_compute((total,))
            assert abs(overridden - 9.5) < EPS

    # Outside the contexts the initial values are expected again.
    [restored] = delayed_compute((total,))
    assert abs(restored - 8.5) < EPS
示例#2
0
def test_dask_pipeline_sequential_runs():
    """Check that a second ``delayed_compute`` call reuses results of the first.

    Both loaders sleep for one second. Computing them together is asserted
    to take between 0.95 and 1.95 seconds; computing their sum afterwards is
    asserted to take less than 0.95 seconds and to equal 8.
    """
    clear_cache(CACHE_DIR)

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_1():
        time.sleep(1)
        return 5

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_2():
        time.sleep(1)
        return 3

    @delayed()
    @cached(folder=CACHE_DIR)
    def add(x, y):
        return x + y

    d1 = load_data_1()
    d2 = load_data_2()
    r = add(d1, d2)

    # Elapsed time is measured with perf_counter: it is monotonic, so the
    # bounds below cannot be upset by wall-clock adjustments, and it avoids
    # the deprecated datetime.utcnow().
    start = time.perf_counter()
    d1_, d2_ = delayed_compute((d1, d2))
    delay = time.perf_counter() - start
    # The upper bound is below the two seconds a strictly serial run of both
    # sleeps would need.
    assert 0.95 < delay < 1.95
    assert (d1_, d2_) == (5, 3)

    start = time.perf_counter()
    (output,) = delayed_compute((r,))
    delay = time.perf_counter() - start
    # Neither one-second loader may run again for this bound to hold.
    assert delay < 0.95
    assert output == 8
示例#3
0
def test_dask_pipeline_with_parameters_2():
    """Check caching of a parameterised pipeline before and after ``set``.

    The graph is computed four times. The first run and the first run after
    both parameters are changed with ``set`` are asserted to take between
    0.95 and 1.95 seconds (each loader sleeps for one second); the run that
    directly follows each of them is asserted to take less than 0.95
    seconds. Results are 8.5 for the initial values and 9.5 afterwards.
    """
    clear_cache(CACHE_DIR)

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_1(ts: dt.datetime):
        assert ts > dt.datetime(2019, 1, 1)
        time.sleep(1)
        return 5

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_2(fix: float):
        time.sleep(1)
        return 3 + fix

    @delayed()
    @cached(folder=CACHE_DIR)
    def add(x, y):
        return x + y

    ts = DelayedParameter("ts", value=dt.datetime(2020, 1, 1))
    fix = DelayedParameter("fix", value=0.5)
    d1 = load_data_1(ts())
    d2 = load_data_2(fix())
    r = add(d1, d2)

    def timed_compute():
        # Compute the graph once and return (result, elapsed seconds).
        # perf_counter is monotonic, so the measurement is unaffected by
        # wall-clock adjustments, and it avoids the deprecated utcnow().
        start = time.perf_counter()
        (result,) = delayed_compute((r,))
        return result, time.perf_counter() - start

    # First run with the initial parameters: the loaders actually sleep.
    output, delay = timed_compute()
    assert 0.95 < delay < 1.95
    assert abs(output - 8.5) < EPS

    # Same parameters again: expected to be fast.
    output, delay = timed_compute()
    assert delay < 0.95
    assert abs(output - 8.5) < EPS

    # New parameter values: the loaders are expected to run again.
    ts.set(dt.datetime(2020, 2, 1))
    fix.set(1.5)
    output, delay = timed_compute()
    assert 0.95 < delay < 1.95
    assert abs(output - 9.5) < EPS

    # Repeating with the new values is expected to be fast again.
    output, delay = timed_compute()
    assert delay < 0.95
    assert abs(output - 9.5) < EPS
示例#4
0
def test_dask_pipeline_multiple_outputs():
    """Check a pipeline in which one task is declared with two outputs.

    ``split_data`` is decorated with ``nout=2`` on both ``delayed`` and
    ``cached``; each of its two outputs is fed to ``compute_sum``. Both
    halves and both sums are computed in a single ``delayed_compute`` call
    and compared with the expected values. No timing is asserted here.
    """
    clear_cache(CACHE_DIR)

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data():
        return [1, 1, 1, 2, 2, 2]

    @delayed(nout=2)
    @cached(folder=CACHE_DIR, nout=2)
    def split_data(data):
        return data[:3], data[3:]

    @delayed()
    @cached(folder=CACHE_DIR)
    def compute_sum(arr):
        return sum(arr)

    # Unpack the two delayed outputs and sum each half separately.
    first_half, second_half = split_data(load_data())
    first_total = compute_sum(first_half)
    second_total = compute_sum(second_half)

    targets = (first_half, second_half, first_total, second_total)
    got_first, got_second, got_first_total, got_second_total = delayed_compute(targets)

    assert got_first == [1, 1, 1]
    assert got_second == [2, 2, 2]
    assert got_first_total == 3
    assert got_second_total == 6
示例#5
0
def test_delayed_cached_load_time():
    """Check that a ``delayed_cached`` task is slow once and fast afterwards.

    ``load_data`` sleeps for one second. After the cache folder is cleared,
    the first build-and-compute is asserted to take more than 0.95 seconds
    and the second one less than 0.95 seconds.
    """

    @delayed_cached(folder=CACHE_DIR, override=False)
    def load_data():
        time.sleep(1)
        return 1

    clear_cache(CACHE_DIR)

    # perf_counter is monotonic, so the elapsed-time assertions cannot be
    # disturbed by wall-clock adjustments; it also avoids the deprecated
    # datetime.utcnow().
    start = time.perf_counter()
    r = load_data()
    _ = delayed_compute((r,))
    delay = time.perf_counter() - start
    assert delay > 0.95

    start = time.perf_counter()
    r = load_data()
    _ = delayed_compute((r,))
    delay = time.perf_counter() - start
    assert delay < 0.95
示例#6
0
def test_delayed_cached_another_cache_dir():
    """Check that ``delayed_cached`` writes its pickle into the given folder.

    After the folder is cleared and the task computed, a file named
    ``load_data.pickle`` must exist inside it.
    """
    alt_cache_dir = "cache/temp2/"

    @delayed_cached(folder=alt_cache_dir)
    def load_data():
        return 1

    clear_cache(alt_cache_dir)

    # Only the side effect on disk matters; the computed value is discarded.
    delayed_compute((load_data(),))

    expected_file = Path(alt_cache_dir).joinpath("load_data.pickle")
    assert expected_file.exists()
示例#7
0
def test_dask_pipeline_with_parameters_private():
    """Check that overriding the ``_ts`` parameter does not slow the pipeline.

    The first run is asserted to take between 0.95 and 1.95 seconds (each
    loader sleeps for one second). The second run, executed inside a context
    that overrides ``_ts``, is asserted to take less than 0.95 seconds.

    NOTE(review): presumably parameters whose name starts with an underscore
    are left out of the cache key, which is why the second run is expected
    to be fast; confirm against the ``cached`` implementation.
    """
    clear_cache(CACHE_DIR)

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_1(_ts: dt.datetime):
        time.sleep(1)
        assert _ts > dt.datetime(2019, 1, 1)
        return 5

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_2(fix: float):
        time.sleep(1)
        return 3 + fix

    @delayed()
    @cached(folder=CACHE_DIR)
    def add(x, y):
        return x + y

    params = DelayedParameters()
    _ts = params.create("_ts", value=dt.datetime(2020, 1, 1))
    fix = params.create("fix", value=0.5)
    d1 = load_data_1(_ts=_ts)
    d2 = load_data_2(fix=fix)
    r = add(d1, d2)

    # perf_counter is monotonic, so the elapsed-time assertions cannot be
    # disturbed by wall-clock adjustments; it also avoids the deprecated
    # datetime.utcnow().
    start = time.perf_counter()
    (output,) = delayed_compute((r,))
    delay = time.perf_counter() - start
    assert 0.95 < delay < 1.95

    with params.context({"_ts": dt.datetime(2020, 2, 1)}):
        start = time.perf_counter()
        (output,) = delayed_compute((r,))
        delay = time.perf_counter() - start
        # Must finish without either one-second loader running again.
        assert delay < 0.95
示例#8
0
def test_dask_pipeline_with_parameters_create_many():
    """Check a pipeline whose parameters are registered via ``create_many``.

    Two parameters are created from a single dict and fetched back with
    ``get_delayed`` to feed the loaders. One computation is asserted to take
    between 0.95 and 1.95 seconds (each loader sleeps for one second) and to
    produce 8.5.
    """
    clear_cache(CACHE_DIR)

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_1(ts: dt.datetime):
        assert ts > dt.datetime(2019, 1, 1)
        time.sleep(1)
        return 5

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_2(fix: float):
        time.sleep(1)
        return 3 + fix

    @delayed()
    @cached(folder=CACHE_DIR)
    def add(x, y):
        return x + y

    params = DelayedParameters()
    params.create_many(
        {
            "ts": dt.datetime(2020, 1, 1),
            "fix": 0.5,
        }
    )
    print(params.get_params())
    d2 = load_data_2(params.get_delayed("fix"))
    d1 = load_data_1(params.get_delayed("ts"))
    r = add(d1, d2)

    # perf_counter is monotonic, so the elapsed-time assertion cannot be
    # disturbed by wall-clock adjustments; it also avoids the deprecated
    # datetime.utcnow().
    start = time.perf_counter()
    (output,) = delayed_compute((r,))
    delay = time.perf_counter() - start
    assert 0.95 < delay < 1.95
    assert abs(output - 8.5) < EPS