def test_dask_pipeline_with_parameters_2_context():
    """Check that values overridden through ``context()`` are only temporary.

    The same pipeline is computed three times: with the default parameter
    values, inside a context overriding both parameters, and once more after
    the context has been left.
    """
    clear_cache(CACHE_DIR)

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_1(ts: dt.datetime):
        assert ts > dt.datetime(2019, 1, 1)
        return 5

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_2(fix: float):
        return 3 + fix

    @delayed()
    @cached(folder=CACHE_DIR)
    def add(x, y):
        return x + y

    ts_param = DelayedParameter("ts", value=dt.datetime(2020, 1, 1))
    fix_param = DelayedParameter("fix", value=0.5)

    first = load_data_1(ts_param())
    second = load_data_2(fix_param())
    total = add(first, second)

    # Defaults: 5 + (3 + 0.5).
    (baseline,) = delayed_compute((total,))
    assert abs(baseline - 8.5) < EPS

    # Both parameters overridden: 5 + (3 + 1.5).
    with ts_param.context(dt.datetime(2020, 2, 1)):
        with fix_param.context(1.5):
            (overridden,) = delayed_compute((total,))
            assert abs(overridden - 9.5) < EPS

    # Outside the contexts the default values apply again.
    (restored,) = delayed_compute((total,))
    assert abs(restored - 8.5) < EPS
def test_dask_pipeline_sequential_runs():
    """Compute the two loaders first, then the pipeline built on top of them.

    Each loader sleeps for one second. The assertions require that:

    * computing both loaders in one call takes between 0.95s and 1.95s,
      i.e. less than the two sleeps run back to back;
    * the following computation of ``add`` finishes in under 0.95s, so
      neither loader slept again.
    """
    clear_cache(CACHE_DIR)

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_1():
        time.sleep(1)
        return 5

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_2():
        time.sleep(1)
        return 3

    @delayed()
    @cached(folder=CACHE_DIR)
    def add(x, y):
        return x + y

    d1 = load_data_1()
    d2 = load_data_2()
    r = add(d1, d2)

    # time.perf_counter() is a monotonic clock intended for measuring
    # intervals; wall-clock datetime.utcnow() can jump and is deprecated
    # since Python 3.12.
    start = time.perf_counter()
    d1_, d2_ = delayed_compute((d1, d2))
    delay = time.perf_counter() - start
    assert 0.95 < delay < 1.95
    assert d1_ == 5
    assert d2_ == 3

    start = time.perf_counter()
    (output,) = delayed_compute((r,))
    delay = time.perf_counter() - start
    assert delay < 0.95
    assert output == 8
def test_dask_pipeline_with_parameters_2():
    """Check timing and results when parameters are changed with ``set()``.

    Each loader sleeps for one second. For both the initial parameter values
    and the values assigned via ``set()``, the first computation must take
    between 0.95s and 1.95s and an immediate repeat must take under 0.95s.
    """
    clear_cache(CACHE_DIR)

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_1(ts: dt.datetime):
        assert ts > dt.datetime(2019, 1, 1)
        time.sleep(1)
        return 5

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_2(fix: float):
        time.sleep(1)
        return 3 + fix

    @delayed()
    @cached(folder=CACHE_DIR)
    def add(x, y):
        return x + y

    ts = DelayedParameter("ts", value=dt.datetime(2020, 1, 1))
    fix = DelayedParameter("fix", value=0.5)

    d1 = load_data_1(ts())
    d2 = load_data_2(fix())
    r = add(d1, d2)

    def timed_compute():
        """Compute ``r`` and return ``(output, elapsed_seconds)``."""
        # time.perf_counter() is a monotonic clock intended for measuring
        # intervals; wall-clock datetime.utcnow() can jump and is deprecated
        # since Python 3.12.
        start = time.perf_counter()
        (output,) = delayed_compute((r,))
        return output, time.perf_counter() - start

    # Initial values, first run: 5 + (3 + 0.5).
    output, delay = timed_compute()
    assert 0.95 < delay < 1.95
    assert abs(output - 8.5) < EPS

    # Same values again: must be fast.
    output, delay = timed_compute()
    assert delay < 0.95
    assert abs(output - 8.5) < EPS

    ts.set(dt.datetime(2020, 2, 1))
    fix.set(1.5)

    # New values, first run: 5 + (3 + 1.5).
    output, delay = timed_compute()
    assert 0.95 < delay < 1.95
    assert abs(output - 9.5) < EPS

    # New values again: must be fast.
    output, delay = timed_compute()
    assert delay < 0.95
    assert abs(output - 9.5) < EPS
def test_dask_pipeline_multiple_outputs():
    """Check a pipeline whose middle step is declared with ``nout=2``.

    The list produced by ``load_data`` is split into two halves, each half is
    summed separately, and all four results are fetched in a single call.
    """
    clear_cache(CACHE_DIR)

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data():
        return [1, 1, 1, 2, 2, 2]

    @delayed(nout=2)
    @cached(folder=CACHE_DIR, nout=2)
    def split_data(data):
        return data[:3], data[3:]

    @delayed()
    @cached(folder=CACHE_DIR)
    def compute_sum(arr):
        return sum(arr)

    raw = load_data()
    head, tail = split_data(raw)
    head_total = compute_sum(head)
    tail_total = compute_sum(tail)

    # No timing assertions here: only the returned values are checked.
    head_value, tail_value, head_sum, tail_sum = delayed_compute(
        (head, tail, head_total, tail_total)
    )

    assert head_value == [1, 1, 1]
    assert tail_value == [2, 2, 2]
    assert head_sum == 3
    assert tail_sum == 6
def test_delayed_cached_load_time():
    """Check that a repeated ``delayed_cached`` call is faster than the first.

    ``load_data`` sleeps for one second. After the cache folder is cleared,
    the first build-and-compute must take more than 0.95s and the second one
    less than 0.95s.
    """

    @delayed_cached(folder=CACHE_DIR, override=False)
    def load_data():
        time.sleep(1)
        return 1

    clear_cache(CACHE_DIR)

    # time.perf_counter() is a monotonic clock intended for measuring
    # intervals; wall-clock datetime.utcnow() can jump and is deprecated
    # since Python 3.12.
    start = time.perf_counter()
    r = load_data()
    delayed_compute((r,))
    delay = time.perf_counter() - start
    assert delay > 0.95

    start = time.perf_counter()
    r = load_data()
    delayed_compute((r,))
    delay = time.perf_counter() - start
    assert delay < 0.95
def test_delayed_cached_another_cache_dir():
    """Check that ``delayed_cached`` stores its pickle in the given folder."""
    alt_folder = "cache/temp2/"

    @delayed_cached(folder=alt_folder)
    def load_data():
        return 1

    clear_cache(alt_folder)

    pending = load_data()
    delayed_compute((pending,))

    # After the computation a pickle named after the function must exist.
    expected_file = Path(alt_folder) / "load_data.pickle"
    assert expected_file.exists()
def test_dask_pipeline_with_parameters_private():
    """Check that overriding ``_ts`` in a context does not slow the pipeline.

    Each loader sleeps for one second. The first computation must take
    between 0.95s and 1.95s. The second runs inside a context that changes
    only ``_ts`` and must still finish in under 0.95s.

    NOTE(review): presumably parameters whose name starts with an underscore
    are left out of the cache key -- confirm against the ``cached``
    implementation.
    """
    clear_cache(CACHE_DIR)

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_1(_ts: dt.datetime):
        time.sleep(1)
        assert _ts > dt.datetime(2019, 1, 1)
        return 5

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_2(fix: float):
        time.sleep(1)
        return 3 + fix

    @delayed()
    @cached(folder=CACHE_DIR)
    def add(x, y):
        return x + y

    params = DelayedParameters()
    _ts = params.create("_ts", value=dt.datetime(2020, 1, 1))
    fix = params.create("fix", value=0.5)

    d1 = load_data_1(_ts=_ts)
    d2 = load_data_2(fix=fix)
    r = add(d1, d2)

    # time.perf_counter() is a monotonic clock intended for measuring
    # intervals; wall-clock datetime.utcnow() can jump and is deprecated
    # since Python 3.12.
    start = time.perf_counter()
    (output,) = delayed_compute((r,))
    delay = time.perf_counter() - start
    assert 0.95 < delay < 1.95
    # 5 + (3 + 0.5); load_data_1 returns 5 whatever timestamp it receives.
    assert abs(output - 8.5) < EPS

    with params.context({"_ts": dt.datetime(2020, 2, 1)}):
        start = time.perf_counter()
        (output,) = delayed_compute((r,))
        delay = time.perf_counter() - start
        assert delay < 0.95
        # The result does not depend on the timestamp, so it is unchanged.
        assert abs(output - 8.5) < EPS
def test_dask_pipeline_with_parameters_create_many():
    """Check a pipeline whose parameters are registered with ``create_many``.

    Both parameters are created in one call and then retrieved by name with
    ``get_delayed``. Each loader sleeps for one second; the computation must
    take between 0.95s and 1.95s and produce 5 + (3 + 0.5).
    """
    clear_cache(CACHE_DIR)

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_1(ts: dt.datetime):
        assert ts > dt.datetime(2019, 1, 1)
        time.sleep(1)
        return 5

    @delayed()
    @cached(folder=CACHE_DIR)
    def load_data_2(fix: float):
        time.sleep(1)
        return 3 + fix

    @delayed()
    @cached(folder=CACHE_DIR)
    def add(x, y):
        return x + y

    params = DelayedParameters()
    params.create_many(
        {
            "ts": dt.datetime(2020, 1, 1),
            "fix": 0.5,
        }
    )
    print(params.get_params())

    d2 = load_data_2(params.get_delayed("fix"))
    d1 = load_data_1(params.get_delayed("ts"))
    r = add(d1, d2)

    # time.perf_counter() is a monotonic clock intended for measuring
    # intervals; wall-clock datetime.utcnow() can jump and is deprecated
    # since Python 3.12.
    start = time.perf_counter()
    (output,) = delayed_compute((r,))
    delay = time.perf_counter() - start
    assert 0.95 < delay < 1.95
    assert abs(output - 8.5) < EPS