def test_error_propagation_morph() -> None:
    """Test propagation of errors."""
    with Fun(MockServer()):
        db, store = get_connection()
        s1 = funsies.shell("cp file1 file3", inp=dict(file1="bla"), out=["file2"])

        def fun_strict(inp: bytes) -> bytes:
            return inp

        def fun_lax(inp: Result[bytes]) -> bytes:
            return b"bla bla"

        s2 = funsies.morph(fun_strict, s1.out["file2"])
        s3 = funsies.morph(fun_lax, s1.out["file2"])
        s4 = funsies.morph(fun_lax, s1.out["file2"], strict=False)

        run_op(db, store, s1.op.hash)
        run_op(db, store, s2.parent)

        out = funsies.take(s2, strict=False)
        assert isinstance(out, Error)
        assert out.source == s1.op.hash

        print(s3.parent)
        run_op(db, store, s3.parent)
        out = funsies.take(s3, strict=False)
        assert isinstance(out, Error)
        assert out.source == s1.op.hash

        run_op(db, store, s4.parent)
        out = funsies.take(s4)
        assert out == b"bla bla"
def test_error_propagation_shell() -> None:
    """Test propagation of errors."""
    db = Redis()
    store = RedisStorage(db)
    s1 = funsies.shell(
        "cp file1 file3",
        inp=dict(file1="bla"),
        out=["file2"],
        connection=(db, store),
        opt=options(),
    )
    s2 = funsies.shell(
        "cat file2",
        inp=dict(file2=s1.out["file2"]),
        connection=(db, store),
        opt=options(),
    )
    s3 = funsies.shell(
        "cat file2",
        inp=dict(file2=s1.out["file2"]),
        strict=False,
        connection=(db, store),
        opt=options(),
    )
    run_op(db, store, s1.op.hash)
    run_op(db, store, s2.op.hash)
    with pytest.raises(UnwrapError):
        funsies.take(s2.stderr, connection=(db, store))

    run_op(db, store, s3.op.hash)
    assert funsies.take(s3.stderr, connection=(db, store)) != b""
    assert isinstance(funsies.take(s3.returncode, connection=(db, store)), int)
    assert funsies.take(s3.returncode, connection=(db, store)) != 0
def test_not_generated() -> None:
    """What happens when an artefact is not generated?"""
    with Fun(MockServer()):
        db, store = get_connection()
        s = funsies.shell("cp file1 file2", inp=dict(file1="bla"), out=["file3"])
        run_op(db, store, s.op.hash)
        assert funsies.take(s.returncode) == 0
        with pytest.raises(UnwrapError):
            funsies.take(s.out["file3"])
def test_concat() -> None:
    """Test concatenation."""
    with Fun(MockServer()):
        db, store = get_connection()
        dat1 = put(b"bla")
        dat2 = put(b"bla")
        cat = utils.concat(dat1, dat2)
        run_op(db, store, cat.parent)
        assert take(cat) == b"blabla"

        cat = utils.concat(dat1, dat1, dat1, join=b" ")
        run_op(db, store, cat.parent)
        assert take(cat) == b"bla bla bla"
def test_dag_execute_same_root() -> None:
    """Test execution of two dags that share the same origin."""
    with Fun(MockServer(), defaults=options(distributed=False)):
        dat = put(b"bla bla")
        step1 = morph(lambda x: x.decode().upper().encode(), dat)
        step2 = shell("cat file1 file2", inp=dict(file1=step1, file2=dat))
        step2b = shell("cat file1", inp=dict(file1=step1))

        execute(step2)
        out = take(step2.stdout)
        assert out == b"BLA BLAbla bla"

        execute(step2b)
        out = take(step2b.stdout)
        assert out == b"BLA BLA"
def test_double_execution(nworkers: int) -> None:
    """Test multiple executions of the same task."""
    # This test will fail if a job is re-executed multiple times.
    # external
    from rq.job import get_current_job

    def track_runs(inp: bytes) -> bytes:
        job = get_current_job()
        db: Redis[bytes] = job.connection
        val = db.incrby("sentinel", 1)
        time.sleep(0.5)
        return str(val).encode()

    with f.ManagedFun(nworkers=nworkers):
        # wait_for_workers(db, nworkers)
        dat = f.put(b"bla bla")
        step1 = f.morph(track_runs, dat)

        step1a = f.shell(
            "cat file1",
            inp=dict(file1=step1),
        )
        step1b = f.shell(
            "cat file2",
            inp=dict(file2=step1),
        )

        f.execute(step1a)
        f.execute(step1b)
        f.wait_for(step1a, timeout=10.0)
        f.wait_for(step1b, timeout=10.0)
        assert f.take(step1a.stdout) == b"1"
def test_job_killed(nworkers: int, sig: int) -> None:
    """Test what happens when 'funsies worker' is ok but its job gets killed."""
    # std
    import os

    def kill_self(*inp: bytes) -> bytes:
        pid = os.getpid()
        os.kill(pid, sig)
        time.sleep(2.0)
        return b"what"

    def cap(inp: bytes) -> bytes:
        return inp.upper()

    with f.ManagedFun(nworkers=nworkers) as db:
        wait_for_workers(db, nworkers)
        s1 = f.reduce(kill_self, b"bla bla", b"bla bla", opt=f.options(timeout=3))
        s1b = f.morph(cap, s1)
        f.execute(s1b)

        # error
        f.wait_for(s1b, timeout=1)
        err = f.take(s1b, strict=False)
        assert isinstance(err, f.errors.Error)
        assert err.kind == f.errors.ErrorKind.KilledBySignal
def test_worker_killed(nworkers: int, sig: int) -> None:
    """Test what happens when 'funsies worker' gets killed."""
    # std
    import os

    def kill_funsies_worker(*inp: bytes) -> bytes:
        pid = os.getppid()
        os.kill(pid, sig)
        time.sleep(1.0)
        return b"what"

    def cap(inp: bytes) -> bytes:
        return inp.upper()

    with f.ManagedFun(nworkers=nworkers) as db:
        wait_for_workers(db, nworkers)
        s1 = f.reduce(
            kill_funsies_worker, b"bla bla", b"bla bla", opt=f.options(timeout=5)
        )
        s1b = f.morph(cap, s1)
        f.execute(s1b)

        if nworkers == 1:
            # no other workers to pick up the slack
            with pytest.raises(TimeoutError):
                f.wait_for(s1b, timeout=1)
        else:
            # everything is ok
            f.wait_for(s1b, timeout=5)
            assert f.take(s1b) == b"WHAT"
def test_subdag() -> None:
    """Test that subdags execute properly."""

    def cap(inp: bytes) -> bytes:
        return inp.upper()

    def map_reduce(inputs: dict[str, bytes]) -> dict[str, _graph.Artefact[bytes]]:
        """Basic map reduce."""
        inp_data = inputs["inp"].split(b" ")
        out: list[_graph.Artefact[bytes]] = []
        for el in inp_data:
            out += [morph(cap, el, opt=options(distributed=False))]
        return {"out": concat(*out, join="-")}

    with Fun(MockServer(), defaults=options(distributed=False)) as db:
        dat = put(b"bla bla lol what")
        inp = {"inp": dat}
        cmd = _subdag.subdag_funsie(
            map_reduce, {"inp": Encoding.blob}, {"out": Encoding.blob}
        )
        operation = _graph.make_op(db, cmd, inp, options())
        out = _graph.Artefact[bytes].grab(db, operation.out["out"])

        final = shell(
            "cat file1 file2",
            inp=dict(file1=out, file2=b"something"),
        )

        execute(final)
        data = take(final.stdout)
        assert data == b"BLA-BLA-LOL-WHATsomething"
def test_template() -> None:
    """Basic test of chevron templating."""
    with Fun(MockServer()):
        db, store = get_connection()
        t = "Hello, {{ mustache }}!"
        result = template(t, {"mustache": "world"})
        run_op(db, store, result.parent)
        assert take(result) == b"Hello, world!"
def test_template_complicated() -> None:
    """Test templating with funky types."""
    with Fun(MockServer()):
        db, store = get_connection()
        t = "wazzaa, {{ mustache }}!"
        result = template(t, {"mustache": put(b"people")})
        run_op(db, store, result.parent)
        assert take(result) == b"wazzaa, people!"

        t = "{{a}}{{b}}{{c}}"
        result = template(t, dict(a=2, b="cool", c="4me"))
        run_op(db, store, result.parent)
        assert take(result) == b"2cool4me"

        t = ""
        result = template(t, dict(a=2, b="cool", c="4me"))
        run_op(db, store, result.parent)
        assert take(result) == b""
def test_truncate() -> None:
    """Test truncation."""
    with Fun(MockServer()):
        db, store = get_connection()
        inp = "\n".join([f"{k}" for k in range(10)])
        dat1 = put(inp.encode())
        trunc = utils.truncate(dat1, 2, 3)
        run_op(db, store, trunc.parent)
        assert take(trunc) == ("\n".join(inp.split("\n")[2:-3])).encode()
def test_parametric_store_recall() -> None:
    """Test storing and recalling parametrics."""
    serv = MockServer()
    with Fun(serv, options(distributed=False)):
        a = put(3)
        b = put(4)
        s = reduce(lambda x, y: x + y, a, b)
        s2 = morph(lambda x: 3 * x, s)
        execute(s2)
        assert take(s2) == 21

        # parametrize
        p.commit("math", inp=dict(a=a, b=b), out=dict(s=s, s2=s2))

    with Fun(serv, options(distributed=False)):
        out = p.recall("math", inp=dict(a=5, b=8))
        execute(out["s2"])
        assert take(out["s2"]) == 39
def test_exec_all() -> None:
    """Test execute_all."""
    with Fun(MockServer(), defaults=options(distributed=False)):
        results = []

        def div_by(x: float) -> float:
            return 10.0 / x

        for i in range(10, -1, -1):
            val = put(float(i))
            results += [morph(div_by, val)]

        with pytest.raises(UnwrapError):
            take(results[0])

        err = utils.execute_all(results)
        print(take(results[0]))
        v = take(err, strict=False)
        assert isinstance(v, Error)
        assert v.kind == ErrorKind.ExceptionRaised
def test_dag_execute() -> None:
    """Test execution of a _dag."""
    with Fun(MockServer(), defaults=options(distributed=False)):
        dat = put(b"bla bla")
        step1 = morph(lambda x: x.decode().upper().encode(), dat)
        step2 = shell("cat file1 file2", inp=dict(file1=step1, file2=dat))
        output = step2.stdout

        # make queue
        execute(output)
        out = take(output)
        assert out == b"BLA BLAbla bla"
def test_artefact_disk_distributed() -> None:
    """Test whether artefacts on disk work on different nodes."""
    # funsies
    import funsies as f

    with tempfile.TemporaryDirectory() as td:
        with f.ManagedFun(nworkers=1, data_url=f"file://{td}"):
            dat = f.put(b"bla bla")
            step1 = f.morph(lambda x: x.decode().upper().encode(), dat)
            step2 = f.shell("cat file1 file2", inp=dict(file1=step1, file2=dat))
            step2b = f.shell("cat file1", inp=dict(file1=step1))

            f.execute(step2)
            f.wait_for(step2, 1.0)
            out = f.take(step2.stdout)
            assert out == b"BLA BLAbla bla"

            f.execute(step2b)
            f.wait_for(step2b, 1.0)
            out = f.take(step2b.stdout)
            assert out == b"BLA BLA"
def test_timeout_deadlock() -> None:
    """Test funsies that time out.

    Here we explicitly check if dependents are still enqueued or if the whole
    thing deadlocks.
    """

    def timeout_fun(*inp: str) -> bytes:
        time.sleep(3.0)
        return b"what"

    def cap(inp: bytes) -> bytes:
        return inp.capitalize()

    with f.ManagedFun(nworkers=2):
        # Test when python function times out
        s1 = f.reduce(timeout_fun, "bla bla", "bla bla", opt=f.options(timeout=1))
        s1b = f.morph(cap, s1)

        # Test when shell function times out
        s2 = f.shell("sleep 20", "echo 'bla bla'", opt=f.options(timeout=1))
        s2b = f.morph(cap, s2.stdouts[1])

        f.execute(s1b, s2b)

        # Check err for reduce
        f.wait_for(s1b, timeout=1.5)
        err = f.take(s1b, strict=False)
        assert isinstance(err, f.errors.Error)
        assert err.kind == f.errors.ErrorKind.JobTimedOut
        assert err.source == s1.parent

        # Check err for shell
        f.wait_for(s2b, timeout=1.5)
        err = f.take(s2b, strict=False)
        assert isinstance(err, f.errors.Error)
        assert err.kind == f.errors.ErrorKind.JobTimedOut
        assert err.source == s2.hash
def test_parametric_store_recall_optional() -> None:
    """Test storing a parametric with optional parameters."""
    serv = MockServer()
    with Fun(serv, options(distributed=False)):
        a = put(3)
        b = put("fun")
        s = reduce(lambda x, y: x * y, a, b)
        s2 = morph(lambda x: x.upper(), s)

        # parametrize
        p.commit("fun", inp=dict(a=a, b=b), out=dict(s=s2))

    with Fun(serv, options(distributed=False)):
        out = p.recall("fun", inp=dict(a=5))
        execute(out["s"])
        assert take(out["s"]) == "FUNFUNFUNFUNFUN"

        # nested
        out = p.recall("fun", inp=dict(b="lol"))
        out = p.recall("fun", inp=dict(b=out["s"], a=2))
        execute(out["s"])
        assert take(out["s"]) == "LOLLOLLOLLOLLOLLOL"
def test_error_propagation() -> None:
    """Test propagation of errors."""
    with Fun(MockServer()):
        db, store = get_connection()
        s1 = funsies.shell("cp file1 file3", inp=dict(file1="bla"), out=["file2"])
        s2 = funsies.shell(
            "cat file1 file2", inp=dict(file1="a file", file2=s1.out["file2"])
        )
        run_op(db, store, s1.op.hash)
        run_op(db, store, s2.op.hash)
        out = funsies.take(s2.stdout, strict=False)
        print(out)
        assert isinstance(out, Error)
        assert out.source == s1.op.hash
def test_dynamic_dump() -> None:
    """Test whether a dynamic DAG gets graphed properly."""

    def split(a: bytes, b: bytes) -> list[dict[str, int]]:
        a = a.split()
        b = b.split()
        out = []
        for ia, ib in zip(a, b):
            out += [
                {
                    "sum": int(ia.decode()) + int(ib.decode()),
                    "product": int(ia.decode()) * int(ib.decode()),
                }
            ]
        return out

    def apply(inp: Artefact[dict[str, Any]]) -> Artefact[str]:
        out = funsies.morph(lambda x: f"{x['sum']}//{x['product']}", inp)
        return out

    def combine(inp: Sequence[Artefact[str]]) -> Artefact[bytes]:
        def enc(inp: str) -> bytes:
            return inp.encode()

        out = [funsies.morph(enc, x, out=Encoding.blob) for x in inp]
        return funsies.utils.concat(*out)

    with funsies.ManagedFun(nworkers=1) as db:
        num1 = funsies.put(b"1 2 3 4 5")
        num2 = funsies.put(b"11 10 11 10 11")
        outputs = dynamic.sac(
            split,
            apply,
            combine,
            num1,
            num2,
            out=Encoding.blob,
        )
        outputs = funsies.morph(lambda x: x, outputs)

        nodes, artefacts, labels, links = _graphviz.export(db, [outputs.hash])
        assert len(artefacts) == 4  # not yet generated subdag parents
        print(len(artefacts))

        funsies.execute(outputs)
        funsies.wait_for(outputs, timeout=1.0)

        nodes, artefacts, labels, links = _graphviz.export(db, [outputs.hash])
        assert len(artefacts) == 22  # generated subdag parents
        assert funsies.take(outputs) == b"12//1112//2014//3314//4016//55"
def cat(ctx: click.Context, hashes: tuple[str, ...]) -> None:
    """Print artefacts to stdout."""
    with funsies._context.Fun(ctx.obj):
        for hash in hashes:
            logger.info(f"extracting {hash}")
            things = funsies.get(hash)
            if len(things) == 0:
                logger.error("hash does not correspond to anything!")
                raise SystemExit(2)
            if len(things) > 1:
                logger.error(f"hash resolves to {len(things)} things.")

            art = things[0]
            if isinstance(art, types.Artefact):
                res = funsies.take(art, strict=False)
                if isinstance(res, types.Error):
                    logger.warning(f"error at {hash}: {res.kind}")
                    if res.details is not None:
                        sys.stderr.buffer.write((res.details + "\n").encode())
                    logger.warning(f"error source: {res.source}")
                elif isinstance(res, bytes):
                    sys.stdout.buffer.write(res)
                    logger.success(f"{hash} output to stdout")
                elif isinstance(res, str):
                    sys.stdout.buffer.write(res.encode())
                    logger.success(f"{hash} output to stdout")
                else:
                    sys.stdout.buffer.write(json.dumps(res).encode())
                    logger.success(f"{hash} output to stdout")
            elif isinstance(art, types.Operation):
                logger.error("not an artefact")
                logger.info("did you mean...")
                sys.stderr.write("  INPUTS:\n")
                for key, val in art.inp.items():
                    sys.stderr.write(f"    {key:<30} -> {val[:8]}\n")
                sys.stderr.write("  OUTPUTS:\n")
                for key, val in art.out.items():
                    sys.stderr.write(f"    {key:<30} -> {val[:8]}\n")
            else:
                logger.error("not an artefact:")
                logger.error(f"{art}")
def test_parametric_eval() -> None:
    """Test that parametrics evaluate properly."""
    with Fun(MockServer(), options(distributed=False)) as db:
        dat = put(b"bla bla")
        step1 = morph(capitalize, dat)
        step2 = shell("cat file1 file2", inp=dict(file1=step1, file2=dat))
        final = shell(
            "cat file1 file3", inp={"file1": step1, "file3": step2.stdout}
        )
        execute(final.stdout)  # b'BLA BLABLA BLAbla bla'

        param = _p.make_parametric(
            db, "param", {"input": dat}, {"output": final.stdout}
        )
        dat2 = put(b"lol lol")
        out = param.evaluate(db, {"input": dat2})
        execute(out["output"])
        assert take(out["output"]) == b"LOL LOLLOL LOLlol lol"
def test_raising_funsie() -> None:
    """Test funsie that raises an error.

    This test is specifically designed to catch the bug fixed in fa9af6a4,
    where funsies that raised did not release their locks, leading to a race
    condition.
    """

    def raising_fun(*inp: str) -> bytes:
        raise RuntimeError("this funsie raises.")

    with f.ManagedFun(nworkers=2):
        s0a = f.morph(lambda x: x, "bla blabla")
        s0b = f.morph(lambda x: x, "blala")
        s1 = f.reduce(raising_fun, "bla bla", s0a, s0b, strict=True)
        f.execute(s1)
        f.wait_for(s1, timeout=2)
        with pytest.raises(UnwrapError):
            _ = f.take(s1)

        s2 = f.morph(lambda x: x, s1)
        f.execute(s2)
        f.wait_for(s2, timeout=0.5)
def test_error_tolerant() -> None:
    """Test error tolerant funsie."""

    def error_tolerant_fun(inp: Result[bytes]) -> bytes:
        if isinstance(inp, Error):
            return b"err"
        else:
            return b""

    with Fun(MockServer()):
        db, store = get_connection()
        s1 = funsies.shell("cp file1 file3", inp=dict(file1="bla"), out=["file2"])
        s2 = funsies.morph(error_tolerant_fun, s1.out["file2"], strict=False)

        with pytest.raises(RuntimeError):
            # Test operation not found
            run_op(db, store, s2.hash)

        run_op(db, store, s1.op)
        run_op(db, store, s2.parent)
        assert funsies.take(s2) == b"err"
def test_waiting_on_map_reduce() -> None:
    """Test waiting on the (linked) result of map-reduce."""

    def split(a: bytes, b: bytes) -> list[dict[str, int]]:
        a = a.split()
        b = b.split()
        out = []
        for ia, ib in zip(a, b):
            out += [
                {
                    "sum": int(ia.decode()) + int(ib.decode()),
                    "product": int(ia.decode()) * int(ib.decode()),
                }
            ]
        return out

    def apply(inp: Artefact) -> Artefact:
        out = funsies.morph(lambda x: f"{x['sum']}//{x['product']}", inp)
        return out

    def combine(inp: Sequence[Artefact]) -> Artefact:
        out = [
            funsies.morph(lambda y: y.encode(), x, out=Encoding.blob) for x in inp
        ]
        return funsies.utils.concat(*out)

    with funsies.ManagedFun(nworkers=1):
        num1 = funsies.put(b"1 2 3 4 5")
        num2 = funsies.put(b"11 10 11 10 11")
        outputs = dynamic.sac(
            split,
            apply,
            combine,
            num1,
            num2,
            out=Encoding.blob,
        )
        funsies.execute(outputs)
        funsies.wait_for(outputs, timeout=1.0)
        assert funsies.take(outputs) == b"12//1112//2014//3314//4016//55"
def test_map_reduce() -> None:
    """Test simple map-reduce."""

    def split(a: bytes, b: bytes) -> list[dict[str, int]]:
        a = a.split()
        b = b.split()
        out = []
        for ia, ib in zip(a, b):
            out += [
                {
                    "sum": int(ia.decode()) + int(ib.decode()),
                    "product": int(ia.decode()) * int(ib.decode()),
                }
            ]
        return out

    def apply(inp: Artefact) -> Artefact:
        out = funsies.morph(lambda x: f"{x['sum']}//{x['product']}", inp)
        return out

    def combine(inp: Sequence[Artefact]) -> Artefact:
        out = [
            funsies.morph(lambda y: y.encode(), x, out=Encoding.blob) for x in inp
        ]
        return funsies.utils.concat(*out)

    with funsies.Fun(MockServer(), funsies.options(distributed=False)):
        num1 = funsies.put(b"1 2 3 4 5")
        num2 = funsies.put(b"11 10 11 10 11")
        outputs = dynamic.sac(
            split,
            apply,
            combine,
            num1,
            num2,
            out=Encoding.blob,
        )
        funsies.execute(outputs)
        assert funsies.take(outputs) == b"12//1112//2014//3314//4016//55"
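# Sanity check of the expected value used in the map-reduce tests above, worked
# out in plain Python. This is an illustrative sketch only (not part of the
# original suite): "1 2 3 4 5" and "11 10 11 10 11" are zipped pairwise, each
# pair is rendered as "sum//product", and the pieces are concatenated with no
# separator.
def _expected_map_reduce_output() -> bytes:
    a = [1, 2, 3, 4, 5]
    b = [11, 10, 11, 10, 11]
    # 12//11, 12//20, 14//33, 14//40, 16//55 -> b"12//1112//2014//3314//4016//55"
    return "".join(f"{x + y}//{x * y}" for x, y in zip(a, b)).encode()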
def test_integration(reference: str, nworkers: int) -> None:
    """Test full integration."""
    # make a temp file and copy reference database
    dir = tempfile.mkdtemp()
    if not make_reference:
        shutil.copy(os.path.join(ref_dir, reference, "appendonly.aof"), dir)
    shutil.copy(os.path.join(ref_dir, "redis.conf"), dir)

    # Dictionary for test data
    test_data = {}

    # Start funsie script
    with ManagedFun(nworkers=nworkers, directory=dir, redis_args=["redis.conf"]):
        dat = put(b"bla bla")
        step1 = morph(lambda x: x.decode().upper().encode(), dat)
        step2 = shell(
            "cat file1 file2; grep 'bla' file2 file1 > file3; date >> file3",
            inp=dict(file1=step1, file2=dat),
            out=["file2", "file3"],
        )
        echo = shell("sleep 1", "date")
        merge = reduce(
            join_bytes,
            step2.out["file3"],
            echo.stdouts[1],
            name="merger",
        )

        def tolist(x: bytes, y: bytes) -> Dict[int, str]:
            return {1: x.decode(), 8: y.decode()}

        A = py(tolist, merge, echo.stdouts[1])
        test_data["test1"] = A

        def raises(inp: bytes) -> bytes:
            raise RuntimeError("an error was raised")

        def error_count(*inp: Result[bytes]) -> bytes:
            out = utils.match_results(inp, lambda x: 0, lambda x: 1)
            return str(sum(out)).encode()

        err = morph(raises, dat)
        count = reduce(
            error_count, dat, dat, err, dat, err, err, echo.stdouts[0], strict=False
        )
        cat = utils.concat(merge, dat, err, count, echo.stdouts[1], strict=False)
        test_data["test2"] = cat

        execute(step1)
        wait_for(step1, timeout=10.0)
        execute(step2)
        wait_for(step2, timeout=10.0)
        assert take(step1) == b"BLA BLA"
        assert take(step2.stdout) == b"BLA BLAbla bla"

        if make_reference:
            folder = os.path.join(ref_dir, reference)
            os.makedirs(folder, exist_ok=True)
            for name, artefact in test_data.items():
                with open(os.path.join(folder, name), "wb") as f:
                    execute(artefact)
                    wait_for(artefact, 10.0)
                    out = take(artefact)
                    data2 = _serdes.encode(artefact.kind, out)
                    assert isinstance(data2, bytes)
                    f.write(data2)
            shutil.copy(
                os.path.join(dir, "appendonly.aof"),
                os.path.join(folder, "appendonly.aof"),
            )
        else:
            # Test against reference dbs
            for name, artefact in test_data.items():
                execute(artefact)
                wait_for(artefact, 10.0)
                with open(os.path.join(ref_dir, reference, name), "rb") as f:
                    data = f.read()
                out = take(artefact)
                data_ref = _serdes.encode(artefact.kind, out)
                assert isinstance(data_ref, bytes)
                assert data == data_ref

    shutil.rmtree(dir)
    # Recursive application of merge sort
    # split -> generates two lists or raises
    # recurse(x) for each value of split
    # merge(left, right)
        split,
        lambda element: funsies_mergesort(element),
        lambda lr: f.reduce(merge, lr[0], lr[1]),  # type:ignore
        art,
        out=Encoding.json,
    )
    return f.reduce(
        # if the subdag fails, it's because split raised. In this case, we
        # just forward the arguments
        ignore_error,
        result,
        art,
        strict=False,
        out=Encoding.json,
    )


# run the workflow
to_be_sorted = [random.randint(0, 99) for _ in range(120)]
with f.Fun():
    inp = f.put(to_be_sorted)
    out = funsies_mergesort(inp)
    print("output:", out.hash)
    f.execute(out)
    f.wait_for(out)
    print(f.take(out))
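# The excerpt above references `split`, `merge`, and `ignore_error` without
# showing their definitions. Below is a minimal, hypothetical sketch of what
# the first two could look like (plain Python, not the original definitions):
# `split` halves a list and raises when there is nothing left to split (the
# recursion base case that `ignore_error` then catches by forwarding the
# unsplit input), and `merge` combines two sorted lists.
from typing import List


def split(lst: List[int]) -> List[List[int]]:
    """Split a list into two halves; raise when it cannot be split further."""
    if len(lst) < 2:
        raise RuntimeError("nothing to split")
    mid = len(lst) // 2
    return [lst[:mid], lst[mid:]]


def merge(left: List[int], right: List[int]) -> List[int]:
    """Merge two sorted lists into one sorted list."""
    out: List[int] = []
    i = j = 0
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:
            out.append(left[i])
            i += 1
        else:
            out.append(right[j])
            j += 1
    return out + left[i:] + right[j:]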
def test_integration(reference: str, nworkers: int) -> None:
    """Test full integration."""
    # make a temp file and copy reference database
    dir = tempfile.mkdtemp()
    if not make_reference:
        shutil.copy(os.path.join(ref_dir, reference, "appendonly.aof"), dir)
        shutil.copytree(
            os.path.join(ref_dir, reference, "data"), os.path.join(dir, "data")
        )
    shutil.copy(os.path.join(ref_dir, "redis.conf"), dir)

    # data url
    datadir = f"file://{os.path.join(dir, 'data')}"

    # Dictionary for test data
    test_data: dict[str, Any] = {}

    def update_data(a: dict[int, int], b: list[int]) -> dict[int, int]:
        for i in b:
            a[i] = a.get(i, 0) + 1
        return a

    def sum_data(x: dict[int, int]) -> int:
        return sum([int(k) * v for k, v in x.items()])

    def make_secret(x: int) -> str:
        return secrets.token_hex(x)

    # Start funsie script
    with ManagedFun(
        nworkers=nworkers,
        directory=dir,
        data_url=datadir,
        redis_args=["redis.conf"],
    ) as db:
        integers = put([5, 4, 8, 9, 9, 10, 1, 3])
        init_data = put({100: 9})
        test_data["init_data"] = init_data
        nbytes = put(4)

        s1 = reduce(update_data, init_data, integers)
        num = morph(sum_data, s1)
        date = shell("date").stdout
        test_data["date"] = date
        rand = morph(make_secret, nbytes)

        s4 = template(
            "date:{{date}}\n"
            + "some random bytes:{{random}}\n"
            + "a number: {{num}}\n"
            + "a string: {{string}}\n",
            {"date": date, "random": rand, "num": num, "string": "wazza"},
            name="a template",
        )
        test_data["s4"] = s4

        execute(s4)
        wait_for(s4, 5)

        # check that the db doesn't itself include data
        for k in db.keys():
            assert b"data" not in k

        if make_reference:
            folder = os.path.join(ref_dir, reference)
            os.makedirs(folder, exist_ok=True)
            for name, artefact in test_data.items():
                with open(os.path.join(folder, name), "wb") as f:
                    execute(artefact)
                    wait_for(artefact, 10.0)
                    out = take(artefact)
                    data2 = _serdes.encode(artefact.kind, out)
                    assert isinstance(data2, bytes)
                    f.write(data2)
            shutil.copy(
                os.path.join(dir, "appendonly.aof"),
                os.path.join(folder, "appendonly.aof"),
            )
            shutil.copytree(
                os.path.join(dir, "data"),
                os.path.join(folder, "data"),
            )
        else:
            # Test against reference dbs
            for name, artefact in test_data.items():
                execute(artefact)
                wait_for(artefact, 10.0)
                with open(os.path.join(ref_dir, reference, name), "rb") as f:
                    data = f.read()
                out = take(artefact)
                data_ref = _serdes.encode(artefact.kind, out)
                assert isinstance(data_ref, bytes)
                assert data == data_ref

    # delete tempdir
    shutil.rmtree(dir)
def test_nested_map_reduce(nworkers: int) -> None:
    """Test nested map-reduce."""
    # ------------------------------------------------------------------------
    # Inner
    def sum_inputs(*inp: int) -> int:
        out = 0
        for el in inp:
            out += el
        return out

    def split_inner(inp: str) -> list[int]:
        a = inp.split(" ")
        return [int(el) for el in a]

    def apply_inner(inp: Artefact) -> Artefact:
        return funsies.reduce(sum_inputs, inp, 1)

    def combine_inner(inp: Sequence[Artefact]) -> Artefact:
        return funsies.reduce(sum_inputs, *inp)

    # ------------------------------------------------------------------------
    # outer
    def split_outer(inp: list[str], fac: int) -> list[str]:
        out = [x + f" {fac}" for x in inp]
        return out

    def apply_outer(inp: Artefact) -> Artefact:
        outputs = dynamic.sac(
            split_inner,
            apply_inner,
            combine_inner,
            inp,
            out=Encoding.json,
        )
        return outputs

    def combine_outer(inp: Sequence[Artefact]) -> Artefact:
        out = [
            funsies.morph(lambda y: f"{y}".encode(), x, out=Encoding.blob)
            for x in inp
        ]
        return funsies.utils.concat(*out, join=b",,")

    with funsies.ManagedFun(nworkers=nworkers):
        num1 = funsies.put("1 2 3 4 5")
        outputs = dynamic.sac(
            split_inner, apply_inner, combine_inner, num1, out=Encoding.json
        )
        funsies.execute(outputs)
        funsies.wait_for(outputs, timeout=30.0)
        assert funsies.take(outputs) == 20

        # Now try the nested one
        num = funsies.put(["1 2", "3 4 7", "10 12", "1"])
        factor = funsies.put(-2)

        # split    -> 1 2 -2 | 3 4 7 -2 | 10 12 -2 | 1 -2
        # apply    -> split2   -> 1,2,-2 | 3,4,7,-2 | 10,12,-2 | 1,-2
        #             apply2   -> 2,3,-1 | 4,5,8,-1 | 11,13,-1 | 2,-1
        #             combine2 -> 4 | 16 | 23 | 1
        # combine  -> 4,,16,,23,,1
        ans = b"4,,16,,23,,1"

        outputs = dynamic.sac(
            split_outer,
            apply_outer,
            combine_outer,
            num,
            factor,
            out=Encoding.blob,
        )
        funsies.execute(outputs)
        funsies.wait_for(outputs, timeout=30.0)
        assert funsies.take(outputs) == ans