def test_subdag() -> None:
    """Check that a subdag built at run time executes and feeds later steps."""

    def cap(inp: bytes) -> bytes:
        return inp.upper()

    def map_reduce(
        inputs: dict[str, bytes],
    ) -> dict[str, _graph.Artefact[bytes]]:
        """Upper-case every space-separated word and join them with '-'."""
        words = inputs["inp"].split(b" ")
        mapped: list[_graph.Artefact[bytes]] = [
            morph(cap, word, opt=options(distributed=False)) for word in words
        ]
        return {"out": concat(*mapped, join="-")}

    with Fun(MockServer(), defaults=options(distributed=False)) as db:
        sentence = put(b"bla bla lol what")
        subdag_cmd = _subdag.subdag_funsie(
            map_reduce, {"inp": Encoding.blob}, {"out": Encoding.blob}
        )
        operation = _graph.make_op(db, subdag_cmd, {"inp": sentence}, options())
        joined = _graph.Artefact[bytes].grab(db, operation.out["out"])

        # The subdag output is consumed by an ordinary shell step.
        final = shell(
            "cat file1 file2",
            inp={"file1": joined, "file2": b"something"},
        )
        execute(final)
        result = take(final.stdout)
        assert result == b"BLA-BLA-LOL-WHATsomething"
def test_error_propagation_shell() -> None:
    """Check how a failed shell step propagates to strict and lax dependents."""
    db = Redis()
    store = RedisStorage(db)
    conn = (db, store)

    # The command writes file3 while file2 is the declared output.
    producer = funsies.shell(
        "cp file1 file3",
        inp={"file1": "bla"},
        out=["file2"],
        connection=conn,
        opt=options(),
    )
    strict_reader = funsies.shell(
        "cat file2",
        inp={"file2": producer.out["file2"]},
        connection=conn,
        opt=options(),
    )
    lax_reader = funsies.shell(
        "cat file2",
        inp={"file2": producer.out["file2"]},
        strict=False,
        connection=conn,
        opt=options(),
    )

    run_op(db, store, producer.op.hash)
    run_op(db, store, strict_reader.op.hash)
    with pytest.raises(UnwrapError):
        funsies.take(strict_reader.stderr, connection=conn)

    # The non-strict reader still runs and reports the failure itself.
    run_op(db, store, lax_reader.op.hash)
    assert funsies.take(lax_reader.stderr, connection=conn) != b""
    assert isinstance(funsies.take(lax_reader.returncode, connection=conn), int)
    assert funsies.take(lax_reader.returncode, connection=conn) != 0
def test_dag_cached() -> None:
    """Check that an already-executed DAG is served from cache."""
    server = MockServer()

    # First pass: evaluate everything for real.
    with Fun(server, defaults=options(distributed=False)):
        source = put(b"bla bla")
        upper = morph(lambda x: x.decode().upper().encode(), source)
        echo = shell("echo 'not'", inp={"file1": upper})
        merged = shell(
            "cat file1 file2",
            inp={"file1": upper, "file2": echo.stdout},
            out=["file2"],
        )
        execute(merged)

    # Second pass: identical workflow with evaluation disabled; it should
    # run through without evaluating anything.
    with Fun(server, defaults=options(distributed=False, evaluate=False)):
        source = put(b"bla bla")
        upper = morph(lambda x: x.decode().upper().encode(), source)
        echo = shell("echo 'not'", inp={"file1": upper})
        merged = shell(
            "cat file1 file2",
            inp={"file1": upper, "file2": echo.stdout},
            out=["file2"],
        )
        execute(merged)

    # Third pass: one command differs, so evaluation is required and the
    # evaluate=False setting makes execution raise.
    with Fun(server, defaults=options(distributed=False, evaluate=False)):
        source = put(b"bla bla")
        upper = morph(lambda x: x.decode().upper().encode(), source)
        echo = shell("echo 'knot'", inp={"file1": upper})
        merged = shell(
            "cat file1 file2",
            inp={"file1": upper, "file2": echo.stdout},
            out=["file2"],
        )
        with pytest.raises(RuntimeError):
            execute(merged)
def test_artefact_add_implicit() -> None:
    """Check that a variable artefact with no stored data reads as NotFound."""
    options()
    db, store = MockServer().new_connection()

    artefact = _graph.variable_artefact(db, hash_t("1"), "file", Encoding.blob)
    value = _graph.get_data(db, store, artefact)

    assert isinstance(value, Error)
    assert value.kind == ErrorKind.NotFound
def test_artefact_add() -> None:
    """Check that a constant artefact can be stored, grabbed and read back."""
    options()
    db, store = MockServer().new_connection()

    stored = _graph.constant_artefact(db, store, b"bla bla")
    grabbed = _graph.Artefact[bytes].grab(db, stored.hash)
    payload = _graph.get_data(db, store, stored)

    assert grabbed is not None
    assert stored == grabbed
    assert payload == b"bla bla"
def test_dependencies() -> None:
    """Check that an op reports unmet dependencies until its input has run."""
    opt = options()
    db, store = MockServer().new_connection()

    capit = p.python_funsie(
        capitalize, {"inp": Encoding.json}, {"inp": Encoding.json}, name="capit"
    )
    uncap = p.python_funsie(
        uncapitalize, {"inp": Encoding.json}, {"inp": Encoding.json}, name="uncap"
    )

    first = _graph.make_op(
        db,
        capit,
        inp={"inp": _graph.constant_artefact(db, store, "bla bla")},
        opt=opt,
    )
    # The second op consumes the output of the first.
    second = _graph.make_op(
        db,
        uncap,
        inp={"inp": _graph.Artefact.grab(db, first.out["inp"])},
        opt=opt,
    )

    # Running the dependent before its input exists must not execute it.
    assert run_op(db, store, second.hash) == RunStatus.unmet_dependencies
    assert run_op(db, store, first.hash) == RunStatus.executed
    assert run_op(db, store, second.hash) == RunStatus.executed
def test_worker_killed(nworkers: int, sig: int) -> None:
    """Check what happens when the parent process of a running job is killed.

    Args:
        nworkers: Number of workers started by ``ManagedFun``.
        sig: Signal number sent to the parent process of the job.
    """
    # std
    import os

    def kill_funsies_worker(*inp: bytes) -> bytes:
        # Signal the parent of the process running this job.
        os.kill(os.getppid(), sig)
        time.sleep(1.0)
        return b"what"

    def cap(inp: bytes) -> bytes:
        return inp.upper()

    with f.ManagedFun(nworkers=nworkers) as db:
        wait_for_workers(db, nworkers)
        killer = f.reduce(
            kill_funsies_worker, b"bla bla", b"bla bla", opt=f.options(timeout=5)
        )
        dependent = f.morph(cap, killer)
        f.execute(dependent)

        if nworkers != 1:
            # everything is ok
            f.wait_for(dependent, timeout=5)
            assert f.take(dependent) == b"WHAT"
        else:
            # no other workers to pick up the slack
            with pytest.raises(TimeoutError):
                f.wait_for(dependent, timeout=1)
def test_job_killed(nworkers: int, sig: int) -> None:
    """Check the error reported when a job kills its own process.

    Args:
        nworkers: Number of workers started by ``ManagedFun``.
        sig: Signal number the job sends to itself.
    """
    # std
    import os

    def kill_self(*inp: bytes) -> bytes:
        # Signal the very process that runs this job.
        os.kill(os.getpid(), sig)
        time.sleep(2.0)
        return b"what"

    def cap(inp: bytes) -> bytes:
        return inp.upper()

    with f.ManagedFun(nworkers=nworkers) as db:
        wait_for_workers(db, nworkers)
        suicidal = f.reduce(kill_self, b"bla bla", b"bla bla", opt=f.options(timeout=3))
        dependent = f.morph(cap, suicidal)
        f.execute(dependent)

        # error
        f.wait_for(dependent, timeout=1)
        failure = f.take(dependent, strict=False)
        assert isinstance(failure, f.errors.Error)
        assert failure.kind == f.errors.ErrorKind.KilledBySignal
def test_rm() -> None:
    """Check resetting artefacts: constants refuse, derived data is cleared."""
    with Fun(MockServer(), options(distributed=False)):
        const = ui.put("bla bla")

        # removing const artefact raises
        with pytest.raises(AttributeError):
            ui.reset(const)
        ui.take(const)

        def upper(x: str) -> str:
            return x.upper()

        upped = fp.morph(upper, const)
        doubled = fp.morph(lambda x: x + x, upped)
        ui.execute(doubled)
        assert ui.take(doubled) == "BLA BLABLA BLA"

        ui.reset(upped)

        # deletion works
        with pytest.raises(UnwrapError):
            ui.take(upped)

        # and it's recursive
        with pytest.raises(UnwrapError):
            ui.take(doubled)

        # re run
        ui.execute(doubled)
        assert ui.take(doubled) == "BLA BLABLA BLA"
def test_operation_pack() -> None:
    """Check operation round-tripping and rejection of mismatched inputs."""
    opt = options()
    db, store = MockServer().new_connection()

    short = _graph.constant_artefact(db, store, b"bla bla")
    longer = _graph.constant_artefact(db, store, b"bla bla bla")

    fun = f.Funsie(
        how=f.FunsieHow.shell,
        what="cat infile",
        inp={"infile": Encoding.blob},
        out={"out": Encoding.json},
        extra={},
    )

    # An operation grabbed by hash must equal the one that was created.
    created = _graph.make_op(db, fun, {"infile": short}, opt)
    restored = _graph.Operation.grab(db, created.hash)
    assert created == restored

    with pytest.raises(AttributeError):
        _graph.make_op(db, fun, {}, opt)

    # no inputs
    with pytest.raises(AttributeError):
        _graph.make_op(db, fun, {}, opt)

    # too many inputs
    with pytest.raises(AttributeError):
        _graph.make_op(db, fun, {"infile": short, "infile2": longer}, opt)

    # Grabbing a hash that was never stored fails.
    with pytest.raises(RuntimeError):
        _graph.Operation.grab(db, hash_t("b"))
def test_toposort() -> None:
    """Check that the parametrized subgraph sorts in dependency order."""
    with Fun(MockServer(), options(distributed=False)) as db:
        source = put(b"bla bla")
        step1 = morph(capitalize, source)
        step2 = shell("cat file1 file2", inp={"file1": step1, "file2": source})

        # random not included ops
        side = shell("echo 'bla'")
        _ = concat(source, source)
        _ = morph(capitalize, b"another word")

        final = shell(
            "cat file1 file2",
            inp={"file1": side.stdout, "file2": step2.stdout},
        )

        ops = _p._parametrize_subgraph(db, {"input": source}, {"output": final.stdout})
        edges = _p._subgraph_edges(db, ops)
        ordered = _p._subgraph_toposort(ops, edges)

        assert ordered[0] == step1.parent
        assert ordered[1] == step2.hash
        assert ordered[2] == final.hash
def test_dag_dump() -> None:
    """Check exporting a partially executed DAG to graphviz dot text."""
    with Fun(MockServer(), options(distributed=False)) as db:
        good = put(b"bla bla")
        bad = put(b"blaXbla")
        errorstep = morph(raises, bad)

        step1 = morph(upper, good)
        step2 = shell("cat file1 file2", inp={"file1": step1, "file2": good})
        step2b = utils.concat(step2.stdout, errorstep, strict=False)
        step3 = shell("cat file1", inp={"file1": step2b})

        step4 = shell("cat file1", inp={"file1": step1})
        step4b = shell("cat file2", inp={"file2": step4.stdout})

        out = utils.concat(step1, good, step2.stdout, step3.stdout)
        _dag.build_dag(db, out.hash)

        # Execute parts of the graph, then reset one step before exporting.
        execute(step2b)
        execute(step4b)
        wait_for(step4b, 1.0)
        reset(step4)

        nodes, artefacts, labels, links = _graphviz.export(
            db, [out.hash, step4b.hash]
        )
        dot = _graphviz.format_dot(
            nodes, artefacts, labels, links, [out.hash, step4b.hash]
        )

        assert len(dot) > 0
        assert len(nodes) == 8
        assert len(labels) == 8

        # TODO pass through dot for testing?
        with open("g.dot", "w") as handle:
            handle.write(dot)
def test_subgraph() -> None:
    """Check that only ops between the chosen input and output are selected."""
    with Fun(MockServer(), options(distributed=False)) as db:
        source = put(b"bla bla")
        step1 = morph(capitalize, source)
        step2 = shell("cat file1 file2", inp={"file1": step1, "file2": source})

        # random not included ops
        side = shell("echo 'bla'")
        _ = concat(source, source)
        _ = morph(capitalize, b"another word")

        final = shell(
            "cat file1 file2",
            inp={"file1": side.stdout, "file2": step2.stdout},
        )

        ops = _p._parametrize_subgraph(db, {"input": source}, {"output": final.stdout})
        assert len(ops) == 3
        for expected in (step1.parent, step2.hash, final.hash):
            assert expected in ops

        # get edges
        edges = _p._subgraph_edges(db, ops)
        print(edges)
def map_reduce(
    inputs: dict[str, bytes],
) -> dict[str, _graph.Artefact[bytes]]:
    """Apply ``cap`` to each space-separated word and join results with '-'.

    Args:
        inputs: Mapping whose ``"inp"`` entry holds the bytes to split.

    Returns:
        A mapping with a single ``"out"`` artefact, the concatenated result.
    """
    words = inputs["inp"].split(b" ")
    mapped: list[_graph.Artefact[bytes]] = [
        morph(cap, word, opt=options(distributed=False)) for word in words
    ]
    return {"out": concat(*mapped, join="-")}
def test_parametric_store_recall() -> None:
    """Check committing a parametric and recalling it with new inputs."""
    server = MockServer()

    with Fun(server, options(distributed=False)):
        a = put(3)
        b = put(4)
        total = reduce(lambda x, y: x + y, a, b)
        tripled = morph(lambda x: 3 * x, total)
        execute(tripled)
        assert take(tripled) == 21

        # parametrize
        p.commit("math", inp={"a": a, "b": b}, out={"s": total, "s2": tripled})

    # A fresh context on the same server can recall the stored parametric.
    with Fun(server, options(distributed=False)):
        recalled = p.recall("math", inp={"a": 5, "b": 8})
        execute(recalled["s2"])
        assert take(recalled["s2"]) == 39
def test_dag_execute() -> None:
    """Check execution of a two-step DAG (morph followed by shell)."""
    with Fun(MockServer(), defaults=options(distributed=False)):
        source = put(b"bla bla")
        upper = morph(lambda x: x.decode().upper().encode(), source)
        catted = shell("cat file1 file2", inp={"file1": upper, "file2": source})
        output = catted.stdout

        # make queue
        execute(output)
        assert take(output) == b"BLA BLAbla bla"
def test_timeout_deadlock() -> None:
    """Test funsies that time out.

    Dependents of a timed-out job are waited on explicitly, so the test
    fails if they are never resolved (i.e. if the whole thing deadlocks).
    """

    def timeout_fun(*inp: str) -> bytes:
        time.sleep(3.0)
        return b"what"

    def cap(inp: bytes) -> bytes:
        return inp.capitalize()

    with f.ManagedFun(nworkers=2):
        # Test when python function times out
        slow_py = f.reduce(timeout_fun, "bla bla", "bla bla", opt=f.options(timeout=1))
        after_py = f.morph(cap, slow_py)

        # Test when shell function times out
        slow_sh = f.shell("sleep 20", "echo 'bla bla'", opt=f.options(timeout=1))
        after_sh = f.morph(cap, slow_sh.stdouts[1])

        f.execute(after_py, after_sh)

        # Check err for reduce
        f.wait_for(after_py, timeout=1.5)
        failure = f.take(after_py, strict=False)
        assert isinstance(failure, f.errors.Error)
        assert failure.kind == f.errors.ErrorKind.JobTimedOut
        assert failure.source == slow_py.parent

        # Check err for shell
        f.wait_for(after_sh, timeout=1.5)
        failure = f.take(after_sh, strict=False)
        assert isinstance(failure, f.errors.Error)
        assert failure.kind == f.errors.ErrorKind.JobTimedOut
        assert failure.source == slow_sh.hash
def test_parametric_store_recall_optional() -> None:
    """Check recalling a parametric while supplying only some of its inputs."""
    server = MockServer()

    with Fun(server, options(distributed=False)):
        a = put(3)
        b = put("fun")
        repeated = reduce(lambda x, y: x * y, a, b)
        shouted = morph(lambda x: x.upper(), repeated)

        # parametrize
        p.commit("fun", inp={"a": a, "b": b}, out={"s": shouted})

    with Fun(server, options(distributed=False)):
        # Only "a" is overridden; "b" is not passed to recall.
        result = p.recall("fun", inp={"a": 5})
        execute(result["s"])
        assert take(result["s"]) == "FUNFUNFUNFUNFUN"

        # nested
        inner = p.recall("fun", inp={"b": "lol"})
        result = p.recall("fun", inp={"b": inner["s"], "a": 2})
        execute(result["s"])
        assert take(result["s"]) == "LOLLOLLOLLOLLOLLOL"
def test_dag_execute_same_root() -> None:
    """Check executing two DAGs that branch off the same first step."""
    with Fun(MockServer(), defaults=options(distributed=False)):
        source = put(b"bla bla")
        upper = morph(lambda x: x.decode().upper().encode(), source)
        both = shell("cat file1 file2", inp={"file1": upper, "file2": source})
        single = shell("cat file1", inp={"file1": upper})

        execute(both)
        assert take(both.stdout) == b"BLA BLAbla bla"

        # Second branch shares the morph step with the first.
        execute(single)
        assert take(single.stdout) == b"BLA BLA"
def test_parametric() -> None:
    """Check that a parametric DAG can be created and grabbed back by hash."""
    with Fun(MockServer(), options(distributed=False)) as db:
        source = put(b"bla bla")
        step1 = morph(capitalize, source)
        step2 = shell("cat file1 file2", inp={"file1": step1, "file2": source})
        final = shell(
            "cat file1 file3",
            inp={"file1": step1, "file3": step2.stdout},
        )

        created = _p.make_parametric(
            db, "param", {"input": source}, {"output": final.stdout}
        )
        restored = _p.Parametric.grab(db, created.hash)
        assert created == restored
def test_cached_run() -> None:
    """Check that running the same op twice uses the cache the second time."""
    opt = options()
    db, store = MockServer().new_connection()

    capit = p.python_funsie(
        capitalize, {"inp": Encoding.json}, {"inp": Encoding.json}, name="capit"
    )
    operation = _graph.make_op(
        db,
        capit,
        {"inp": _graph.constant_artefact(db, store, "bla bla")},
        opt,
    )

    # test return values
    first_status = run_op(db, store, operation.hash)
    assert first_status == RunStatus.executed

    second_status = run_op(db, store, operation.hash)
    assert second_status == RunStatus.using_cached
def test_subdag() -> None:
    """Test run of a subdag function.

    Running the subdag op only generates the sub-graph: the op reports
    ``subdag_ready`` and its output is a link whose target has not been
    computed yet.
    """
    # funsies
    import funsies as f

    opt = options()
    serv = MockServer()

    with f.Fun(serv):
        db, store = _context.get_connection()

        def map_reduce(inputs: Dict[str, bytes]) -> Dict[str, _graph.Artefact]:
            """Basic map reduce."""
            inp_data = inputs["inp"].split(b" ")
            # Collect every mapped artefact. The previous loop rebound a single
            # name on each iteration, so only the last word reached concat.
            out = []
            for el in inp_data:
                out.append(f.morph(lambda x: x.upper(), el, opt=options()))
            return {"out": f.utils.concat(*out, join="-")}

        cmd = sub.subdag_funsie(
            map_reduce, {"inp": Encoding.blob}, {"out": Encoding.blob}
        )
        inp = {"inp": _graph.constant_artefact(db, store, b"bla bla blo lol")}
        operation = _graph.make_op(db, cmd, inp, opt)
        status = run_op(db, store, operation.hash)

        # test return values
        assert status == RunStatus.subdag_ready

        # test output data
        # Without link resolution the output is reported as an unresolved link.
        dat = _graph.get_data(
            db,
            store,
            _graph.Artefact[bytes].grab(db, operation.out["out"]),
            do_resolve_link=False,
        )
        assert isinstance(dat, f.errors.Error)
        assert dat.kind == "UnresolvedLink"

        # With link resolution the linked data is reported as not found.
        datl = _graph.get_data(
            db,
            store,
            _graph.Artefact[bytes].grab(db, operation.out["out"]),
            do_resolve_link=True,
        )
        assert isinstance(datl, f.errors.Error)
        assert datl.kind == "NotFound"
def test_exec_all() -> None:
    """Check execute_all on a batch where one job raises."""
    with Fun(MockServer(), defaults=options(distributed=False)):

        def div_by(x: float) -> float:
            return 10.0 / x

        # Inputs count down from 10 to 0; the last one divides by zero.
        results = [morph(div_by, put(float(i))) for i in range(10, -1, -1)]

        # Nothing has been executed yet, so taking a result fails.
        with pytest.raises(UnwrapError):
            take(results[0])

        err = utils.execute_all(results)
        print(take(results[0]))

        outcome = take(err, strict=False)
        assert isinstance(outcome, Error)
        assert outcome.kind == ErrorKind.ExceptionRaised
def test_parametric_eval() -> None:
    """Check that a parametric DAG evaluates correctly on a new input."""
    with Fun(MockServer(), options(distributed=False)) as db:
        source = put(b"bla bla")
        step1 = morph(capitalize, source)
        step2 = shell("cat file1 file2", inp={"file1": step1, "file2": source})
        final = shell(
            "cat file1 file3",
            inp={"file1": step1, "file3": step2.stdout},
        )
        execute(final.stdout)
        # b'BLA BLABLA BLAbla bla'

        param = _p.make_parametric(
            db, "param", {"input": source}, {"output": final.stdout}
        )

        # Re-evaluate the same graph shape with different data.
        replacement = put(b"lol lol")
        evaluated = param.evaluate(db, {"input": replacement})
        execute(evaluated["output"])
        assert take(evaluated["output"]) == b"LOL LOLLOL LOLlol lol"
def test_map_reduce() -> None:
    """Check a split / apply / combine workflow built with ``dynamic.sac``."""

    def split(a: bytes, b: bytes) -> list[dict[str, int]]:
        # Pair up the whitespace-separated integers of both inputs.
        pairs = [
            (int(ia.decode()), int(ib.decode()))
            for ia, ib in zip(a.split(), b.split())
        ]
        return [{"sum": x + y, "product": x * y} for x, y in pairs]

    def apply(inp: Artefact) -> Artefact:
        return funsies.morph(lambda x: f"{x['sum']}//{x['product']}", inp)

    def combine(inp: Sequence[Artefact]) -> Artefact:
        encoded = [
            funsies.morph(lambda y: y.encode(), x, out=Encoding.blob)
            for x in inp
        ]
        return funsies.utils.concat(*encoded)

    with funsies.Fun(MockServer(), funsies.options(distributed=False)):
        left = funsies.put(b"1 2 3 4 5")
        right = funsies.put(b"11 10 11 10 11")

        outputs = dynamic.sac(
            split,
            apply,
            combine,
            left,
            right,
            out=Encoding.blob,
        )
        funsies.execute(outputs)
        assert funsies.take(outputs) == b"12//1112//2014//3314//4016//55"
def test_shell_run() -> None:
    """Check running a shell command op and reading its input and stdout."""
    opt = options()
    db, store = MockServer().new_connection()

    cat_cmd = s.shell_funsie(["cat file1"], {"file1": Encoding.blob}, [])
    operation = _graph.make_op(
        db,
        cat_cmd,
        {"file1": _graph.constant_artefact(db, store, b"bla bla")},
        opt,
    )

    # test return values
    assert run_op(db, store, operation.hash) == RunStatus.executed

    # check data is good
    stored_input = _graph.Artefact[bytes].grab(db, operation.inp["file1"])
    assert _graph.get_data(db, store, stored_input) == b"bla bla"

    stdout = _graph.Artefact[bytes].grab(db, operation.out[f"{s.STDOUT}0"])
    assert _graph.get_data(db, store, stdout) == b"bla bla"
def test_pyfunc_run() -> None:
    """Check running a python function op and reading its input and output."""
    opt = options()
    db, store = MockServer().new_connection()

    capit = p.python_funsie(
        capitalize, {"inp": Encoding.json}, {"inp": Encoding.json}, name="capit"
    )
    operation = _graph.make_op(
        db,
        capit,
        {"inp": _graph.constant_artefact(db, store, "bla bla")},
        opt,
    )

    # test return values
    assert run_op(db, store, operation.hash) == RunStatus.executed

    # check data is good
    stored_input = _graph.Artefact[str].grab(db, operation.inp["inp"])
    assert _graph.get_data(db, store, stored_input) == "bla bla"

    stored_output = _graph.Artefact[str].grab(db, operation.out["inp"])
    assert _graph.get_data(db, store, stored_output) == "BLA BLA"
def test_parametrize() -> None:
    """Check that parametrizing a graph matches rebuilding it by hand."""
    with Fun(MockServer(), options(distributed=False)) as db:
        original = put(b"bla bla")
        replacement = put(b"bla bla bla")

        step1 = morph(capitalize, original)
        step2 = shell("cat file1 file2", inp={"file1": step1, "file2": original})
        final = shell(
            "cat file1 file3",
            inp={"file1": step1, "file3": step2.stdout},
        )

        pinp = {"input": original}
        pout = {"final.stdout": final.stdout, "step1": step1}
        new_inp = {"input": replacement}

        ops = _p._parametrize_subgraph(db, pinp, pout)
        edges = _p._subgraph_edges(db, ops)
        sorted_ops = _p._subgraph_toposort(ops, edges)

        # _do_parametrize receives hashes rather than artefacts.
        inp_hashes = {key: art.hash for key, art in pinp.items()}
        out_hashes = {key: art.hash for key, art in pout.items()}
        new_out = _p._do_parametrize(db, sorted_ops, inp_hashes, out_hashes, new_inp)

        # re-run with dat2, check if the same.
        step1 = morph(capitalize, replacement)
        step2 = shell("cat file1 file2", inp={"file1": step1, "file2": replacement})
        final = shell(
            "cat file1 file3",
            inp={"file1": step1, "file3": step2.stdout},
        )

        assert new_out["final.stdout"] == final.stdout
        assert new_out["step1"] == step1
def map_reduce(inputs: Dict[str, bytes]) -> Dict[str, _graph.Artefact]:
    """Basic map reduce.

    Splits ``inputs["inp"]`` on single spaces, upper-cases each piece with a
    ``morph`` step and joins all results with ``"-"``.

    Args:
        inputs: Mapping whose ``"inp"`` entry holds the bytes to split.

    Returns:
        A mapping with a single ``"out"`` artefact, the concatenated result.
    """
    inp_data = inputs["inp"].split(b" ")
    # Collect every mapped artefact. The previous loop rebound a single name
    # on each iteration, so only the last word ever reached concat.
    out = []
    for el in inp_data:
        out.append(f.morph(lambda x: x.upper(), el, opt=options()))
    return {"out": f.utils.concat(*out, join="-")}