Example #1
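These excerpts come from one test module, so shared module-level imports are not repeated per example. A reconstructed preamble of what they assume follows; the exact import paths for the funsies-internal names are guesses, not verified against the repository.

# Assumed shared imports for all examples below (reconstruction).
import tempfile
import time
from signal import SIGKILL
from typing import Any, Sequence

import pytest
from redis import Redis

import funsies
import funsies as f
from funsies import _graphviz, dynamic        # assumed paths
from funsies.errors import UnwrapError        # assumed path
from funsies.types import Artefact, Encoding  # assumed paths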
def test_worker_killed(nworkers: int, sig: int) -> None:
    """Test what happens when 'funsies worker' gets killed."""
    # std
    import os

    def kill_funsies_worker(*inp: bytes) -> bytes:
        # getppid() targets the rq worker that spawned this job, so the
        # worker itself dies mid-job.
        pid = os.getppid()
        os.kill(pid, sig)
        time.sleep(1.0)
        return b"what"

    def cap(inp: bytes) -> bytes:
        return inp.upper()

    with f.ManagedFun(nworkers=nworkers) as db:
        wait_for_workers(db, nworkers)
        s1 = f.reduce(kill_funsies_worker,
                      b"bla bla",
                      b"bla bla",
                      opt=f.options(timeout=5))
        s1b = f.morph(cap, s1)
        f.execute(s1b)

        if nworkers == 1:
            # no other workers to pick up the slack
            with pytest.raises(TimeoutError):
                f.wait_for(s1b, timeout=1)
        else:
            # everything is ok
            f.wait_for(s1b, timeout=5)
            assert f.take(s1b) == b"WHAT"
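Examples #1 and #3 call wait_for_workers, a helper not shown in these excerpts. A minimal sketch of what it presumably does, polling rq's worker registry until the expected count is up (the signature and polling details are assumptions):

def wait_for_workers(db: Redis, nworkers: int, timeout: float = 10.0) -> None:
    """Block until nworkers rq workers have registered with db."""
    # external
    from rq import Worker

    t0 = time.time()
    while len(Worker.all(connection=db)) < nworkers:
        if time.time() - t0 > timeout:
            raise RuntimeError("workers did not come up in time")
        time.sleep(0.1)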
Example #2
def test_double_execution(nworkers: int) -> None:
    """Test multiple executions of the same task."""
    # This test fails if a job is executed more than once.
    # external
    from rq.job import get_current_job

    def track_runs(inp: bytes) -> bytes:
        job = get_current_job()
        db: Redis[bytes] = job.connection
        val = db.incrby("sentinel", 1)
        time.sleep(0.5)
        return str(val).encode()

    with f.ManagedFun(nworkers=nworkers):
        # wait_for_workers(db, nworkers)
        dat = f.put(b"bla bla")
        step1 = f.morph(track_runs, dat)

        step1a = f.shell(
            "cat file1",
            inp=dict(file1=step1),
        )

        step1b = f.shell(
            "cat file2",
            inp=dict(file2=step1),
        )

        f.execute(step1a)
        f.execute(step1b)
        f.wait_for(step1a, timeout=10.0)
        f.wait_for(step1b, timeout=10.0)
        assert f.take(step1a.stdout) == b"1"
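Both shell steps read the same step1 artefact, so the final assertion checks memoization: track_runs must run exactly once, leaving the sentinel at 1. If the job were re-executed, incrby would bump the counter again and cat file1 would print b"2" or higher, failing the assertion.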
Example #3
def test_job_killed(nworkers: int, sig: int) -> None:
    """Test what happens when 'funsies worker' is ok but its job gets killed."""
    # std
    import os

    def kill_self(*inp: bytes) -> bytes:
        # getpid() targets the job process only; the rq worker stays alive.
        pid = os.getpid()
        os.kill(pid, sig)
        time.sleep(2.0)
        return b"what"

    def cap(inp: bytes) -> bytes:
        return inp.upper()

    with f.ManagedFun(nworkers=nworkers) as db:
        wait_for_workers(db, nworkers)
        s1 = f.reduce(kill_self,
                      b"bla bla",
                      b"bla bla",
                      opt=f.options(timeout=3))
        s1b = f.morph(cap, s1)
        f.execute(s1b)

        # error
        f.wait_for(s1b, timeout=1)
        err = f.take(s1b, strict=False)
        assert isinstance(err, f.errors.Error)
        assert err.kind == f.errors.ErrorKind.KilledBySignal
Example #4
def test_data_race(nworkers: int) -> None:
    """Test a data race when execute calls are interleaved."""
    with f.ManagedFun(nworkers=nworkers):
        dat = f.put(b"bla bla")
        step1 = f.morph(lambda x: x.decode().upper().encode(), dat)
        step2 = f.shell(
            "cat file1 file2; grep 'bla' file2 file1 > file3; date >> file3",
            inp=dict(file1=step1, file2=dat),
            out=["file2", "file3"],
        )

        f.execute(step1)
        f.execute(step2)
        f.wait_for(step1, timeout=20.0)
        f.wait_for(step2, timeout=20.0)
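Note that file2 is both an input and a declared output of step2, and step1 is executed both directly and as a dependency of step2; the back-to-back execute calls are what interleave and provoke the race.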
Example #5
def test_dynamic_dump() -> None:
    """Test whether a dynamic DAG gets graphed properly."""
    def split(a: bytes, b: bytes) -> list[dict[str, int]]:
        xs = a.split()
        ys = b.split()
        out = []
        for ia, ib in zip(xs, ys):
            out += [{
                "sum": int(ia.decode()) + int(ib.decode()),
                "product": int(ia.decode()) * int(ib.decode()),
            }]
        return out

    def apply(inp: Artefact[dict[str, Any]]) -> Artefact[str]:
        out = funsies.morph(lambda x: f"{x['sum']}//{x['product']}", inp)
        return out

    def combine(inp: Sequence[Artefact[str]]) -> Artefact[bytes]:
        def enc(inp: str) -> bytes:
            return inp.encode()

        out = [funsies.morph(enc, x, out=Encoding.blob) for x in inp]
        return funsies.utils.concat(*out)

    with funsies.ManagedFun(nworkers=1) as db:
        num1 = funsies.put(b"1 2 3 4 5")
        num2 = funsies.put(b"11 10 11 10 11")

        outputs = dynamic.sac(
            split,
            apply,
            combine,
            num1,
            num2,
            out=Encoding.blob,
        )
        outputs = funsies.morph(lambda x: x, outputs)
        nodes, artefacts, labels, links = _graphviz.export(db, [outputs.hash])
        assert len(artefacts) == 4  # not yet generated subdag parents
        print(len(artefacts))
        funsies.execute(outputs)
        funsies.wait_for(outputs, timeout=1.0)
        nodes, artefacts, labels, links = _graphviz.export(db, [outputs.hash])
        assert len(artefacts) == 22  # generated subdag parents
        assert funsies.take(outputs) == b"12//1112//2014//3314//4016//55"
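As a sanity check, the expected constant can be recomputed in plain Python, with no funsies involved:

# Pairwise sums and products of "1 2 3 4 5" and "11 10 11 10 11",
# formatted as "{sum}//{product}" and concatenated.
xs, ys = b"1 2 3 4 5".split(), b"11 10 11 10 11".split()
parts = [f"{int(x) + int(y)}//{int(x) * int(y)}" for x, y in zip(xs, ys)]
assert "".join(parts) == "12//1112//2014//3314//4016//55"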
Example #6
def test_artefact_disk_distributed() -> None:
    """Test whether artefacts on disk works on different nodes."""
    # funsies
    import funsies as f

    with tempfile.TemporaryDirectory() as td:
        with f.ManagedFun(nworkers=1, data_url=f"file://{td}"):
            dat = f.put(b"bla bla")
            step1 = f.morph(lambda x: x.decode().upper().encode(), dat)
            step2 = f.shell("cat file1 file2",
                            inp=dict(file1=step1, file2=dat))
            step2b = f.shell("cat file1", inp=dict(file1=step1))

            f.execute(step2)
            f.wait_for(step2, 1.0)
            out = f.take(step2.stdout)
            assert out == b"BLA BLAbla bla"

            f.execute(step2b)
            f.wait_for(step2b, 1.0)
            out = f.take(step2b.stdout)
            assert out == b"BLA BLA"
Example #7
def test_waiting_on_map_reduce() -> None:
    """Test waiting on the (linked) result of map-reduce."""
    def split(a: bytes, b: bytes) -> list[dict[str, int]]:
        xs = a.split()
        ys = b.split()
        out = []
        for ia, ib in zip(xs, ys):
            out += [{
                "sum": int(ia.decode()) + int(ib.decode()),
                "product": int(ia.decode()) * int(ib.decode()),
            }]
        return out

    def apply(inp: Artefact) -> Artefact:
        out = funsies.morph(lambda x: f"{x['sum']}//{x['product']}", inp)
        return out

    def combine(inp: Sequence[Artefact]) -> Artefact:
        out = [
            funsies.morph(lambda y: y.encode(), x, out=Encoding.blob)
            for x in inp
        ]
        return funsies.utils.concat(*out)

    with funsies.ManagedFun(nworkers=1):
        num1 = funsies.put(b"1 2 3 4 5")
        num2 = funsies.put(b"11 10 11 10 11")

        outputs = dynamic.sac(
            split,
            apply,
            combine,
            num1,
            num2,
            out=Encoding.blob,
        )
        funsies.execute(outputs)
        funsies.wait_for(outputs, timeout=1.0)
        assert funsies.take(outputs) == b"12//1112//2014//3314//4016//55"
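The dynamic.sac calls here and in Examples #5 and #11 follow a split-apply-combine contract, as the signatures suggest: split runs as a plain function returning a list, apply maps each resulting Artefact to a new Artefact, and combine folds the applied Artefacts into a single output. Because the subdag only exists once split has executed, Example #5 sees the artefact count grow from 4 to 22 after execution.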
Example #8
def test_raising_funsie() -> None:
    """Test funsie that raises an error.

    This test is specifically designed to catch the bug fixed in fa9af6a4
    where funsies that raised did not release their locks, leading to a race
    condition.
    """
    def raising_fun(*inp: str) -> bytes:
        raise RuntimeError("this funsie raises.")

    with f.ManagedFun(nworkers=2):
        s0a = f.morph(lambda x: x, "bla blabla")
        s0b = f.morph(lambda x: x, "blala")
        s1 = f.reduce(raising_fun, "bla bla", s0a, s0b, strict=True)
        f.execute(s1)
        f.wait_for(s1, timeout=2)
        with pytest.raises(UnwrapError):
            _ = f.take(s1)

        s2 = f.morph(lambda x: x, s1)
        f.execute(s2)
        f.wait_for(s2, timeout=0.5)
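take() is strict by default and raises UnwrapError when an artefact holds an error. The non-raising pattern from Examples #3 and #9 applies here too, if inspecting the failure is preferable to catching:

err = f.take(s1, strict=False)
assert isinstance(err, f.errors.Error)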
Example #9
def test_timeout_deadlock() -> None:
    """Test funsies that time out.

    Here we explicitly check whether dependents still get enqueued or the
    whole pipeline deadlocks.
    """
    def timeout_fun(*inp: str) -> bytes:
        time.sleep(3.0)
        return b"what"

    def cap(inp: bytes) -> bytes:
        return inp.capitalize()

    with f.ManagedFun(nworkers=2):
        # Test when python function times out
        s1 = f.reduce(timeout_fun,
                      "bla bla",
                      "bla bla",
                      opt=f.options(timeout=1))
        s1b = f.morph(cap, s1)
        # Test when shell function times out
        s2 = f.shell("sleep 20", "echo 'bla bla'", opt=f.options(timeout=1))
        s2b = f.morph(cap, s2.stdouts[1])
        f.execute(s1b, s2b)

        # Check err for reduce
        f.wait_for(s1b, timeout=1.5)
        err = f.take(s1b, strict=False)
        assert isinstance(err, f.errors.Error)
        assert err.kind == f.errors.ErrorKind.JobTimedOut
        assert err.source == s1.parent

        # Check err for shell
        f.wait_for(s2b, timeout=1.5)
        err = f.take(s2b, strict=False)
        assert isinstance(err, f.errors.Error)
        assert err.kind == f.errors.ErrorKind.JobTimedOut
        assert err.source == s2.hash
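Note the two forms of err.source: s1 is an Artefact, so its generating operation is reached via s1.parent, while s2 is the shell output object, whose hash identifies the operation directly.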
Example #10
def test_cleanup() -> None:
    """Test truncation."""
    # std
    import os

    def kill_self(*inp: bytes) -> bytes:
        pid = os.getpid()
        os.kill(pid, SIGKILL)
        time.sleep(2.0)
        return b"what"

    with f.ManagedFun(nworkers=1) as db:
        inp = "\n".join([f"{k}" for k in range(10)]).encode()
        fun = f.reduce(kill_self, inp)
        f.execute(fun)
        time.sleep(0.5)
        key1 = db.get(
            f._constants.join(f._constants.OPERATIONS, fun.parent, "owner"))
        f._context.cleanup_funsies(db)
        key2 = db.get(
            f._constants.join(f._constants.OPERATIONS, fun.parent, "owner"))
        assert key1 is not None
        assert key2 is None
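The operations/.../owner key appears to record which worker holds the operation's lock; because that worker was SIGKILLed mid-job, the stale key survives until f._context.cleanup_funsies(db) clears it, which is what the two asserts verify.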
Example #11
def test_nested_map_reduce(nworkers: int) -> None:
    """Test nested map-reduce."""

    # ------------------------------------------------------------------------
    # Inner
    def sum_inputs(*inp: int) -> int:
        out = 0
        for el in inp:
            out += el
        return out

    def split_inner(inp: str) -> list[int]:
        a = inp.split(" ")
        return [int(el) for el in a]

    def apply_inner(inp: Artefact) -> Artefact:
        return funsies.reduce(sum_inputs, inp, 1)

    def combine_inner(inp: Sequence[Artefact]) -> Artefact:
        return funsies.reduce(sum_inputs, *inp)

    # ------------------------------------------------------------------------
    # outer
    def split_outer(inp: list[str], fac: int) -> list[str]:
        out = [x + f" {fac}" for x in inp]
        return out

    def apply_outer(inp: Artefact) -> Artefact:
        outputs = dynamic.sac(
            split_inner,
            apply_inner,
            combine_inner,
            inp,
            out=Encoding.json,
        )
        return outputs

    def combine_outer(inp: Sequence[Artefact]) -> Artefact:
        out = [
            funsies.morph(lambda y: f"{y}".encode(), x, out=Encoding.blob)
            for x in inp
        ]
        return funsies.utils.concat(*out, join=b",,")

    with funsies.ManagedFun(nworkers=nworkers):
        num1 = funsies.put("1 2 3 4 5")
        outputs = dynamic.sac(split_inner,
                              apply_inner,
                              combine_inner,
                              num1,
                              out=Encoding.json)
        funsies.execute(outputs)
        funsies.wait_for(outputs, timeout=30.0)
        assert funsies.take(outputs) == 20

        # Now try the nested one
        num = funsies.put(["1 2", "3 4 7", "10 12", "1"])
        factor = funsies.put(-2)
        # split -> 1 2 -2|3 4 7 -2|10 12 -2| 1 -2
        # apply -> split2 -> 1, 2,-2 | 3,4,7,-2|10,12,-2|1,-2
        # apply2 -> 2, 3,-1 | 4,5,8,-1|11,13,-1|2,-1
        # combine2 -> 4|16|23|1
        # combine -> 4,,16,,23,,1
        ans = b"4,,16,,23,,1"

        outputs = dynamic.sac(
            split_outer,
            apply_outer,
            combine_outer,
            num,
            factor,
            out=Encoding.blob,
        )
        funsies.execute(outputs)
        funsies.wait_for(outputs, timeout=30.0)
        assert funsies.take(outputs) == ans
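As in Example #5, the expected answer can be recomputed in plain Python by following the comment trace above:

# split_outer appends " -2" to each chunk; the inner split-apply-combine
# adds 1 to every element (apply_inner) and sums (combine_inner).
chunks = [x + " -2" for x in ["1 2", "3 4 7", "10 12", "1"]]
inner = [sum(int(el) + 1 for el in c.split(" ")) for c in chunks]
assert b",,".join(str(v).encode() for v in inner) == b"4,,16,,23,,1"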