Пример #1
0
    def test_raises_on_non_dependend_job_injection2_can_be_ignored(self):
        """Run an injection that leaves one created job unattached, check disabled.

        ``generate_deps`` creates two attribute loading jobs but only makes the
        file generating job depend on ``dlA``. The injection job is built with
        ``check_for_dependency_injections=False`` and the test expects the run
        to finish and the output file to exist.
        """
        holder = Dummy()
        target = "out/A"

        def do_write():
            # Only A is written; B is left out because dlB is never part of
            # the dependency chain.
            write(target, holder.A)

        job = ppg.FileGeneratingJob(target, do_write)
        # Unrelated second output job.
        ppg.FileGeneratingJob("out/D", lambda: write("out/D", "D"))

        def generate_deps():
            def load_a():
                return "A"

            def load_b():
                return "B"

            loader_a = ppg.AttributeLoadingJob("dlA", holder, "A", load_a)
            # dlB is created but deliberately not attached to any job.
            ppg.AttributeLoadingJob("dlB", holder, "B", load_b)
            job.depends_on(loader_a)

        gen_job = ppg.DependencyInjectionJob(
            "C", generate_deps, check_for_dependency_injections=False
        )
        job.depends_on(gen_job)
        ppg.run_pipegraph()

        # No exception was raised by the run, so the output must be present.
        assert os.path.exists(target)
Пример #2
0
    def test_raises_on_non_dependend_job_injection2(self):
        """Expect a JobContractError when an injected job is left unattached.

        ``generate_deps`` creates ``dlA`` and ``dlB`` but only wires ``dlA``
        into the file generating job, while ``do_write`` needs both attributes.
        The run is expected to raise ``ppg.RuntimeError`` and the injection job
        is expected to carry a ``JobContractError`` mentioning "case 1".
        """
        holder = Dummy()
        target = "out/A"

        def do_write():
            write(target, holder.A + holder.B)

        job = ppg.FileGeneratingJob(target, do_write)
        # Unrelated second output job; it is checked separately below.
        ppg.FileGeneratingJob("out/D", lambda: write("out/D", "D"))

        def generate_deps():
            def load_a():
                return "A"

            def load_b():
                return "B"

            loader_a = ppg.AttributeLoadingJob("dlA", holder, "A", load_a)
            # dlB is created but deliberately not attached to any job.
            ppg.AttributeLoadingJob("dlB", holder, "B", load_b)
            job.depends_on(loader_a)

        gen_job = ppg.DependencyInjectionJob("C", generate_deps)
        job.depends_on(gen_job)
        with pytest.raises(ppg.RuntimeError):
            ppg.run_pipegraph()

        # The job depending on the injection must not have produced output...
        assert not os.path.exists(target)
        # ...while out/D has no relation to the injection job and is written.
        assert os.path.exists("out/D")
        failure = gen_job.exception
        assert isinstance(failure, ppg.JobContractError)
        assert "case 1" in str(failure)
Пример #3
0
        def do_run2():
            # Builds a graph in which a DependencyInjectionJob adds a data
            # loading job ("ob") as a dependency of the "oa" loader, then runs it.
            # NOTE(review): presumably the counterpart of an earlier run defined
            # outside this snippet - confirm against the enclosing test.
            output_path = "out/A"

            def inject():
                def dl_b():
                    o.b = "C"  # so this dl has changed...

                injected_loader = ppg.DataLoadingJob("ob", dl_b)
                # loader_a is bound below; it is resolved when inject() runs.
                loader_a.depends_on(injected_loader)

            writer = ppg.FileGeneratingJob(output_path, do_write)
            loader_a = ppg.DataLoadingJob("oa", dl_a)
            writer.depends_on(loader_a)
            injector = ppg.DependencyInjectionJob("inject", inject)
            loader_a.depends_on(injector)
            ppg.run_pipegraph()
Пример #4
0
    def test_basic(self, new_pipegraph):
        """Inject two attribute loading jobs at runtime and check the output.

        The injection callback creates ``dlA`` and ``dlB``, makes the file
        generating job depend on both and returns them; the written file is
        expected to contain "AB".
        """
        # TODO: there is a problem with this approach. The AttributeLoadingJob
        # references different objects, since it gets pickled along with the
        # method and unpickled again, and then it is not the same object
        # anymore, so the FileGeneratingJob and the AttributeLoadingJob in this
        # test reference different objects.
        # I'm not sure how to handle this right now though.
        #
        # Idea: run JobGraphModifyingJobs in each slave and send back just the
        # dependency data (and new job name). That way we can still execute on
        # any slave, and all the pointers should be right.
        new_pipegraph.new_pipegraph()

        holder = Dummy()
        target = "out/A"

        def do_write():
            write(target, holder.A + holder.B)

        job = ppg.FileGeneratingJob(target, do_write)

        def generate_deps():
            def load_a():
                return "A"

            def load_b():
                return "B"

            # Create both loaders first, then attach them in the same order.
            injected = [
                ppg.AttributeLoadingJob("dlA", holder, "A", load_a),
                ppg.AttributeLoadingJob("dlB", holder, "B", load_b),
            ]
            for loader in injected:
                job.depends_on(loader)
            return injected

        gen_job = ppg.DependencyInjectionJob("C", generate_deps)
        job.depends_on(gen_job)
        ppg.run_pipegraph()
        assert read(target) == "AB"
Пример #5
0
    def test_injecting_filegenerating_job(self):
        """Inject a FileGeneratingJob whose output the outer job copies.

        The injected job writes "B" to ``out/B``; the outer job reads that file
        and writes its content to ``out/A``, which is then checked.
        """
        target = "out/A"
        source = "out/B"

        def do_write():
            write(target, read("out/B"))

        job = ppg.FileGeneratingJob(target, do_write)

        def generate_dep():
            def write_B():
                write("out/B", "B")

            # Created while the graph runs and attached to the outer job.
            job.depends_on(ppg.FileGeneratingJob(source, write_B))

        job_gen = ppg.DependencyInjectionJob("gen_job", generate_dep)
        job.depends_on(job_gen)
        ppg.run_pipegraph()
        assert read("out/A") == "B"
Пример #6
0
    jobs = []
    global urls  # we use a global to communicate with the later job
    # please note that this is only possible in Job generating and data loading jobs,
    # not in the output jobs.
    urls = retrieve_urls()
    for url in urls:
        jobs.append(download_job(url))
    jobs.append(  # this makes sure that if you remove urls, the output job would also rerun.
        # adding urls not seen before would make it rerun either way.
        pypipegraph.ParameterInvariant('retrieved_urls', urls))
    return jobs


# Wrap generate_download_jobs in a DependencyInjectionJob, so that the jobs it
# returns can be added as dependencies (of job_count) while the graph is running.
job_generate = pypipegraph.DependencyInjectionJob(
    'retrieve_url',
    generate_download_jobs,
)


# now, this needs to be more fancy as well.
def count_characters_and_write_output():
    """Collect the size in bytes of every downloaded file, keyed by url.

    Iterates over the module-level ``urls``, looks up the download job for
    each url and reads the file named by that job's ``job_id`` in binary mode.
    """
    counts = {}
    for url in urls:
        # this will get us the *same* object as download_job(url) returned the first time
        dj = download_job(url)
        target_file = dj.job_id  # the job_id for FileGeneratingJob is the output filename.

        # Context manager so the handle is closed even if read() raises;
        # the original left the file open.
        with open(target_file, 'rb') as file_handle:
            data = file_handle.read()
        count = len(data)
        counts[url] = count
Пример #7
0
 def inner():
     # Construct a DependencyInjectionJob with an integer job id and a
     # trivial callback.
     # NOTE(review): presumably wrapped in a raises-check by the enclosing
     # test - confirm.
     job_id = 5
     ppg.DependencyInjectionJob(job_id, lambda: 1)
Пример #8
0
 def inner():
     # Construct a DependencyInjectionJob whose second argument is a plain
     # string rather than a function.
     # NOTE(review): presumably wrapped in a raises-check by the enclosing
     # test - confirm.
     not_a_callback = "shu"
     ppg.DependencyInjectionJob("out/a", not_a_callback)