def replace_list_flow():
    """Return a ``Response`` that replaces the current job with two flows.

    Each flow chains two ``add`` jobs (the second consumes the first's
    output) and exposes both outputs through a dict.
    """
    head_a = Job(add, function_args=(1, ))
    tail_a = Job(add, function_args=(head_a.output, ))
    head_b = Job(add, function_args=(5, ))
    tail_b = Job(add, function_args=(head_b.output, ))

    flow_a = Flow(
        [head_a, tail_a],
        output={"1": head_a.output, "2": tail_a.output},
    )
    flow_b = Flow(
        [head_b, tail_b],
        output={"3": head_b.output, "4": tail_b.output},
    )
    replacement = [flow_a, flow_b]
    return Response(replace=replacement)
def test_dag_validation():
    """Iterating a flow whose jobs depend on each other cyclically must raise."""
    from jobflow import Flow, Job

    # Build two chained jobs, then rewire the first one so that it consumes
    # the second one's output, which closes the loop.
    upstream = Job(add, function_args=(1, 2))
    downstream = Job(add, function_args=(upstream.output, 2))
    upstream.function_args = (downstream.output, 2)

    cyclic_flow = Flow(jobs=[upstream, downstream])
    with pytest.raises(ValueError):
        next(cyclic_flow.iterflow())
def test_set_output():
    """Check reassigning ``Flow.output`` to member and non-member job outputs."""
    from jobflow.core.flow import Flow

    member_a, member_b, outsider = (get_test_job() for _ in range(3))
    flow = Flow([member_a, member_b], output=member_b.output)

    # Pointing the output at the other member job is accepted.
    flow.output = member_a.output
    assert flow.output.uuid == member_a.uuid

    # The third job was never added to the flow; using its output is
    # expected to raise.
    with pytest.raises(ValueError):
        flow.output = [outsider.output]
def test_hosts():
    """Check the ``hosts`` lists of jobs and flows nested three levels deep."""
    from jobflow.core.flow import Flow

    deep_job = get_test_job()
    shallow_job = get_test_job()

    inner = Flow(deep_job)
    middle = Flow([inner, shallow_job])
    outer = Flow(middle)

    # (object, expected hosts ordered from the closest flow outwards)
    expectations = [
        (deep_job, [inner.uuid, middle.uuid, outer.uuid]),
        (shallow_job, [middle.uuid, outer.uuid]),
        (inner, [middle.uuid, outer.uuid]),
        (middle, [outer.uuid]),
        (outer, []),
    ]
    for node, expected_hosts in expectations:
        assert node.hosts == expected_hosts
def test_serialization():
    """Round-trip a nested flow through monty JSON and check the child's host."""
    import json

    from monty.json import MontyDecoder, MontyEncoder

    from jobflow import Flow

    child = Flow([])
    parent = Flow([child])
    expected_host = parent.uuid

    serialized = MontyEncoder().encode(parent)
    as_dict = json.loads(serialized)
    restored = MontyDecoder().process_decoded(as_dict)

    assert restored.jobs[0].host == expected_host
def test_pass_manager_config():
    """Check ``pass_manager_config`` on a job, a list of jobs, a flow and bad input."""
    from jobflow import Flow, Job
    from jobflow.core.job import pass_manager_config

    manager_config = {"abc": 1}

    def fresh_job():
        # every scenario starts from untouched jobs
        return Job(add, function_args=(1, ))

    # a single job
    lone_job = fresh_job()
    pass_manager_config(lone_job, manager_config)
    assert lone_job.config.manager_config == manager_config

    # a list of jobs
    listed_jobs = [fresh_job(), fresh_job()]
    pass_manager_config(listed_jobs, manager_config)
    for listed in listed_jobs:
        assert listed.config.manager_config == manager_config

    # jobs wrapped in a flow
    flow_jobs = [fresh_job(), fresh_job()]
    wrapping_flow = Flow(flow_jobs)
    pass_manager_config(wrapping_flow, manager_config)
    for wrapped in flow_jobs:
        assert wrapped.config.manager_config == manager_config

    # input that is neither a job nor a flow
    with pytest.raises(ValueError):
        pass_manager_config(["str"], manager_config)
def get_maker_flow(return_makers=False):
    """Build a two-job flow (add, then divide) from locally defined makers.

    Parameters
    ----------
    return_makers
        When true, also return the two maker classes.

    Returns
    -------
    The flow, or ``(flow, (AddMaker, DivMaker))`` if ``return_makers`` is set.
    """
    from dataclasses import dataclass

    from jobflow import Flow, Maker, job

    @dataclass
    class AddMaker(Maker):
        name: str = "add"
        b: int = 2

        @job
        def make(self, a):
            return a + self.b

    @dataclass
    class DivMaker(Maker):
        name: str = "div"
        b: int = 5

        @job
        def make(self, a):
            return a / self.b

    adder, divider = AddMaker(b=3), DivMaker(b=4)

    # the division job consumes the addition job's output
    summed = adder.make(2)
    divided = divider.make(summed.output)
    flow = Flow([summed, divided])

    if not return_makers:
        return flow
    return flow, (AddMaker, DivMaker)
def get_name_flow():
    """Return a "Get Name" flow whose output is the connected full name."""
    first = generate_first_name()
    second = generate_second_name()
    # the joining job depends on both name-generating jobs
    joined = connect_name(first.output, second.output)

    members = [first, second, joined]
    return Flow(members, joined.output, name="Get Name")
def get_test_flow():
    """Return a flow of an ``add`` job feeding a ``div`` job that carries metadata."""
    from jobflow import Flow, Job

    adder = Job(add, function_args=(1, 2))
    divider = Job(
        div,
        function_args=(adder.output, ),
        function_kwargs={"b": 3},
    )
    divider.metadata = {"b": 3}

    return Flow([adder, divider])
def test_append_name():
    """Check ``Flow.append_name`` as suffix, as prefix, and on an empty flow."""
    from jobflow import Flow

    # suffix: both the flow and its jobs are renamed
    suffixed = get_test_flow()
    suffixed.append_name(" test")
    assert suffixed.name == "Flow test"
    assert suffixed.jobs[0].name == "add test"

    # prefix
    prefixed = get_test_flow()
    prefixed.append_name("test ", prepend=True)
    assert prefixed.name == "test Flow"
    assert prefixed.jobs[0].name == "test add"

    # a flow without any jobs
    empty = Flow([], name="abc")
    empty.append_name(" test")
    assert empty.name == "abc test"
def start_timing_jobs(websites: List[str]):
    """Replace the current job with a flow holding one timing job per website.

    The flow's output is the list of the individual timing job outputs, in
    the same order as ``websites``.
    """
    from jobflow.core.job import Response

    def named_timer(website):
        # label each job with the site it measures
        timer = time_website(website)
        timer.name = f"time {website}"
        return timer

    timers = [named_timer(site) for site in websites]
    timings = [timer.output for timer in timers]
    return Response(replace=Flow(timers, timings))
def replace_func_flow(a, b):
    """Return ``a + b`` as output together with a two-job replacement flow.

    The replacement flow runs ``simple_job`` on ``str(a + b)`` and then on
    the first job's output, exposing both outputs in a dict.
    """
    head = simple_job(str(a + b))
    tail = simple_job(head.output)

    flow_output = {"first": head.output, "second": tail.output}
    replacement = Flow([head, tail], flow_output)

    return Response(output=a + b, replace=replacement)
def test_flow_job_mixed():
    """Check flows that contain both bare jobs and nested flows."""
    from jobflow.core.flow import Flow

    # a bare job next to a nested flow
    bare_job = get_test_job()
    nested_job = get_test_job()
    nested = Flow([nested_job])
    mixed = Flow([bare_job, nested])

    assert mixed.host is None
    assert mixed.output is None
    assert mixed.job_uuids == (bare_job.uuid, nested_job.uuid)
    for position in (0, 1):
        assert mixed.jobs[position].host == mixed.uuid

    # list output combining a job output and a nested flow output
    bare_job = get_test_job()
    nested_job = get_test_job()
    nested = Flow([nested_job], output=nested_job.output)
    mixed = Flow([bare_job, nested], output=[bare_job.output, nested.output])

    assert mixed.output[0] == bare_job.output
    assert mixed.output[1] == nested_job.output

    # an output may only reference jobs/flows that are part of the flow
    bare_job = get_test_job()
    nested_job = get_test_job()
    nested = Flow([nested_job], output=nested_job.output)
    with pytest.raises(ValueError):
        Flow([bare_job], output=[bare_job.output, nested.output])
def test_to_pydot():
    """Check that ``to_pydot`` returns something for connected and nested flows."""
    from jobflow import Flow, Job
    from jobflow.utils.graph import to_pydot

    # two jobs joined by a dependency edge
    source = Job(add, function_args=(1, 2))
    sink = Job(add, function_args=(1, source.output))
    connected = Flow([source, sink])
    assert to_pydot(connected) is not None

    # four independent jobs split over two nested flows
    independent = [Job(add, function_args=(1, 2)) for _ in range(4)]
    left = Flow(independent[:2])
    right = Flow(independent[2:])
    nested = Flow([left, right])
    assert to_pydot(nested) is not None
def test_contains_job_or_flow():
    """Check ``contains_flow_or_job`` on plain values and nested containers."""
    from jobflow import Flow, Job
    from jobflow.utils import contains_flow_or_job

    job = Job(str)
    flow = Flow([])

    def nestings(obj):
        # the object itself, then wrapped in lists and dicts
        return [obj, [obj], [[obj]], {"a": obj}, {"a": [obj]}]

    # plain values are never reported as containing a job or flow
    for plain in (True, 1, "abc"):
        assert contains_flow_or_job(plain) is False

    # same order of checks as before: the bare job, every flow nesting,
    # then every job nesting
    positives = [job, *nestings(flow), *nestings(job)]
    for candidate in positives:
        assert contains_flow_or_job(candidate) is True
# NOTE(review): the statements up to the first ``return`` look like the tail of
# a function whose ``def`` line is not visible here -- confirm against the full
# file before moving or editing them.
jobs = []
for website in websites:
    # one job per website, labelled with the site it is created for
    time_job = time_website(website)
    time_job.name = f"time {website}"
    jobs.append(time_job)

# the replacement flow exposes the outputs of all generated jobs as a list
output = [j.output for j in jobs]
return Response(replace=Flow(jobs, output))


@job
def sum_times(times: List[float]):
    """Return the sum of ``times``."""
    return sum(times)


# create a flow that will:
# 1. load a list of websites from a file
# 2. generate one new job for each website to time the website loading
# 3. sum all the times together
read_websites_job = read_websites()
timings_job = start_timing_jobs(read_websites_job.output)
sum_job = sum_times(timings_job.output)
flow = Flow([read_websites_job, timings_job, sum_job])

# draw the flow graph
flow.draw_graph().show()

# run the flow, "responses" contains the output of all jobs
responses = run_locally(flow)
print(responses)
from dataclasses import dataclass

from jobflow import Flow, Maker, job, run_locally


@dataclass
class AddMaker(Maker):
    """Maker whose job adds its two arguments, the maker's ``c`` and ``d``."""

    name: str = "add"
    c: int = 10

    @job
    def make(self, a, b, d=0.5):
        return a + b + self.c + d


# two jobs from the same maker; the second consumes the first one's output
maker = AddMaker(c=10)
add_first = maker.make(1, 2)
add_second = maker.make(add_first.output, 5)
flow = Flow([add_first, add_second])

# apply a dict-mod update to the maker attribute "c" of the jobs in the flow
flow.update_maker_kwargs({"_inc": {"c": 50}}, dict_mod=True)

# override the "d" keyword argument of the jobs in the flow
flow.update_kwargs({"d": 0.2})

# run the flow, "output" contains the output of all jobs
output = run_locally(flow)
print(output)
# ``connect_name`` must be a job: ``get_name_flow`` reads ``.output`` from its
# return value and adds that value to a Flow, which a plain ``str`` cannot do.
@job
def connect_name(first_name, second_name):
    """Join a first and a second name with a single space."""
    return f"{first_name} {second_name}"


@job
def print_inputs(inputs):
    """Print ``inputs`` to stdout."""
    print(inputs)


def get_name_flow():
    """Return a "Get Name" flow whose output is the connected full name."""
    first_name = generate_first_name()
    second_name = generate_second_name()
    full_name = connect_name(first_name.output, second_name.output)
    return Flow(
        [first_name, second_name, full_name],
        full_name.output,
        name="Get Name",
    )


name_flow_a = get_name_flow()
name_flow_b = get_name_flow()
print_job = print_inputs([name_flow_a.output, name_flow_b.output])

# create a new flow to contain the nested flow
outer_flow = Flow([name_flow_a, name_flow_b, print_job])

# draw the flow graph
outer_flow.draw_graph().show()

# run the flow
run_locally(outer_flow)
def _gen():
    """Return a flow nesting two connected flows, the second fed by the first."""
    upstream = connected_flow()
    downstream = connected_flow()

    # feed the second job of the first flow into the first job of the second
    link = upstream.jobs[1].output
    downstream.jobs[0].function_args = (link, )

    return Flow([upstream, downstream], downstream.output)
def _gen():
    """Return a linearly ordered flow: a replace-and-detour job, then a simple job."""
    replacing = replace_and_detour_job(5, 6)
    follow_up = simple_job("12345")
    members = [replacing, follow_up]
    return Flow(members, follow_up.output, order=JobOrder.LINEAR)
def _gen():
    """Return a flow containing a single ``stored_data_job``."""
    members = [stored_data_job("12345")]
    return Flow(members)
def _gen():
    """Return a flow of an error job followed by two chained simple jobs."""
    failing = error_job()
    # both simple jobs depend, directly or indirectly, on the error job
    dependent = simple_job(failing.output)
    last = simple_job(dependent.output)
    return Flow([failing, dependent, last])
def _gen():
    """Return a one-job flow whose output is that job's output."""
    only_job = simple_job("12345")
    return Flow([only_job], only_job.output)
def make(self, a, b):
    """Return a flow, named after this maker, of two chained ``add`` jobs.

    The second job adds ``b`` to the first job's output; its output is the
    flow output.
    """
    initial = add(a, b)
    final = add(initial.output, b)
    members = [initial, final]
    return Flow(members, final.output, name=self.name)
def make(self, a, b):
    """Return a flow, named after this maker, chaining the two sub-makers.

    ``self.add1`` makes the first job from ``a`` and ``b``; ``self.add2``
    makes the second from the first job's output and ``b``.
    """
    initial = self.add1.make(a, b)
    final = self.add2.make(initial.output, b)
    members = [initial, final]
    return Flow(members, final.output, name=self.name)
def _gen():
    """Return a one-job flow around ``addition_job(5, 6)``, exposing its output."""
    addition = addition_job(5, 6)
    return Flow([addition], addition.output)
@job
def encode_message(message):
    """Return the base64 text form of ``message``."""
    import base64

    raw = message.encode()
    return base64.b64encode(raw).decode()


@job
def decode_message(message):
    """Return the text recovered from base64-encoded ``message``."""
    import base64

    raw = message.encode()
    return base64.b64decode(raw).decode()


# Two jobs: the first encodes a message, the second decodes the first's output.
encode = encode_message("Lo, a shadow of horror is risen")
decode = decode_message(encode.output)

# Wrap the jobs in a flow. The order of the list doesn't matter; execution
# order is determined by the connectivity of the jobs.
flow = Flow([encode, decode])

# draw the flow graph
flow.draw_graph().show()

# run the flow, "output" contains the output of all jobs
output = run_locally(flow)
print(output)
def _gen():
    """Return a linearly ordered flow: a detour job followed by a simple job."""
    detouring = detour_job(5, 6)
    follow_up = simple_job("12345")
    members = [detouring, follow_up]
    return Flow(members, follow_up.output, order=JobOrder.LINEAR)
def _gen():
    """Return the "Connected Flow": two simple jobs, the second fed by the first."""
    head = simple_job("12345")
    tail = simple_job(head.output)
    members = [head, tail]
    return Flow(members, tail.output, "Connected Flow")
"""A demonstration of how to enforce linear ordering of jobs."""

from jobflow import Flow, JobOrder, job


@job
def add(a, b):
    """Return ``a + b``."""
    return a + b


# Automatic ordering: the two jobs do not depend on each other, so the graph
# has no edge between them.
add_job_first = add(1, 2)
add_job_second = add(4, 6)
auto_flow = Flow([add_job_first, add_job_second], order=JobOrder.AUTO)
auto_flow.draw_graph().show()

# Linear ordering: the graph now shows an edge between the jobs because they
# are executed in list order.
add_job_first = add(1, 2)
add_job_second = add(4, 6)
linear_flow = Flow([add_job_first, add_job_second], order=JobOrder.LINEAR)
linear_flow.draw_graph().show()