def test_html_progress_with_exception():
    """Run a failing plan while HTML progress reporting is enabled.

    The plan's single call is ``fail``; the run is expected to raise
    ``uberjob.CallError``. The HTML report is pointed at a file inside a
    temporary directory.
    """
    failing_plan = uberjob.Plan()
    failing_call = failing_plan.call(fail)
    with pytest.raises(uberjob.CallError), tempfile.TemporaryDirectory() as workdir:
        report_path = os.path.join(workdir, "uberjob.html")
        observer = uberjob.progress.html_progress(report_path)
        uberjob.run(failing_plan, output=failing_call, progress=observer)
def test_max_errors(self):
    """Check how many failing calls execute for each ``max_errors`` value.

    Ten calls that always raise ``ValueError`` are run with one worker.
    With ``max_errors=None`` all ten invocations are expected; otherwise
    exactly ``max_errors + 1`` invocations are expected.
    """
    total_calls = 10
    invocations = [0]  # one-element list so the closure below can mutate it

    def fizz():
        invocations[0] += 1
        raise ValueError()

    plan = uberjob.Plan()
    failing_calls = [plan.call(fizz) for _ in range(total_calls)]
    for limit in (None, *range(total_calls)):
        invocations[0] = 0
        expected = total_calls if limit is None else limit + 1
        with self.subTest(max_errors=limit):
            with self.assertRaises(uberjob.CallError):
                uberjob.run(
                    plan,
                    output=failing_calls,
                    max_workers=1,
                    max_errors=limit,
                    progress=None,
                )
            self.assertEqual(invocations[0], expected)
def test_has_a_cycle(self):
    """Expect ``nx.HasACycle`` when a call depends on its own successor."""
    plan = uberjob.Plan()
    first = plan.call(operator.add, 1, 2)
    second = plan.call(operator.add, first, 4)
    # ``second`` already consumes ``first``; this extra edge closes the loop.
    plan.add_dependency(second, first)
    with self.assertRaises(nx.HasACycle):
        uberjob.run(plan, output=second)
def test_stack_frame_basic(self):
    """Check the stack frame reported for a failing ``plan.call``.

    The call divides 1 by 0. The expected frame is the captured frame
    shifted by one line, so the ``plan.call`` statement must stay on the
    line directly after the ``get_stack_frame`` statement.
    """
    plan = uberjob.Plan()
    # NOTE(review): presumably captures this line's frame - confirm in helper.
    anchor_frame = get_stack_frame(1)
    quotient = plan.call(operator.truediv, 1, 0)
    expected_frame = copy_with_line_offset(anchor_frame, 1)
    with self.assert_call_exception(expected_stack_frame=expected_frame):
        uberjob.run(plan, output=quotient)
def main():
    """Build the plan from the parsed arguments and run it.

    Creates the output directory if needed, writes the ``_FRESH`` touch file
    when ``args.fresh`` is set, and passes that file's modified time to
    ``uberjob.run`` as ``fresh_time``.
    """
    options = parse_args()
    out_dir = options.output_directory
    os.makedirs(out_dir, exist_ok=True)

    fresh_marker = TouchFileStore(os.path.join(out_dir, "_FRESH"))
    if options.fresh:
        fresh_marker.write(None)

    plan, registry = build_plan(options)
    fresh_time = fresh_marker.get_modified_time()
    uberjob.run(plan, registry=registry, fresh_time=fresh_time)
def test_schedulers(self):
    """Run with each accepted scheduler value and reject an unknown one.

    ``None``, ``"default"`` and ``"random"`` must all yield 7 for
    ``(2 + 2) + 3``; ``"fizz"`` is expected to raise ``ValueError``.
    """
    plan = uberjob.Plan()
    four = plan.call(operator.add, 2, 2)
    seven = plan.call(operator.add, four, 3)
    for scheduler_name in (None, "default", "random"):
        outcome = uberjob.run(plan, output=seven, scheduler=scheduler_name)
        self.assertEqual(outcome, 7)
    with self.assertRaises(ValueError):
        uberjob.run(plan, output=seven, scheduler="fizz")
def test_failed_to_read_from_empty_store_2(self):
    """Expect a failed read when the output source's store holds no value.

    Two sources, each backed by a ``TestStore`` built without a value, are
    linked with ``add_dependency`` and the second is requested as output.
    """
    plan = uberjob.Plan()
    registry = uberjob.Registry()
    upstream = registry.source(plan, TestStore())
    downstream = registry.source(plan, TestStore())
    plan.add_dependency(upstream, downstream)
    with self.assert_failed_to_read_from_empty_store():
        uberjob.run(plan, registry=registry, output=downstream)
def test_has_a_cycle(self):
    """Expect ``nx.HasACycle`` when two sources depend on each other."""
    plan = uberjob.Plan()
    registry = uberjob.Registry()
    left = registry.source(plan, TestStore(1))
    right = registry.source(plan, TestStore(2))
    # Dependencies in both directions form the cycle under test.
    plan.add_dependency(left, right)
    plan.add_dependency(right, left)
    with self.assertRaises(nx.HasACycle):
        uberjob.run(plan, registry=registry, output=right)
def test_failed_run(self):
    """Expect a call exception wrapping ``ValueError("Oops")``."""

    def always_raises(a, b):
        raise ValueError("Oops")

    plan = uberjob.Plan()
    doomed = plan.call(always_raises, 2, 3)
    with self.assert_call_exception(ValueError, "Oops"):
        uberjob.run(plan, output=doomed)
def test_registry_can_lie(self):
    """Show that a registered store's value replaces the computed one.

    ``2 + 3`` is registered against a ``TestStore`` holding 7. Without the
    registry the run returns 5 and leaves the store's read and write counts
    at zero; with the registry the run returns 7.
    """
    plan = uberjob.Plan()
    registry = uberjob.Registry()
    total = plan.call(operator.add, 2, 3)
    registry.add(total, TestStore(7))

    self.assertEqual(uberjob.run(plan, output=total), 5)
    self.assertEqual(registry[total].read_count, 0)
    self.assertEqual(registry[total].write_count, 0)

    stored_result = uberjob.run(plan, output=total, registry=registry)
    self.assertEqual(stored_result, 7)
def test_stack_frame_registry_add(self):
    """Check the stack frame reported when a registered store cannot write.

    The expected frame is the captured frame shifted by one line, so the
    ``registry.add`` statement must stay on the line directly after the
    ``get_stack_frame`` statement.
    """
    plan = uberjob.Plan()
    registry = uberjob.Registry()
    total = plan.call(operator.add, 2, 2)
    # NOTE(review): presumably captures this line's frame - confirm in helper.
    anchor_frame = get_stack_frame(1)
    registry.add(total, TestStore(can_write=False))
    expected_frame = copy_with_line_offset(anchor_frame, 1)
    with self.assert_call_exception(expected_stack_frame=expected_frame):
        uberjob.run(plan, registry=registry, output=total)
def test_render_dry_run():
    """Render dry-run results to SVG, without and then with an output."""
    plan = uberjob.Plan()
    registry = uberjob.Registry()
    total = plan.call(add, 2, 3)
    registry.add(total, TestStore())
    # First pass requests no output; second pass requests ``total``.
    for extra_kwargs in ({}, {"output": total}):
        dry_result = uberjob.run(
            plan, registry=registry, dry_run=True, **extra_kwargs
        )
        uberjob.render(dry_result, format="svg")
def test_stack_frame_registry_source(self):
    """Check the stack frame reported for a failing ``registry.source``.

    The source is backed by a ``TestStore`` built without a value. The run
    is tried without and then with the registry; both must report the
    captured frame shifted by one line, so the ``registry.source``
    statement must stay on the line directly after ``get_stack_frame``.
    """
    plan = uberjob.Plan()
    registry = uberjob.Registry()
    # NOTE(review): presumably captures this line's frame - confirm in helper.
    anchor_frame = get_stack_frame(1)
    source_node = registry.source(plan, TestStore())
    for extra_kwargs in ({}, {"registry": registry}):
        with self.assert_call_exception(
            expected_stack_frame=copy_with_line_offset(anchor_frame, 1)
        ):
            uberjob.run(plan, output=source_node, **extra_kwargs)
def test_run_errors(self):
    """Expect ``ValueError`` when unpacking a non-4-length range into 4.

    Each length in 0, 1, 2, 3, 5, 6 is tried both as a raw ``range`` and
    wrapped with ``plan.lit``.
    """
    for length in (0, 1, 2, 3, 5, 6):
        for use_lit in (True, False):
            with self.subTest(length=length, use_lit=use_lit):
                plan = uberjob.Plan()
                values = range(7, 7 + length)
                unpack_source = plan.lit(values) if use_lit else values
                a, b, c, d = plan.unpack(unpack_source, 4)
                with self.assert_call_exception(expected_exception=ValueError):
                    uberjob.run(plan, output=(d, c, b, a))
def test_dry_run(self):
    """Check that a dry run returns a plan without writing to the store.

    Without a registry the dry-run plan has zero nodes. With a registered
    store it has more than one node, the logical plan still has one, and
    the store is only written once the physical plan is actually run.
    """
    plan = uberjob.Plan()
    registry = uberjob.Registry()
    literal = plan.lit(1)

    unregistered_plan = uberjob.run(plan, dry_run=True)[0]
    self.assertEqual(unregistered_plan.graph.number_of_nodes(), 0)

    store = TestStore()
    registry.add(literal, store)
    physical_plan, _ = uberjob.run(plan, registry=registry, dry_run=True)
    self.assertEqual(plan.graph.number_of_nodes(), 1)
    self.assertGreater(physical_plan.graph.number_of_nodes(), 1)
    self.assertEqual(store.write_count, 0)

    every_node = list(physical_plan.graph.nodes())
    uberjob.run(physical_plan, output=every_node)
    self.assertEqual(store.write_count, 1)
def test_basic2(self):
    """Unpack tuples of length 0-3 and expect the same tuple back."""
    for length in range(4):
        with self.subTest(length=length):
            plan = uberjob.Plan()
            expected = tuple(range(7, 7 + length))
            unpacked = plan.unpack(expected, length)
            self.assertEqual(uberjob.run(plan, output=unpacked), expected)
def test_complex_side_effects(self):
    """Chain side-effecting writes into one store, then read it via a source.

    Five calls square 0..4; five more add each square into a shared
    ``TestStore`` and are chained with explicit dependencies. A source on
    that store depends on every write, and three readers depend on the
    source. The run is performed twice and must return the same values.
    """
    square_count, reader_count = 5, 3
    plan = uberjob.Plan()
    registry = uberjob.Registry()
    store = TestStore()

    def square(k):
        return k * k

    def accumulate(k):
        # Treat the store as 0 until it reports a modified time.
        current = store.read() if store.get_modified_time() else 0
        return store.write(current + k)

    def add_stored(k):
        return k + store.read()

    squares = [plan.call(square, i) for i in range(square_count)]
    writes = [plan.call(accumulate, sq) for sq in squares]
    # Chain consecutive writes so each one depends on the previous.
    for earlier, later in zip(writes, writes[1:]):
        plan.add_dependency(earlier, later)

    stored = registry.source(plan, store)
    for write in writes:
        plan.add_dependency(write, stored)

    readers = [plan.call(add_stored, i) for i in range(reader_count)]
    for reader in readers:
        plan.add_dependency(stored, reader)

    sum_of_squares = sum(k * k for k in range(square_count))
    expected = [i + sum_of_squares for i in range(reader_count)]
    for _ in range(2):
        self.assertEqual(
            uberjob.run(plan, registry=registry, output=readers), expected
        )
def test_latest_key_wins_in_dict_collisions(self):
    """Expect later symbolic keys to override equal plain keys in the output.

    The dict holds plain keys plus ``lit("b")`` and ``lit("a")`` added
    afterwards; the run must resolve to the later values 4 and 5.
    """
    plan = uberjob.Plan()
    mapping = {"a": 1, "b": 2, "c": 3}
    # Insertion order matters here: the symbolic keys come last.
    mapping[plan.lit("b")] = 4
    mapping[plan.lit("a")] = 5
    # Five entries because, e.g., "a" is not lit("a").
    self.assertEqual(len(mapping), 5)
    resolved = uberjob.run(plan, output=mapping)
    self.assertEqual(resolved, {"a": 5, "b": 4, "c": 3})
def test_retry_vanilla(self):
    """Check how many times an always-failing call runs per ``retry`` value.

    ``retry=None`` expects one attempt; an integer ``retry`` expects that
    many attempts. Every run is expected to raise ``uberjob.CallError``.
    """
    attempts = [0]  # one-element list so the closure below can mutate it

    def fizz():
        attempts[0] += 1
        raise ValueError()

    plan = uberjob.Plan()
    failing_call = plan.call(fizz)
    for retry in (None, 1, 2, 3):
        attempts[0] = 0
        expected_attempts = 1 if retry is None else retry
        with self.subTest(retry=retry):
            with self.assertRaises(uberjob.CallError):
                uberjob.run(plan, output=failing_call, retry=retry)
            self.assertEqual(attempts[0], expected_attempts)
def test_dependent_source(self):
    """Read a source whose store is filled by an upstream write call.

    Both runs must return 0. The target store's read count rises to 1 and
    then 2, while its write count stays at 1.
    """
    plan = uberjob.Plan()
    registry = uberjob.Registry()
    origin = registry.source(plan, TestStore(0))
    target_store = TestStore()
    write_call = plan.call(target_store.write, origin)
    target = registry.source(plan, target_store)
    plan.add_dependency(write_call, target)

    for expected_reads in (1, 2):
        result = uberjob.run(plan, registry=registry, output=target)
        self.assertEqual(result, 0)
        self.assertEqual(target_store.read_count, expected_reads)
        self.assertEqual(target_store.write_count, 1)
def test_source_dependent_on_write(self):
    """Read, via a source, the store that a registered call writes to."""
    plan = uberjob.Plan()
    registry = uberjob.Registry()
    producer = plan.call(lambda: 5)
    registry.add(producer, TestStore())
    # The source reuses the store that was just registered for ``producer``.
    reader = registry.source(plan, registry[producer])
    plan.add_dependency(producer, reader)
    result = uberjob.run(plan, output=reader, registry=registry)
    self.assertEqual(result, 5)
def test_call_with_side_effects_but_no_args_or_return(self):
    """Order a zero-argument copying call between two sources.

    The plan is run twice with no output requested. After each run the
    input store has been read once, and the output store has been written
    once, never read, and holds 0.
    """
    plan = uberjob.Plan()
    registry = uberjob.Registry()
    in_store = TestStore(0)
    in_source = registry.source(plan, in_store)
    out_store = TestStore()

    def copy_across():
        return out_store.write(in_store.read())

    copy_call = plan.call(copy_across)
    out_source = registry.source(plan, out_store)
    plan.add_dependency(in_source, copy_call)
    plan.add_dependency(copy_call, out_source)

    for run_index in range(2):
        with self.subTest(i=run_index):
            uberjob.run(plan, registry=registry)
            self.assertEqual(in_store.read_count, 1)
            self.assertEqual(out_store.read_count, 0)
            self.assertEqual(out_store.write_count, 1)
            self.assertEqual(out_store.value, 0)
def test_fresh_time_advanced(self):
    """Check ``fresh_time`` handling for a source feeding a written store.

    The first run reads ``store_a`` once and writes ``store_c`` once. A
    ``fresh_time`` equal to ``store_c``'s modified time leaves the counts
    unchanged; one second later raises both to 2.
    """
    plan = uberjob.Plan()
    registry = uberjob.Registry()
    t0 = dt.datetime.now()
    store_a = TestStore(7, modified_time=t0)
    source_a = registry.source(plan, store_a)
    store_c = TestStore()
    write_c = plan.call(store_c.write, source_a)
    source_c = registry.source(plan, store_c)
    plan.add_dependency(write_c, source_c)

    def check_counts(a_reads, c_reads, c_writes):
        self.assertEqual(store_a.read_count, a_reads)
        self.assertEqual(store_c.read_count, c_reads)
        self.assertEqual(store_c.write_count, c_writes)

    uberjob.run(plan, registry=registry)
    check_counts(1, 0, 1)
    self.assertGreater(store_c.modified_time, store_a.modified_time)

    uberjob.run(plan, registry=registry, fresh_time=store_c.modified_time)
    check_counts(1, 0, 1)

    later = store_c.modified_time + dt.timedelta(seconds=1)
    uberjob.run(plan, registry=registry, fresh_time=later)
    check_counts(2, 0, 2)
def test_retry_custom(self):
    """Exercise ``retry`` given as a user-supplied decorator factory result.

    ``fizz`` fails on its first four invocations. With two attempts the
    run raises ``uberjob.CallError`` after two tries; with 999 attempts it
    succeeds on the fifth try and returns 9.
    """
    retry_calls = [0]
    fizz_calls = [0]

    def create_retry(attempts):
        final_index = attempts - 1

        def inner_retry(f):
            def wrapper(*args, **kwargs):
                for attempt_index in range(attempts):
                    retry_calls[0] += 1
                    try:
                        return f(*args, **kwargs)
                    except Exception:
                        # Swallow the failure unless no attempts remain.
                        if attempt_index == final_index:
                            raise

            return wrapper

        return inner_retry

    def fizz(x):
        fizz_calls[0] += 1
        if fizz_calls[0] < 5:
            raise ValueError()
        return x * x

    plan = uberjob.Plan()
    squared = plan.call(fizz, 3)

    with self.assertRaises(uberjob.CallError):
        uberjob.run(plan, output=squared, retry=create_retry(2))
    self.assertEqual(retry_calls[0], 2)
    self.assertEqual(fizz_calls[0], 2)

    # Reset both counters before the run that is expected to succeed.
    retry_calls[0] = 0
    fizz_calls[0] = 0
    result = uberjob.run(plan, output=squared, retry=create_retry(999))
    self.assertEqual(result, 9)
    self.assertEqual(retry_calls[0], 5)
    self.assertEqual(fizz_calls[0], 5)
def test_retry_validation(self):
    """Reject invalid ``retry`` arguments.

    A string is expected to raise ``TypeError``; 0 and -1 are expected to
    raise ``ValueError``.
    """
    plan = uberjob.Plan()
    with self.assertRaises(TypeError):
        uberjob.run(plan, retry="hello")
    for bad_retry in (0, -1):
        with self.assertRaises(ValueError):
            uberjob.run(plan, retry=bad_retry)
def test_registry_complex(self):
    """Check store read/write counts for every staleness combination.

    ``z = x + y`` with all three nodes registered. Each node's store either
    starts as ``Missing`` (stale) or holds its computed value. All 16
    combinations of the three stale flags and of requesting ``z`` as output
    are covered.
    """
    flag_combos = itertools.product((False, True), repeat=4)
    for x_stale, y_stale, z_stale, z_output in flag_combos:
        with self.subTest(
            x_stale=x_stale, y_stale=y_stale, z_stale=z_stale, z_output=z_output
        ):
            plan = uberjob.Plan()
            registry = uberjob.Registry()
            x = plan.call(operator.add, 2, 3)
            y = plan.call(operator.add, 4, 5)
            z = plan.call(operator.add, x, y)

            # Register in x, y, z order, as each fresh value is computed here.
            for node, stale in ((x, x_stale), (y, y_stale), (z, z_stale)):
                initial = Missing if stale else uberjob.run(plan, output=node)
                registry.add(node, TestStore(initial))

            result = uberjob.run(
                plan, output=z if z_output else None, registry=registry
            )
            self.assertEqual(result, 14 if z_output else None)

            any_stale = int(x_stale or y_stale or z_stale)
            self.assertEqual(registry[x].read_count, any_stale)
            self.assertEqual(registry[x].write_count, int(x_stale))
            self.assertEqual(registry[y].read_count, any_stale)
            self.assertEqual(registry[y].write_count, int(y_stale))
            self.assertEqual(registry[z].read_count, int(z_output))
            self.assertEqual(registry[z].write_count, any_stale)
def test_stale_source_successors_run_after_stale_source_predecessors(self):
    """Expect a reader of a stale store to see the refreshed value.

    Store ``s`` holds 2 with modified time ``t0``; the upstream source
    holds 1 with the later time ``t1``. A call writes the upstream value
    into ``s``, a source on ``s`` depends on that write, and a final call
    that reads ``s`` depends on the source. The run must return 1.
    """
    p = uberjob.Plan()
    r = uberjob.Registry()
    # datetime.utcnow() is deprecated since Python 3.12; this produces the
    # same naive UTC timestamp without the DeprecationWarning.
    t0 = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None)
    t1 = t0 + dt.timedelta(seconds=1)
    x = r.source(p, TestStore(1, modified_time=t1))
    s = TestStore(2, modified_time=t0)
    y = p.call(s.write, x)
    z = r.source(p, s)
    p.add_dependency(y, z)
    w = p.call(s.read)
    p.add_dependency(z, w)
    self.assertEqual(uberjob.run(p, registry=r, output=w), 1)
def test_structured_output(self):
    """Check that ``output`` may be a nested structure of symbolic nodes.

    Covers list, tuple, dict with symbolic values, dict with symbolic tuple
    keys, set, a plain literal, and empty list/dict outputs.
    """
    plan = uberjob.Plan()
    one = plan.call(lambda: 1)
    three = plan.call(lambda n: n + 2, one)

    self.assertEqual(uberjob.run(plan, output=[one, three]), [1, 3])
    self.assertEqual(uberjob.run(plan, output=(one, three)), (1, 3))

    nested_values = {"a": one, "b": [one, three], "c": (1, 2, 3, 4)}
    self.assertEqual(
        uberjob.run(plan, output=nested_values),
        {"a": 1, "b": [1, 3], "c": (1, 2, 3, 4)},
    )

    symbolic_keys = {(one, three): "a", (3, 4): "b"}
    self.assertEqual(
        uberjob.run(plan, output=symbolic_keys),
        {(1, 3): "a", (3, 4): "b"},
    )

    self.assertEqual(uberjob.run(plan, output={one, three}), {1, 3})
    self.assertEqual(uberjob.run(plan, output=7), 7)
    self.assertEqual(uberjob.run(plan, output=[]), [])
    self.assertEqual(uberjob.run(plan, output={}), {})
def test_pruning(self):
    """Check that a failing call only matters when it is the output.

    With the healthy call as output the run returns 7, with or without a
    registry. With ``1 // 0`` as output it raises ``ZeroDivisionError``
    in both cases.
    """
    plan = uberjob.Plan()
    registry = uberjob.Registry()
    broken = plan.call(operator.floordiv, 1, 0)
    healthy = plan.call(operator.floordiv, 14, 2)

    self.assertEqual(uberjob.run(plan, output=healthy), 7)
    self.assertEqual(uberjob.run(plan, output=healthy, registry=registry), 7)

    with self.assert_call_exception(ZeroDivisionError):
        uberjob.run(plan, output=broken)
    with self.assert_call_exception(ZeroDivisionError):
        uberjob.run(plan, output=broken, registry=registry)
def test_fresh_time_basic(self):
    """Check that ``fresh_time`` controls whether a stored value is rewritten.

    The first run writes once. A ``fresh_time`` equal to the store's
    modified time leaves the write count at 1; one second later raises it
    to 2.
    """
    plan = uberjob.Plan()
    registry = uberjob.Registry()
    total = plan.call(operator.add, 2, 3)
    registry.add(total, TestStore())

    uberjob.run(plan, registry=registry)
    self.assertEqual(registry[total].write_count, 1)

    same_time = registry[total].modified_time
    uberjob.run(plan, registry=registry, fresh_time=same_time)
    self.assertEqual(registry[total].write_count, 1)

    later_time = registry[total].modified_time + dt.timedelta(seconds=1)
    uberjob.run(plan, registry=registry, fresh_time=later_time)
    self.assertEqual(registry[total].write_count, 2)