def enter_dir(self, dirname):
    """Build the sample pipeline that the tests run against.

    Creates a ``TaskerSubclass`` for ``dirname``, attaches a small
    configuration dict and a call counter to it, registers the sample
    tasks defined below, and returns the resulting object. Tests depend
    on the sample data defined here.
    """
    pipeline = TaskerSubclass(dirname)
    # Totally arbitrary configuration "scheme"
    pipeline.conf = {'one': 'one_str', 'two': 2.0, 'name': pipeline.name}
    # Incremented each time the body of ``one`` actually runs.
    pipeline.one_count = 0

    @pipeline.stores(storage.JSON('one.json'))
    def one(tsk):
        """Docstring: One"""
        # NOTE(review): the docstring above is presumably inspected by a
        # test elsewhere -- keep it verbatim.
        pipeline.one_count += 1
        return pipeline.conf['one']

    # Goes to JSON
    @pipeline.stores(storage.JSON('two.json'), storage.JSON('2b.json'))
    def two(tsk, one=one):
        self.assertEqual(one, pipeline.conf['one'])
        two_value = pipeline.conf['two']
        details = {
            'twofloat': two_value,
            'onestr': one,
            'name': pipeline.conf['name'],
        }
        return [two_value, details]

    @pipeline.stores(storage.Pandas('three.h5'))  # Positional form
    def three(tsk, one=one, two=two):
        assert one == pipeline.conf['one']
        # ``two`` is the [float, dict] pair returned above.
        stored_float = two[1]['twofloat']
        assert stored_float == pipeline.conf['two']
        return pandas.Series([stored_float])

    @pipeline.stores('four')
    def four(tsk, three=three, td='three_dummy'):
        assert three[0] == pipeline.conf['two']  # First row of Series
        self.assertEqual(td.basename(), 'three_dummy')
        leading_part = td.split()[0]
        assert len(leading_part)
        marker = pipeline.p / 'four'
        marker.touch()
        return 'dummy'

    # No storage
    @pipeline
    def doesnt_store(tsk, three=pipeline.three):
        return three[0]

    @pipeline.computes  # Alternate syntax
    def doesnt_store2(tsk, three_val=pipeline.doesnt_store):
        return three_val

    # Storage with no-store gap
    @pipeline.stores(storage.JSON('gapped.json'))
    def gapped(tsk, three_val=pipeline.doesnt_store):
        return three_val

    return pipeline
def enter_dir(self, dirname):
    """Build the sample pipeline that the tests run against.

    Creates a ``TaskerSubclass`` for ``dirname``, attaches a small
    configuration dict and a call counter to it, registers four sample
    tasks through ``create_task``, and returns the resulting object.
    Tests depend on the sample data defined here.
    """
    pipeline = TaskerSubclass(dirname)
    # Totally arbitrary configuration "scheme"
    pipeline.conf = {'one': 'one_str', 'two': 2.0, 'name': pipeline.name}
    # Incremented each time the body of ``one`` actually runs.
    pipeline.one_count = 0

    @pipeline.create_task([], storage.JSON('one.json'))
    def one(tsk, ins):
        """Docstring: One"""
        # NOTE(review): the docstring above is presumably inspected by a
        # test elsewhere -- keep it verbatim.
        pipeline.one_count += 1
        assert len(ins) == 0
        return pipeline.conf['one']

    # Goes to JSON
    two_outputs = [storage.JSON('two.json'), storage.JSON('2b.json')]

    @pipeline.create_task(one, two_outputs)
    def two(tsk, input):
        self.assertEqual(input, pipeline.conf['one'])
        two_value = pipeline.conf['two']
        details = {
            'twofloat': two_value,
            'onestr': input,
            'name': pipeline.conf['name'],
        }
        return (two_value, details)

    @pipeline.create_task([one, two], storage.Pandas('three.h5'))
    def three(tsk, ins):
        assert ins[0] == pipeline.conf['one']
        # ins[1] is the (float, dict) pair returned by ``two``.
        stored_float = ins[1][1]['twofloat']
        assert stored_float == pipeline.conf['two']
        return pandas.Series([stored_float])

    @pipeline.create_task({'three': three, 'td': 'three_dummy'}, 'four')
    def four(tsk, ins):
        assert ins['three'][0] == pipeline.conf['two']  # First row of Series
        dummy = ins['td']
        self.assertEqual(dummy.basename(), 'three_dummy')
        leading_part = dummy.split()[0]
        assert len(leading_part)
        marker = pipeline.p / 'four'
        marker.touch()
        return 'dummy'

    return pipeline
def test_file_dependency(self):
    """A file object, rather than a task, can be given as a dependency.

    One ``storage.JSON`` is built from a path under ``self.testdir``;
    the other is given only a bare filename. Both must show up in the
    task's ``input_files``.
    """
    prebound_json = storage.JSON(self.testdir / 'prebound.json')

    @self.task
    def use_files(
            tsk,
            pb=prebound_json,
            runtime_bound=storage.JSON('runtime_bound.json')):
        return True

    basenames = [infile.basename() for infile in use_files.input_files]
    assert len(basenames) == 2
    for expected in ('prebound.json', 'runtime_bound.json'):
        assert expected in basenames
def test_missing_file_disaster(self):
    """Losing an upstream input file must not discard downstream results.

    Builds a two-stage chain fed by a JSON file, computes it, deletes
    the input file, and checks that the stored results stay current and
    readable. Only forcing ``stage1`` to recompute is expected to fail.
    """
    upstream = storage.JSON(self.task.p / 'input_file.json')
    upstream.save(3)
    assert upstream() == 3

    @self.task.stores(storage.JSON('stage1.json'))
    def stage1(tsk, input_value=upstream):
        return input_value

    @self.task.stores(storage.JSON('stage2.json'))
    def stage2(tsk, stage1=stage1):
        return stage1

    # Nothing has been computed yet.
    assert not stage1.is_current()
    assert stage2() == 3

    stage2.sync()
    assert stage1.is_current()
    assert stage2.is_current()
    assert stage2() == 3

    # Oops! The original input disappears.
    upstream.filepath.remove()
    assert stage1.is_current()
    assert stage2.is_current()
    assert stage2() == 3  # Still there

    # Can recompute from stage1
    stage2.clear()
    assert stage2() == 3

    # The one thing we can't do: rerun stage1 without its input file.
    try:
        stage1.force()
    except IOError:
        pass
    else:
        raise AssertionError(
            'Expected missing input file to cause IOError')

    # We didn't lose what results remain
    assert stage1.is_current()
def test_missing_file(self):
    """Non-storing tasks are not current while a dependency is missing.

    Checks ``self.task.four``, then a task that depends on a JSON file
    this test never writes, then a task that depends on that task.
    """
    # Direct file dependency
    assert not self.task.four.is_current()

    absent_json = storage.JSON('dummy_missing')

    @self.task
    def missing_dependency(tsk, missing_file=absent_json):
        return missing_file

    # Direct file dependency
    assert not self.task.missing_dependency.is_current()

    # Indirect file dependency
    @self.task
    def child_of_missing_dependency(tsk, missing_dep=missing_dependency):
        return missing_dep

    assert not self.task.child_of_missing_dependency.is_current()
def use_files(
        tsk,
        pb=prebound,
        runtime_bound=storage.JSON('runtime_bound.json')):
    # The body ignores all three arguments; the defaults are what name the
    # two file objects (``prebound`` must already exist when this def runs).
    return True