def test_filesets_dont_work_for_sequences():
    """A sequence whose steps are linked by a fileset tag must be rejected.

    ``One`` emits ``fileset("files")`` and ``Two`` consumes it; the test
    expects ``BakerUnsupportedError`` at some point between building the
    sequence and running it.
    """

    class One(Transform):
        input_tags = {"foo"}
        output_tags = {fileset("files")}

        def script(self):
            pass

    class Two(Transform):
        input_tags = {fileset("files")}
        output_tags = {"bar"}

        def script(self):
            pass

    sources = {"foo": "./tests/__data__/foo.txt"}
    targets = {"bar": "./tests/__data__/bar.txt"}

    # Building, instantiating and running all happen inside the ``raises``
    # block so the error is caught regardless of which step produces it.
    with pytest.raises(BakerUnsupportedError):
        chained = sequence([One, Two])
        job = chained(
            input_paths=sources,
            output_paths=targets,
            overwrite=True,
        )
        job.run()
def test_gap_in_output_and_input():
    """An output consumed two steps later is still internal to the sequence.

    ``StepOne`` produces "bingo", ``StepTwo`` ignores it and ``StepThree``
    consumes it, so the combined sequence should expose neither "bingo" nor
    the other intermediates.
    """

    class StepOne(Transform):
        input_tags = {"foo"}
        output_tags = {"bar", "bingo"}

        def script(self):
            pass

    class StepTwo(Transform):
        input_tags = {"bar"}
        output_tags = {"baz"}

        def script(self):
            pass

    class StepThree(Transform):
        input_tags = {"baz", "bingo"}
        output_tags = {"bop"}

        def script(self):
            pass

    steps = [StepOne, StepTwo, StepThree]
    combined = sequence(steps)

    assert combined.input_tags == {"foo"}
    assert combined.output_tags == {"bop"}
def test_exposed_intermediates():
    """Tags listed in ``exposed_intermediates`` join the sequence outputs."""

    class StepOne(Transform):
        input_tags = {"foo"}
        output_tags = {"bar"}

        def script(self):
            pass

    class StepTwo(Transform):
        input_tags = {"bar"}
        output_tags = {"baz"}

        def script(self):
            pass

    class StepThree(Transform):
        input_tags = {"baz"}
        output_tags = {"bop"}

        def script(self):
            pass

    class StepFour(Transform):
        input_tags = {"bop"}
        output_tags = {"bip"}

        def script(self):
            pass

    steps = [StepOne, StepTwo, StepThree, StepFour]
    combined = sequence(steps, exposed_intermediates={"bar", "baz"})

    # "bop" is an intermediate too, but it was not exposed, so only the
    # final output plus the two requested intermediates are expected.
    assert combined.output_tags == {"bip", "bar", "baz"}
def test_complicated_dep_path():
    """Inputs that no earlier step produces become inputs of the sequence.

    "bongo" (needed by ``StepTwo``) and "bingo" (needed by ``StepThree``) are
    not produced by any step, so they are expected next to "foo" in the
    combined ``input_tags``.
    """

    class StepOne(Transform):
        input_tags = {"foo"}
        output_tags = {"bar"}

        def script(self):
            pass

    class StepTwo(Transform):
        input_tags = {"bar", "bongo"}
        output_tags = {"baz"}

        def script(self):
            pass

    class StepThree(Transform):
        input_tags = {"baz", "bingo"}
        output_tags = {"bop"}

        def script(self):
            pass

    steps = [StepOne, StepTwo, StepThree]
    combined = sequence(steps)

    assert combined.input_tags == {"foo", "bingo", "bongo"}
    assert combined.output_tags == {"bop"}
def test_gap_in_output_and_input():
    """An output consumed two steps later is still internal to the sequence.

    Uses the ``InputTag``/``OutputTag`` declaration style. ``StepOne``
    produces "bingo", ``StepTwo`` ignores it and ``StepThree`` consumes it,
    so the combined sequence should not expose it.
    """

    class StepOne(Transform):
        foo = InputTag("foo")
        bar = OutputTag("bar")
        bingo = OutputTag("bingo")

        def script(self):
            pass

    class StepTwo(Transform):
        bar = InputTag("bar")
        baz = OutputTag("baz")

        def script(self):
            pass

    class StepThree(Transform):
        baz = InputTag("baz")
        bingo = InputTag("bingo")
        bop = OutputTag("bop")

        def script(self):
            pass

    steps = [StepOne, StepTwo, StepThree]
    combined = sequence(steps)

    assert combined.input_tags == {"foo"}
    assert combined.output_tags == {"bop"}
def test_exposed_intermediates():
    """Tags listed in ``exposed_intermediates`` join the sequence outputs.

    Uses the ``InputTag``/``OutputTag`` declaration style.
    """

    class StepOne(Transform):
        foo = InputTag("foo")
        bar = OutputTag("bar")

        def script(self):
            pass

    class StepTwo(Transform):
        bar = InputTag("bar")
        baz = OutputTag("baz")

        def script(self):
            pass

    class StepThree(Transform):
        baz = InputTag("baz")
        bop = OutputTag("bop")

        def script(self):
            pass

    class StepFour(Transform):
        bop = InputTag("bop")
        bip = OutputTag("bip")

        def script(self):
            pass

    steps = [StepOne, StepTwo, StepThree, StepFour]
    combined = sequence(steps, exposed_intermediates={"bar", "baz"})

    # "bop" is an intermediate too, but it was not exposed, so only the
    # final output plus the two requested intermediates are expected.
    assert combined.output_tags == {"bip", "bar", "baz"}
def test_complicated_dep_path():
    """Inputs that no earlier step produces become inputs of the sequence.

    Uses the ``InputTag``/``OutputTag`` declaration style. "bongo" and
    "bingo" are not produced by any step, so they are expected next to
    "foo" in the combined ``input_tags``.
    """

    class StepOne(Transform):
        foo = InputTag("foo")
        bar = OutputTag("bar")

        def script(self):
            pass

    class StepTwo(Transform):
        bar = InputTag("bar")
        bongo = InputTag("bongo")
        baz = OutputTag("baz")

        def script(self):
            pass

    class StepThree(Transform):
        baz = InputTag("baz")
        # The attribute names below differ from the tag names on purpose of
        # the assertions: the expected sets contain "bingo" and "bop", i.e.
        # the tag names, not "bleep" / "boppo".
        bleep = InputTag("bingo")
        boppo = OutputTag("bop")

        def script(self):
            pass

    steps = [StepOne, StepTwo, StepThree]
    combined = sequence(steps)

    assert combined.input_tags == {"foo", "bingo", "bongo"}
    assert combined.output_tags == {"bop"}
def test_real_world():
    """Run the MNIST-style pipeline end to end and check the accuracy file.

    The pipeline is: (train branch merged with the test-dataframe builder),
    then ``Predict``, then ``EvaluateResults``. The test passes when the
    text written to ``/tmp/accuracy`` is exactly "0.9032".
    """
    # Compose from the inside out; evaluation order matches a single nested
    # expression.
    train_branch = sequence([BuildTrainDf, Train])
    prepared = merge([train_branch, BuildTestDf])
    Pipeline = sequence([prepared, Predict, EvaluateResults])

    sources = {
        "raw_train_images": "./tests/__data__/mnist/optdigits.tra",
        "raw_test_images": "./tests/__data__/mnist/optdigits.tes",
    }
    targets = {"accuracy": "/tmp/accuracy"}

    job = Pipeline(
        input_paths=sources,
        output_paths=targets,
        overwrite=True,
    )
    job.run()

    with open("/tmp/accuracy", "r") as result_file:
        reported = result_file.read()
    assert reported == "0.9032"
def test_in_memory_intermediates():
    """Run a three-step sequence under a context with "mem" intermediates.

    The context is created with ``fs_for_intermediates="mem"``. The final
    file at ``/tmp/boppo`` must contain the text of "foo", the suffix
    " processed", a space, and the text of "bleep".
    """
    in_memory_context = BakerContext(fs_for_intermediates="mem")

    class StepOne(Transform):
        input_tags = {"foo"}
        output_tags = {"bar"}

        def script(self):
            # Copy "foo" to "bar" unchanged.
            with self.input_files["foo"].open() as src:
                text = src.read()
            with self.output_files["bar"].open() as dst:
                dst.write(text)

    class StepTwo(Transform):
        input_tags = {"bar"}
        output_tags = {"baz"}

        def script(self):
            # Append the marker checked by the final assertion.
            with self.input_files["bar"].open() as src:
                text = src.read()
            with self.output_files["baz"].open() as dst:
                dst.write(text + " processed")

    class StepThree(Transform):
        input_tags = {"baz", "bleep"}
        output_tags = {"boppo"}

        def script(self):
            # Join the processed text and the second external input.
            with self.input_files["baz"].open() as src:
                left = src.read()
            with self.input_files["bleep"].open() as src:
                right = src.read()
            with self.output_files["boppo"].open() as dst:
                dst.write(left + " " + right)

    Seq = sequence([StepOne, StepTwo, StepThree])

    sources = {
        "foo": "./tests/__data__/foo.txt",
        "bleep": "./tests/__data__/bleep.txt",
    }
    targets = {"boppo": "/tmp/boppo"}

    job = Seq(
        input_paths=sources,
        output_paths=targets,
        context=in_memory_context,
        overwrite=True,
    )
    job.run()

    with open("/tmp/boppo", "r") as result_file:
        produced = result_file.read()
    assert produced == "foo contents processed bleep contents"
def test_in_memory_intermediates():
    """Run a three-step sequence through a "memory"-intermediates driver.

    The driver context is created with ``fs_for_intermediates="memory"`` and
    executes the job via its ``run`` method. The final file at
    ``/tmp/boppo`` must contain the text of "foo", the suffix " processed",
    a space, and the text of "bleep".
    """
    in_memory_context = BakerDriverContext(fs_for_intermediates="memory")

    class StepOne(Transform):
        foo = InputTag("foo")
        bar = OutputTag("bar")

        def script(self):
            # Copy "foo" to "bar" unchanged.
            with self.foo.open() as src:
                text = src.read()
            with self.bar.open() as dst:
                dst.write(text)

    class StepTwo(Transform):
        bar = InputTag("bar")
        baz = OutputTag("baz")

        def script(self):
            # Append the marker checked by the final assertion.
            with self.bar.open() as src:
                text = src.read()
            with self.baz.open() as dst:
                dst.write(text + " processed")

    class StepThree(Transform):
        baz = InputTag("baz")
        bleep = InputTag("bleep")
        boppo = OutputTag("boppo")

        def script(self):
            # Join the processed text and the second external input.
            with self.baz.open() as src:
                left = src.read()
            with self.bleep.open() as src:
                right = src.read()
            with self.boppo.open() as dst:
                dst.write(left + " " + right)

    Seq = sequence([StepOne, StepTwo, StepThree])

    sources = {
        "foo": "./tests/__data__/foo.txt",
        "bleep": "./tests/__data__/bleep.txt",
    }
    targets = {"boppo": "/tmp/boppo"}

    job = Seq(
        input_paths=sources,
        output_paths=targets,
        overwrite=True,
    )
    in_memory_context.run(job)

    with open("/tmp/boppo", "r") as result_file:
        produced = result_file.read()
    assert produced == "foo contents processed bleep contents"
def test_basic_sequence():
    """Check the dictionary returned by ``structure()`` for a named sequence.

    ``Foo`` produces "hi", which ``Bar`` consumes, so the expected sequence
    inputs are "there" and "yo" and the expected outputs are "dude" and
    "sup". The expected tag lists are written in alphabetical order.
    """

    class Foo(Transform):
        input_tags = {"yo"}
        output_tags = {"hi"}

        # ``script`` is an instance method, so it must accept ``self``;
        # without it any call on an instance would raise ``TypeError``.
        def script(self):
            pass

    class Bar(Transform):
        input_tags = {"hi", "there"}
        output_tags = {"sup", "dude"}

        def script(self):
            pass

    expected = {
        "type": "sequence",
        "name": "Baz",
        "input_tags": ["there", "yo"],
        "output_tags": ["dude", "sup"],
        "steps": [
            {
                "input_tags": ["yo"],
                "name": "Foo",
                "output_tags": ["hi"],
                "type": "leaf",
            },
            {
                "input_tags": ["hi", "there"],
                "name": "Bar",
                "output_tags": ["dude", "sup"],
                "type": "leaf",
            },
        ],
    }

    assert sequence([Foo, Bar], name="Baz").structure() == expected
class StepThree(Transform):
    # Reads the "baz" and "bleep" inputs and writes them, separated by a
    # single space, to the "boppo" output.
    baz = InputTag("baz")
    bleep = InputTag("bleep")
    boppo = OutputTag("boppo")

    def script(self):
        with self.baz.open() as src:
            left = src.read()
        with self.bleep.open() as src:
            right = src.read()
        with self.boppo.open() as dst:
            dst.write(left + " " + right)


# Defined at module level and then round-tripped through pickle by the test
# below; the second element is itself a sequence (nested composition).
BaseSeq = sequence([StepOne, sequence([StepTwo, StepThree])])


def test_pickle_nested_sequence():
    """A nested sequence must survive a pickle round trip and still run."""
    payload = pickle.dumps(BaseSeq)
    Seq = pickle.loads(payload)

    sources = {
        "foo": "./tests/__data__/foo.txt",
        "bleep": "./tests/__data__/bleep.txt",
    }
    targets = {"boppo": "/tmp/boppo"}

    job = Seq(
        input_paths=sources,
        output_paths=targets,
        overwrite=True,
    )
    job.run()
}, ) BuildTestDf = map_tags( BuildDf, input_mapping={"raw_images": "raw_test_images"}, output_mapping={ "df": "to_predict_on", "labels": "test_labels" }, ) Pipeline = sequence( [ merge([sequence([BuildTrainDf, Train]), BuildTestDf]), Predict, EvaluateResults, ], exposed_intermediates={"model"}, name="MNISTPipeline", ) def test_real_world(): # Try pickling and unpickling first -- ensure these things work. PickledPipeline = pickle.loads(pickle.dumps(Pipeline)) PickledPipeline( input_paths={ "raw_train_images": "./tests/__data__/mnist/optdigits.tra", "raw_test_images": "./tests/__data__/mnist/optdigits.tes", }, output_paths={
def test_sequence_defined_name():
    """An explicit ``name=`` argument becomes the sequence's ``name``."""

    class Lol(Transform):
        def script(self):
            pass

    named = sequence([Lol, Lol], name="TootToot")

    assert named.name == "TootToot"
def test_sequence_generated_name():
    """Without ``name=``, the name is expected to be "Seq(Lol,Lol)"."""

    class Lol(Transform):
        # The class name matters here: it appears in the expected string.
        def script(self):
            pass

    unnamed = sequence([Lol, Lol])

    assert unnamed.name == "Seq(Lol,Lol)"