def test_pipeline_nested_mutate_inverse_transform_without_identities():
    """
    Regression test for a bug seen at the borders of nested pipelines when
    no identity steps were placed around them.

    The pipeline is fitted once (forward pass), then reversed and run again,
    which is expected to call the inverse transforms in reverse order.
    """
    forward_names = ["1", "2", "3", "4", "5", "6", "7"]
    expected_tape = forward_names + forward_names[::-1]
    tape = TapeCallbackFunction()

    def step(name):
        # One callback step per name, all writing to the same tape.
        return TransformCallbackStep(tape.callback, [name])

    p = Pipeline([
        step("1"),
        step("2"),
        Pipeline([
            step("3"),
            step("4"),
            step("5"),
        ]),
        step("6"),
        step("7"),
    ])

    # Forward pass: names 1..7 are expected on the tape.
    p, _ = p.fit_transform(np.ones((1, 1)))

    print("[mutating, inversing, and calling each inverse_transform]")
    # Backward pass: names 7..1 are expected on the tape.
    reversed(p).transform(np.ones((1, 1)))

    print(expected_tape)
    print(tape.get_name_tape())
    assert expected_tape == tape.get_name_tape()
def test_pipeline_nested_mutate_inverse_transform():
    """
    Fit a nested pipeline padded with identity steps, then mutate it so that
    ``transform`` points at ``inverse_transform`` and run it again.

    The tape is expected to hold the step names in forward order followed by
    the same names in reverse order.
    """
    forward_names = ["1", "2", "3", "4", "5", "6", "7"]
    expected_tape = forward_names + forward_names[::-1]
    tape = TapeCallbackFunction()

    def step(name):
        # One callback step per name, all writing to the same tape.
        return TransformCallbackStep(tape.callback, [name])

    p = Pipeline([
        Identity(),
        step("1"),
        step("2"),
        Pipeline([
            Identity(),
            step("3"),
            step("4"),
            step("5"),
            Identity(),
        ]),
        step("6"),
        step("7"),
        Identity(),
    ])

    # Forward pass: names 1..7 are expected on the tape.
    p, _ = p.fit_transform(np.ones((1, 1)))

    print("[mutating]")
    p = p.mutate(
        new_method="inverse_transform",
        method_to_assign_to="transform",
    )

    # After the mutation, names 7..1 are expected on the tape.
    p.transform(np.ones((1, 1)))

    print(expected_tape)
    print(tape.get_name_tape())
    assert expected_tape == tape.get_name_tape()
def test_should_save_checkpoint_pickle(tmpdir: LocalPath):
    """
    Fitting a pipeline that contains a pickle checkpoint step should run
    every callback step and leave a checkpoint file on disk.

    :param tmpdir: pytest-provided temporary directory used as cache folder.
    """
    tape = TapeCallbackFunction()
    pickle_checkpoint_step = PickleCheckpointStep('1', tmpdir)
    pipeline = Pipeline(steps=[
        TransformCallbackStep(tape.callback, ["1"]),
        pickle_checkpoint_step,
        TransformCallbackStep(tape.callback, ["2"]),
        TransformCallbackStep(tape.callback, ["3"]),
    ])

    pipeline, actual_data_inputs = pipeline.fit_transform(
        data_inputs, expected_outputs
    )

    actual_tape = tape.get_name_tape()
    # Compare arrays as a whole: ``==`` on numpy arrays is element-wise and
    # asserting on its result only works for single-element arrays.
    assert np.array_equal(actual_data_inputs, data_inputs)
    assert actual_tape == ["1", "2", "3"]
    assert os.path.exists(
        pickle_checkpoint_step.get_checkpoint_file_path(data_inputs)
    )
def test_tape_callback():
    """
    Check that the callback steps of a pipeline, including those wrapped in
    ``AddFeatures``, write their names to the tape in the expected order.
    """
    tape = TapeCallbackFunction()

    def step(name):
        # One callback step per name, all writing to the same tape.
        return TransformCallbackStep(tape.callback, [name])

    p = Pipeline([
        Identity(),
        step("1"),
        step("2"),
        step("3"),
        AddFeatures([
            step("a"),
            step("b"),
        ]),
        step("4"),
        Identity(),
    ])

    p.fit_transform(np.ones((1, 1)))

    expected_tape = ["1", "2", "3", "a", "b", "4"]
    assert tape.get_name_tape() == expected_tape
def test_pipeline_simple_mutate_inverse_transform():
    """
    Fit a flat pipeline, mutate it so that ``transform`` points at
    ``inverse_transform``, and run it again.

    The tape is expected to hold the step names in forward order followed by
    the same names in reverse order.
    """
    forward_names = ["1", "2", "3", "4"]
    expected_tape = forward_names + forward_names[::-1]
    tape = TapeCallbackFunction()

    def step(name):
        # One callback step per name, all writing to the same tape.
        return TransformCallbackStep(tape.callback, [name])

    p = Pipeline([
        Identity(),
        step("1"),
        step("2"),
        step("3"),
        step("4"),
        Identity(),
    ])

    p, _ = p.fit_transform(np.ones((1, 1)))

    print("[mutating]")
    p = p.mutate(
        new_method="inverse_transform",
        method_to_assign_to="transform",
    )

    p.transform(np.ones((1, 1)))

    assert expected_tape == tape.get_name_tape()
def test_should_load_checkpoint_pickle(tmpdir: LocalPath):
    """
    When a pickled checkpoint already exists, the pipeline should resume from
    it: only the step placed after the checkpoint is expected on the tape.

    :param tmpdir: pytest-provided temporary directory used as cache folder.
    """
    tape = TapeCallbackFunction()
    force_checkpoint_name = 'checkpoint_a'
    pickle_checkpoint_step = PickleCheckpointStep(
        force_checkpoint_name=force_checkpoint_name,
        cache_folder=tmpdir
    )
    pickle_checkpoint_step.set_checkpoint_path(force_checkpoint_name)

    # Write the checkpoint by hand before the pipeline runs.
    checkpoint_path = pickle_checkpoint_step.get_checkpoint_file_path(data_inputs)
    with open(checkpoint_path, 'wb') as checkpoint_file:
        pickle.dump(data_inputs, checkpoint_file)

    pipeline = Pipeline(steps=[
        ('a', TransformCallbackStep(tape.callback, ["1"])),
        ('b', TransformCallbackStep(tape.callback, ["2"])),
        (force_checkpoint_name, pickle_checkpoint_step),
        ('c', TransformCallbackStep(tape.callback, ["3"])),
    ])

    pipeline, actual_data_inputs = pipeline.fit_transform(
        data_inputs, expected_outputs
    )

    actual_tape = tape.get_name_tape()
    # Compare arrays as a whole: ``==`` on numpy arrays is element-wise and
    # asserting on its result only works for single-element arrays.
    assert np.array_equal(actual_data_inputs, data_inputs)
    assert actual_tape == ["3"]
def test_step_cloner():
    """
    Run ``StepClonerForEachDataInput`` over three data inputs and check that
    the tape received each data input and one ``"-"`` name per input.
    """
    data = [[1], [2], [3]]
    tape = TapeCallbackFunction()
    wrapped_step = TransformCallbackStep(tape, ["-"])
    sc = StepClonerForEachDataInput(wrapped_step, copy_op=copy.copy)

    sc.fit_transform(data)

    # Debug output kept as in the original test.
    print(tape)
    print(tape.get_name_tape())
    print(tape.get_data())

    assert tape.get_data() == data
    assert tape.get_name_tape() == ["-", "-", "-"]
def read_checkpoint(self): return self.checkpoint_data_inputs def save_checkpoint(self, data_inputs): self.saved_data_inputs_checkpoint = data_inputs self.saved = True def should_resume(self, data_inputs) -> bool: return self.checkpoint_data_inputs is not None data_inputs = np.ones((1, 1)) expected_outputs = np.ones((1, 1)) chekpoint = SomeCheckpointStep(data_inputs) chekpoint_not_saved = SomeCheckpointStep(None) tape = TapeCallbackFunction() tape_without_checkpoint_test_arguments = ([ ("a", TransformCallbackStep(tape.callback, ["1"])), ("b", TransformCallbackStep(tape.callback, ["2"])), ("c", TransformCallbackStep(tape.callback, ["3"])) ], ["1", "2", "3"]) tape_checkpoint_not_saved_test_arguments = ([ ("a", TransformCallbackStep(tape.callback, ["1"])), ("b", SomeCheckpointStep(checkpoint_data_inputs=None)), ("c", TransformCallbackStep(tape.callback, ["2"])), ("d", TransformCallbackStep(tape.callback, ["3"])) ], ["1", "2", "3"]) tape_checkpoint_saved_after_first_step_test_arguments = ([