def test_base_reductions(out_dir, tf_eager_mode):
    """Train with a global reduction config and check what is retrievable.

    Weights should only expose reduction values at a saved step (full value
    raises TensorUnavailableForStep), while losses and metrics keep full values.
    """
    train_model(
        out_dir,
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.METRICS,
            CollectionKeys.LOSSES,
        ],
        reduction_config=ReductionConfig(norms=ALLOWED_NORMS, reductions=ALLOWED_REDUCTIONS),
        steps=["train"],
        eager=tf_eager_mode,
    )
    trial = create_trial_fast_refresh(out_dir)

    weight = trial.tensor_names(collection=CollectionKeys.WEIGHTS)[0]
    try:
        trial.tensor(weight).value(0)
        assert False  # full value must not be available for a reduced tensor
    except TensorUnavailableForStep:
        assert trial.tensor(weight).reduction_values(0)

    # losses and metrics are never reduced — full values must be present
    for collection in (CollectionKeys.LOSSES, CollectionKeys.METRICS):
        name = trial.tensor_names(collection=collection)[0]
        assert trial.tensor(name).value(0) is not None
def test_base_reductions(out_dir, tf_eager_mode):
    """Fit a keras model with a global reduction config and verify retrieval.

    Weights should expose one entry per configured reduction/norm (and raise
    TensorUnavailableForStep for full values); losses and metrics keep full values.
    """
    helper_keras_fit(
        trial_dir=out_dir,
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.METRICS,
            CollectionKeys.LOSSES,
        ],
        reduction_config=ReductionConfig(norms=ALLOWED_NORMS, reductions=ALLOWED_REDUCTIONS),
        run_eagerly=tf_eager_mode,
    )
    trial = create_trial_fast_refresh(out_dir)

    weight = trial.tensor_names(collection=CollectionKeys.WEIGHTS)[0]
    try:
        trial.tensor(weight).value(0)
        assert False  # full value must not be available for a reduced tensor
    except TensorUnavailableForStep:
        reduced = trial.tensor(weight)
        assert reduced.reduction_value(0, "l1") is not None
        # one saved reduction per configured reduction op and per configured norm
        assert len(reduced.reduction_values(0)) == len(ALLOWED_REDUCTIONS) + len(ALLOWED_NORMS)

    for collection in (CollectionKeys.LOSSES, CollectionKeys.METRICS):
        name = trial.tensor_names(collection=collection)[0]
        assert trial.tensor(name).value(0) is not None
def test_gradtape_include_collections(out_dir):
    """
    This test ensures that a training script written with GradientTape
    handles the case where hook config contains all collections mentioned
    through include collections
    """
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.GRADIENTS,
            CollectionKeys.LOSSES,
            CollectionKeys.OUTPUTS,
            CollectionKeys.METRICS,
            CollectionKeys.OPTIMIZER_VARIABLES,
        ],
        reduction_config=ReductionConfig(norms=ALLOWED_NORMS, reductions=ALLOWED_REDUCTIONS),
    )
    helper_keras_gradtape(out_dir, hook=hook)

    trial = smd.create_trial(path=out_dir)
    # can't save gradients in TF 2.x
    assert len(trial.tensor_names()) == (16 if is_tf_2_2() else 15)
    # expected tensor count per collection
    expected = {
        CollectionKeys.GRADIENTS: 4,
        CollectionKeys.OPTIMIZER_VARIABLES: 5,
        CollectionKeys.BIASES: 2,
        CollectionKeys.WEIGHTS: 2,
        CollectionKeys.LOSSES: 1,
        CollectionKeys.METRICS: 1,
    }
    for collection, count in expected.items():
        assert len(trial.tensor_names(collection=collection)) == count
def test_collection_reductions(out_dir, tf_eager_mode):
    """Attach an l1-norm reduction config to the WEIGHTS collection only.

    Reading a weight's full value must raise TensorUnavailableForStep, while
    its l1 reduction should be retrievable (with documented best-effort
    fallbacks for reductions that could not be computed or saved).
    """
    tf.keras.backend.clear_session()
    hook = KerasHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.GRADIENTS,
        ],
    )
    hook.get_collection(CollectionKeys.WEIGHTS).reduction_config = ReductionConfig(norms=["l1"])
    train_model(out_dir, hook=hook, steps=["train"], eager=tf_eager_mode)

    trial = create_trial_fast_refresh(out_dir)
    weight = trial.tensor_names(collection=CollectionKeys.WEIGHTS)[0]
    try:
        trial.tensor(weight).value(0)
        assert False  # full value must not be available for a reduced tensor
    except TensorUnavailableForStep:
        try:
            assert trial.tensor(weight).reduction_value(0, "l1") is not None
        except ValueError:
            # some tensors reduction can't be computed
            pass
    except TensorUnavailable:
        # sometimes we might not have tensor saved if it was only being
        # saved as reduction and the reduction computation failed
        pass
def test_include_collections(out_dir, tf_eager_mode):
    """Save all standard collections with reductions and verify tensor counts.

    Bug fix: two assertions had an operator-precedence defect. Python parses
    `assert len(x) == 7 if cond else 8` as `assert (len(x) == 7) if cond else 8`,
    so whenever the condition was False the assert evaluated the truthy constant
    (8 / 3) and could never fail. The conditional is now parenthesized so the
    comparison applies to both branches.
    """
    include_collections = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.GRADIENTS,
        CollectionKeys.LOSSES,
        CollectionKeys.OUTPUTS,
        CollectionKeys.METRICS,
        CollectionKeys.OPTIMIZER_VARIABLES,
    ]
    save_config = SaveConfig(save_interval=3)
    hook = smd.KerasHook(
        out_dir,
        save_config=save_config,
        include_collections=include_collections,
        reduction_config=ReductionConfig(norms=ALLOWED_NORMS, reductions=ALLOWED_REDUCTIONS),
    )
    helper_keras_fit(out_dir, hook=hook, steps=["train", "eval", "predict"], eager=tf_eager_mode)

    trial = smd.create_trial(path=out_dir)
    # can't save gradients in TF 2.x
    if tf_eager_mode:
        # was: `assert len(...) == 7 if is_tf_2_2() else 8` — the non-2.2 branch
        # asserted the constant 8 and always passed
        assert len(trial.tensor_names()) == (7 if is_tf_2_2() else 8)
    else:
        assert len(trial.tensor_names()) == 18
        assert len(trial.tensor_names(collection=CollectionKeys.GRADIENTS)) == 4
        assert len(trial.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES)) == 5
    assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 2
    assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 2
    assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
    # was: `assert (len(...) == 2 if cond else 3)` — the else branch asserted
    # the constant 3 and always passed
    assert len(trial.tensor_names(collection=CollectionKeys.METRICS)) == (
        2 if is_tf_2_2() and tf_eager_mode else 3
    )
def test_keras_fit_shapes(out_dir):
    """Run keras fit saving only tensor shapes at step 0, then verify them."""
    shape_hook = smd.KerasHook(
        out_dir=out_dir,
        save_all=True,
        save_config=SaveConfig(save_steps=[0]),
        reduction_config=ReductionConfig(save_shape=True),
    )
    helper_keras_fit(trial_dir=out_dir, hook=shape_hook)
    trial = create_trial_fast_refresh(out_dir)
    print(trial.tensor_names(step=0))
    verify_shapes(out_dir, 0)
def test_keras_gradtape_shapes(out_dir):
    """Run GradientTape training saving only shapes at step 0; verify both phases."""
    shape_hook = smd.KerasHook(
        out_dir=out_dir,
        save_all=True,
        save_config=SaveConfig(save_steps=[0]),
        reduction_config=ReductionConfig(save_shape=True),
    )
    helper_keras_gradtape(trial_dir=out_dir, hook=shape_hook)
    # step 500 presumably corresponds to the eval/second phase produced by the
    # helper — verify shapes for both recorded steps
    for step in (0, 500):
        verify_shapes(out_dir, step)
def test_shapes(out_dir, tf_eager_mode):
    """Train saving only shapes at step 0; verify, accounting for multi-worker runs."""
    strategy, _ = train_model(
        out_dir,
        save_all=True,
        save_config=SaveConfig(save_steps=[0]),
        reduction_config=ReductionConfig(save_shape=True),
        steps=["train"],
        eager=tf_eager_mode,
    )
    # more than one replica means shape files are spread across workers
    is_multiworker = strategy.num_replicas_in_sync > 1
    verify_shapes(out_dir, 0, multiworker=is_multiworker)
def test_tf_keras_shapes(out_dir):
    """Train a tf.keras model (graph mode) saving shapes at steps 0 and 10; verify step 0."""
    train_model(
        out_dir,
        save_all=True,
        reduction_config=ReductionConfig(save_shape=True),
        use_tf_keras=True,
        save_config=SaveConfig(save_steps=[0, 10]),
        eager=False,
        steps=["train", "eval", "predict", "train"],
    )
    verify_shapes(out_dir, 0)
def test_include_only_custom_collection(out_dir, tf_eager_mode):
    """Save only a user-defined collection matching "Adam" and check its tensor count."""
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=["custom_optimizer_variables"],
        reduction_config=ReductionConfig(norms=ALLOWED_NORMS, reductions=ALLOWED_REDUCTIONS),
    )
    hook.get_collection("custom_optimizer_variables").include("Adam")
    helper_keras_fit(out_dir, hook=hook, steps=["train", "eval", "predict"], eager=tf_eager_mode)

    trial = smd.create_trial(path=out_dir)
    assert len(trial.tensor_names()) == (8 if is_tf_2_2() and tf_eager_mode else 9)
    assert len(trial.tensor_names(collection="custom_optimizer_variables")) == 5
def test_include_collections(out_dir, tf_eager_mode):
    """Save standard collections plus a custom "Adam" collection; verify counts.

    Expected totals vary by TF version and eager/graph mode.
    """
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.GRADIENTS,
            CollectionKeys.LOSSES,
            CollectionKeys.METRICS,
            CollectionKeys.OPTIMIZER_VARIABLES,
            "custom_optimizer_variables",
        ],
        reduction_config=ReductionConfig(norms=ALLOWED_NORMS, reductions=ALLOWED_REDUCTIONS),
    )
    hook.get_collection("custom_optimizer_variables").include("Adam")
    helper_keras_fit(
        out_dir, hook=hook, steps=["train", "eval", "predict"], run_eagerly=tf_eager_mode
    )

    trial = smd.create_trial(path=out_dir)
    # can't save gradients in TF 2.x
    if tf_eager_mode:
        if is_tf_2_2():
            expected_total = 16
        else:
            expected_total = 12 if is_tf_2_3() else 13
        assert len(trial.tensor_names()) == expected_total
    else:
        assert len(trial.tensor_names()) == 18
        assert len(trial.tensor_names(collection=CollectionKeys.GRADIENTS)) == 4
        assert len(trial.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES)) == 5
        assert len(trial.tensor_names(collection="custom_optimizer_variables")) == 5
    assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 2
    assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 2
    assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
    assert len(trial.tensor_names(collection=CollectionKeys.METRICS)) == (
        2 if (is_tf_2_2() or is_tf_2_3()) and tf_eager_mode else 3
    )
def test_collection_reductions(out_dir, tf_eager_mode):
    """Weights save only the l1 norm; biases keep full values.

    Reading a weight's full value must raise TensorUnavailableForStep while the
    l1 reduction remains retrievable; a bias full value must still be present.
    """
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=[CollectionKeys.WEIGHTS, CollectionKeys.BIASES],
    )
    hook.get_collection(CollectionKeys.WEIGHTS).reduction_config = ReductionConfig(norms=["l1"])
    helper_keras_fit(out_dir, hook=hook, steps=["train"], eager=tf_eager_mode)

    trial = create_trial_fast_refresh(out_dir)
    weight = trial.tensor_names(collection=CollectionKeys.WEIGHTS)[0]
    bias = trial.tensor_names(collection=CollectionKeys.BIASES)[0]
    # biases are not reduced — full value available
    assert trial.tensor(bias).value(0) is not None
    try:
        trial.tensor(weight).value(0)
        assert False  # full value must not be available for a reduced tensor
    except TensorUnavailableForStep:
        assert trial.tensor(weight).reduction_value(0, "l1") is not None
def test_collection_reductions(out_dir):
    """Gradients save only the l1 norm; weights keep full values.

    Bug fix: the original except-handler asserted
    `tr.tensor(weight_name).reduction_value(0, "l1")`, but weights are saved in
    full (their value(0) is asserted non-None above) and the reduction config is
    attached to the GRADIENTS collection — so the gradient reduction was never
    verified. Apparent copy-paste from the weights-reduction variant of this
    test; the handler now checks the gradient's reduction instead.
    """
    hook = KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=[CollectionKeys.WEIGHTS, CollectionKeys.GRADIENTS],
    )
    hook.get_collection(CollectionKeys.GRADIENTS).reduction_config = ReductionConfig(norms=["l1"])
    train_model(out_dir, hook=hook, steps=["train"])

    tr = create_trial_fast_refresh(out_dir)
    weight_name = tr.tensor_names(collection=CollectionKeys.WEIGHTS)[0]
    grad_name = tr.tensor_names(collection=CollectionKeys.GRADIENTS)[0]
    # weights are not reduced — full value available
    assert tr.tensor(weight_name).value(0) is not None
    try:
        tr.tensor(grad_name).value(0)
        assert False  # full value must not be available for a reduced tensor
    except TensorUnavailableForStep:
        # BUG FIX: was weight_name — verify the reduced tensor (the gradient)
        assert tr.tensor(grad_name).reduction_value(0, "l1") is not None
def create_smdebug_hook(out_dir):
    """Build a KerasHook that saves the standard collections every 3 steps.

    All collections are saved with the globally allowed norms and reductions.

    Bug fix: CollectionKeys.LOSSES appeared twice in the original
    include_collections list; the duplicate is removed (harmless but a defect).
    """
    include_collections = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.GRADIENTS,
        CollectionKeys.LOSSES,
        CollectionKeys.OUTPUTS,
        CollectionKeys.METRICS,
        CollectionKeys.OPTIMIZER_VARIABLES,
    ]
    save_config = SaveConfig(save_interval=3)
    hook = smd.KerasHook(
        out_dir,
        save_config=save_config,
        include_collections=include_collections,
        reduction_config=ReductionConfig(norms=ALLOWED_NORMS, reductions=ALLOWED_REDUCTIONS),
    )
    return hook