示例#1
0
def test_regex_filtering_for_default_collections(out_dir):
    """Check that regex includes narrow the LAYERS and GRADIENTS collections.

    The LAYERS collection is given the ``"^dense"`` regex and the GRADIENTS
    collection the ``"gradients/dense"`` regex before `helper_keras_fit` is
    run. Afterwards each collection must contain 4 tensor names on TF 2.2
    (0 otherwise), and every saved name must match the expected pattern and
    have a non-None value at step 0.
    """
    selected = [CollectionKeys.LAYERS, CollectionKeys.GRADIENTS]
    hook = smd.KerasHook(
        out_dir, save_config=SaveConfig(save_interval=9), include_collections=selected
    )
    hook.get_collection(CollectionKeys.LAYERS).include("^dense")
    hook.get_collection(CollectionKeys.GRADIENTS).include("gradients/dense")
    helper_keras_fit(
        out_dir,
        hook=hook,
        save_config=SaveConfig(save_interval=10),
        steps=["train"],
        run_eagerly=True,
    )

    trial = create_trial_fast_refresh(out_dir)
    layer_names = trial.tensor_names(collection=CollectionKeys.LAYERS)
    gradient_names = trial.tensor_names(collection=CollectionKeys.GRADIENTS)
    assert len(layer_names) == (4 if is_tf_2_2() else 0)
    assert len(gradient_names) == (4 if is_tf_2_2() else 0)

    # Pair each group of saved names with the pattern its members must match.
    checks = (
        (layer_names, r"^(dense)(_\d+)?\/(inputs|outputs)"),
        (gradient_names, r"gradients/dense"),
    )
    for names, expected_pattern in checks:
        for name in names:
            assert trial.tensor(name).value(0) is not None
            assert re.match(expected_pattern, name) is not None
示例#2
0
def test_include_regex(out_dir):
    """Verify that a regex-filtered custom collection is saved by the Keras hook.

    The hook includes only ``custom_coll``, configured with the ``"dense"``
    regex. After `train_model` runs, the collection is expected to hold 12
    tensor names, each with a non-None value at step 0.
    """
    coll_name = "custom_coll"
    hook = KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=[coll_name],
    )
    hook.get_collection(coll_name).include("dense")
    train_model(
        out_dir,
        hook=hook,
        save_config=SaveConfig(save_interval=9),
        steps=["train"],
    )

    trial = create_trial_fast_refresh(out_dir)
    saved_names = trial.tensor_names(collection=coll_name)

    assert len(saved_names) == 12
    for saved_name in saved_names:
        assert trial.tensor(saved_name).value(0) is not None
def test_layer_names_gradient_tape(out_dir):
    """Check the naming scheme of LAYERS tensors saved via `helper_keras_gradtape`.

    Every tensor name in the LAYERS collection must begin with ``flatten``,
    ``dense`` or ``dropout`` (optionally followed by ``_<digits>``) and then
    ``/inputs`` or ``/outputs``.
    """
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=[CollectionKeys.LAYERS],
    )
    helper_keras_gradtape(out_dir, hook=hook, save_config=SaveConfig(save_interval=9))

    trial = create_trial_fast_refresh(out_dir)
    layer_tensor_names = trial.tensor_names(collection=CollectionKeys.LAYERS)
    layer_name_re = re.compile(r"^(flatten|dense|dropout)(_\d+)?\/(inputs|outputs)")
    for layer_tensor_name in layer_tensor_names:
        assert layer_name_re.match(layer_tensor_name) is not None
示例#4
0
def test_keras_fit_pure_eager(out_dir, tf_eager_mode):
    """Check tensor counts when saving everything with ``run_eagerly=True``.

    A hook created with ``save_all=True`` is passed to `helper_keras_fit`;
    the test then asserts the total number of saved tensor names (which
    depends on the TF version helpers) and the per-collection counts.
    """
    hook = smd.KerasHook(
        out_dir=out_dir, save_all=True, save_config=SaveConfig(save_interval=3)
    )
    helper_keras_fit(trial_dir=out_dir, hook=hook, eager=tf_eager_mode, run_eagerly=True)

    trial = smd.create_trial(path=out_dir)

    def saved_count(collection):
        # Number of tensor names the trial reports for one collection.
        return len(trial.tensor_names(collection=collection))

    if not is_tf_2_2():
        assert len(trial.tensor_names()) == (20 if is_tf_2_3() else 21)
    else:
        assert len(trial.tensor_names()) == 27
    assert saved_count(CollectionKeys.BIASES) == 2
    assert saved_count(CollectionKeys.WEIGHTS) == 2
    assert saved_count(CollectionKeys.OPTIMIZER_VARIABLES) == 5
    assert saved_count(CollectionKeys.INPUTS) == (1 if is_tf_2_2() else 0)
    assert saved_count(CollectionKeys.OUTPUTS) == (2 if is_tf_2_2() else 0)
def test_save_all(out_dir, tf_eager_mode, workers):
    """Check tensor, step and worker counts when the hook saves everything.

    `train_model` is run with ``save_all=True`` and ``save_steps=[5]``. The
    expected number of saved tensor names depends on eager mode, on the TF
    version helpers, and (in non-eager mode) on the number of replicas in
    the returned strategy. Each tensor must report one worker at step 0 when
    ``workers == "one"``, otherwise one per replica.
    """
    save_config = SaveConfig(save_steps=[5])
    strategy, saved_scalars = train_model(
        out_dir,
        include_collections=None,
        save_all=True,
        save_config=save_config,
        steps=["train"],
        eager=tf_eager_mode,
        include_workers=workers,
    )
    tr = create_trial_fast_refresh(out_dir)
    print(tr.tensor_names())

    # The original repeated the is_tf_2_2() test inside the is_tf_2_2()
    # branch, leaving an unreachable alternative; the expectation is now
    # selected once per case.
    if tf_eager_mode:
        if is_tf_2_2():
            # weights, metrics, losses, optimizer variables, scalar, inputs,
            # outputs, gradients, layers
            expected = 6 + 2 + 1 + 5 + 1 + 1 + 2 + 8 + 8
        elif is_tf_2_3():
            expected = 6 + 2 + 1 + 5 + 1
        else:
            expected = 6 + 3 + 1 + 5 + 1
    else:
        # weights, grads, optimizer_variables, metrics, losses, outputs
        expected = (6 + 6 + 5 + 3 + 1 +
                    3 * strategy.num_replicas_in_sync +
                    2 * strategy.num_replicas_in_sync)
    assert len(tr.tensor_names()) == expected

    assert len(tr.steps()) == 3
    for tname in tr.tensor_names():
        assert len(
            tr.tensor(tname).workers(0)) == (1 if workers == "one" else
                                             strategy.num_replicas_in_sync)
    verify_files(out_dir, save_config, saved_scalars)
示例#6
0
def test_include_collections(out_dir, tf_eager_mode):
    """Check per-collection tensor counts for an explicit include list.

    The hook is created with seven collections and a reduction config built
    from ALLOWED_NORMS / ALLOWED_REDUCTIONS, then `helper_keras_fit` runs the
    train, eval and predict steps. The expected totals differ between eager
    and non-eager mode and between TF versions.
    """
    include_collections = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.GRADIENTS,
        CollectionKeys.LOSSES,
        CollectionKeys.OUTPUTS,
        CollectionKeys.METRICS,
        CollectionKeys.OPTIMIZER_VARIABLES,
    ]
    save_config = SaveConfig(save_interval=3)
    hook = smd.KerasHook(
        out_dir,
        save_config=save_config,
        include_collections=include_collections,
        reduction_config=ReductionConfig(norms=ALLOWED_NORMS, reductions=ALLOWED_REDUCTIONS),
    )
    helper_keras_fit(out_dir, hook=hook, steps=["train", "eval", "predict"], eager=tf_eager_mode)

    trial = smd.create_trial(path=out_dir)
    # Gradient and optimizer-variable counts are only asserted in non-eager mode.
    if tf_eager_mode:
        # Parentheses are required: without them the statement parses as
        # `assert (len(...) == 7) if is_tf_2_2() else 8`, and the else arm
        # asserts the truthy constant 8, so nothing is checked.
        assert len(trial.tensor_names()) == (7 if is_tf_2_2() else 8)
    else:
        assert len(trial.tensor_names()) == 18
        assert len(trial.tensor_names(collection=CollectionKeys.GRADIENTS)) == 4
        assert len(trial.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES)) == 5
    assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 2
    assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 2
    assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
    # Same precedence fix as above: compare against the selected expectation
    # instead of asserting the bare constant 3.
    assert len(trial.tensor_names(collection=CollectionKeys.METRICS)) == (
        2 if is_tf_2_2() and tf_eager_mode else 3
    )
def test_save_all_workers(out_dir, zcc=False):
    """Check that save-all with ``include_workers="all"`` records every worker.

    Returns immediately when `get_available_gpus` reports 0. Otherwise the
    test asserts that the trial sees one worker per GPU, that the weights,
    biases and gradients collections are non-empty, and that the named conv2d
    kernel and bias tensors were written by as many workers at step 5 as the
    strategy has replicas. `zcc` is accepted but not used in this body.
    """
    # Nothing to verify on a machine without GPUs.
    if get_available_gpus() == 0:
        return
    strategy = train_model(
        out_dir,
        include_collections=None,
        save_all=True,
        save_config=SaveConfig(save_steps=[5]),
        steps=["train"],
        include_workers="all",
    )
    trial = create_trial_fast_refresh(out_dir)
    kernel_name = "conv2d/weights/conv2d/kernel:0"
    bias_name = "conv2d/weights/conv2d/bias:0"

    assert len(trial.workers()) == get_available_gpus()

    assert len(trial.tensor_names(collection="weights"))
    first_weight = trial.tensor_names(collection="weights")[0]
    assert len(trial.tensor(first_weight).workers(5)) == strategy.num_replicas_in_sync

    assert kernel_name in trial.tensor_names(collection="weights")
    assert len(trial.tensor(kernel_name).workers(5)) == strategy.num_replicas_in_sync

    assert len(trial.tensor_names(collection="biases"))
    assert bias_name in trial.tensor_names(collection="biases")
    first_bias = trial.tensor_names(collection="biases")[0]
    assert len(trial.tensor(first_bias).workers(5)) == strategy.num_replicas_in_sync
    assert len(trial.tensor_names(collection="gradients"))
def test_collection_reductions(out_dir, tf_eager_mode):
    """Check that WEIGHTS configured with an l1-norm reduction expose no full value.

    The hook includes WEIGHTS, BIASES and GRADIENTS, and the WEIGHTS
    collection alone is given ``ReductionConfig(norms=["l1"])``. After
    training, reading the full value of the first weight tensor at step 0
    must raise; the l1 reduction value is then expected to be available
    unless its computation failed.
    """
    tf.keras.backend.clear_session()
    hook = KerasHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=[
            CollectionKeys.WEIGHTS,
            CollectionKeys.BIASES,
            CollectionKeys.GRADIENTS,
        ],
    )
    # Override the reduction config for the WEIGHTS collection only.
    hook.get_collection(
        CollectionKeys.WEIGHTS).reduction_config = ReductionConfig(
            norms=["l1"])
    train_model(out_dir, hook=hook, steps=["train"], eager=tf_eager_mode)

    tr = create_trial_fast_refresh(out_dir)
    weight_name = tr.tensor_names(collection=CollectionKeys.WEIGHTS)[0]

    try:
        tr.tensor(weight_name).value(0)
        # Reaching this line means the full value was readable; the resulting
        # AssertionError is not caught by the handlers below and fails the test.
        assert False
    except TensorUnavailableForStep:
        try:
            assert tr.tensor(weight_name).reduction_value(0, "l1") is not None
        except ValueError:
            # some tensors reduction can't be computed
            pass
    except TensorUnavailable:
        # sometimes we might not have tensor saved if it was only being
        # saved as reduction and the reduction computation failed
        pass
def test_get_custom_and_default_collections():
    """Check how a SessionHook splits its collections into custom and default.

    The include list names the built-in collection keys (LOSSES appears
    twice) plus one custom collection that is given a regex. The hook's
    `_get_custom_and_default_collections` must then return exactly one custom
    collection and 11 default ones.
    """
    scratch_path = TemporaryDirectory().name

    requested = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.GRADIENTS,
        CollectionKeys.LOSSES,
        CollectionKeys.OUTPUTS,
        CollectionKeys.METRICS,
        CollectionKeys.LOSSES,
        CollectionKeys.OPTIMIZER_VARIABLES,
        "custom_collection",
    ]

    hook = SessionHook(
        out_dir=scratch_path,
        save_config=SaveConfig(save_interval=2),
        include_collections=requested,
    )
    hook.get_collection(name="custom_collection").include("random-regex")

    split = hook._get_custom_and_default_collections()
    custom_collections, default_collections = split

    assert len(custom_collections) == 1
    # The additional three collections are: all, default and sm_metrics.
    assert len(default_collections) == 8 + 3
def test_include_regex(out_dir, tf_eager_mode, workers):
    """Verify a regex-filtered custom collection, including per-tensor worker counts.

    ``custom_coll`` is configured with the ``"dense"`` regex. The expected
    number of saved names depends on eager mode, the TF version helper and
    the strategy's replica count. Each tensor must have a value at step 0 and
    report one worker when ``workers == "one"``, otherwise one per replica.
    """
    hook = KerasHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=["custom_coll"],
        include_workers=workers,
    )
    hook.get_collection("custom_coll").include("dense")
    strategy, _ = train_model(out_dir, hook=hook, steps=["train"], eager=tf_eager_mode)

    trial = create_trial_fast_refresh(out_dir)
    saved_names = trial.tensor_names(collection="custom_coll")

    if not tf_eager_mode:
        assert len(saved_names) == 4 + 3 * strategy.num_replicas_in_sync
    else:
        assert len(saved_names) == (12 if is_tf_2_2() else 4)
    for saved_name in saved_names:
        assert trial.tensor(saved_name).value(0) is not None
        writers = trial.tensor(saved_name).workers(0)
        assert len(writers) == (1 if workers == "one" else strategy.num_replicas_in_sync)
def test_include_regex_opt_var(out_dir, tf_eager_mode, workers):
    """Verify a custom collection that selects tensors with the ``"Adam"`` regex.

    In eager mode 5 names are expected; otherwise the count scales with the
    strategy's replica count. Each tensor must have a value at step 0 and
    report one worker when ``workers == "one"``, otherwise one per replica.
    """
    coll_name = "custom_optimizer_variables"
    hook = KerasHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=[coll_name],
        include_workers=workers,
    )
    hook.get_collection(coll_name).include("Adam")
    strategy, _ = train_model(out_dir, hook=hook, steps=["train"], eager=tf_eager_mode)

    trial = create_trial_fast_refresh(out_dir)
    saved_names = trial.tensor_names(collection=coll_name)

    if not tf_eager_mode:
        assert len(saved_names) == 4 + 3 * strategy.num_replicas_in_sync
    else:
        assert len(saved_names) == 5
    for saved_name in saved_names:
        assert trial.tensor(saved_name).value(0) is not None
        writers = trial.tensor(saved_name).workers(0)
        assert len(writers) == (1 if workers == "one" else strategy.num_replicas_in_sync)
示例#12
0
def test_gradtape_include_collections(out_dir):
    """Check per-collection counts for a GradientTape run with an explicit include list.

    Ensures that training through `helper_keras_gradtape` works when the hook
    is configured with every collection named in ``include_collections`` and
    a reduction config built from ALLOWED_NORMS / ALLOWED_REDUCTIONS.
    """
    requested = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.GRADIENTS,
        CollectionKeys.LOSSES,
        CollectionKeys.OUTPUTS,
        CollectionKeys.METRICS,
        CollectionKeys.OPTIMIZER_VARIABLES,
    ]
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=requested,
        reduction_config=ReductionConfig(norms=ALLOWED_NORMS, reductions=ALLOWED_REDUCTIONS),
    )
    helper_keras_gradtape(out_dir, hook=hook)

    trial = smd.create_trial(path=out_dir)
    assert len(trial.tensor_names()) == (16 if is_tf_2_2() else 15)

    # Expected number of saved tensor names per collection, in check order.
    expected_counts = (
        (CollectionKeys.GRADIENTS, 4),
        (CollectionKeys.OPTIMIZER_VARIABLES, 5),
        (CollectionKeys.BIASES, 2),
        (CollectionKeys.WEIGHTS, 2),
        (CollectionKeys.LOSSES, 1),
        (CollectionKeys.METRICS, 1),
    )
    for collection_key, count in expected_counts:
        assert len(trial.tensor_names(collection=collection_key)) == count
def test_gradtape_include_regex(out_dir):
    """Verify a regex-filtered custom collection in a GradientTape run.

    ``custom_coll`` is configured with the ``"dense"`` regex; 12 saved names
    are expected on TF 2.2 and 8 otherwise, each with a value at step 0.
    """
    coll_name = "custom_coll"
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=[coll_name],
    )
    hook.get_collection(coll_name).include("dense")
    helper_keras_gradtape(out_dir, hook=hook, save_config=SaveConfig(save_interval=9))

    trial = create_trial_fast_refresh(out_dir)
    saved_names = trial.tensor_names(collection=coll_name)

    assert len(saved_names) == (12 if is_tf_2_2() else 8)
    for saved_name in saved_names:
        assert trial.tensor(saved_name).value(0) is not None
def test_simple_include(out_dir):
    """Run `helper_test_simple_include` with a hook including "default" and "losses"."""
    pre_test_clean_up()
    session_hook = SessionHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_interval=2),
        include_collections=["default", "losses"],
    )
    helper_test_simple_include(out_dir, session_hook)
def test_save_config_start_and_end(out_dir):
    """Run `helper_save_config_start_and_end` with a bounded save config.

    The save config uses an interval of 2 with ``start_step=8`` and
    ``end_step=14``; the hook is created with ``save_all=False``.
    """
    pre_test_clean_up()
    bounded_config = SaveConfig(save_interval=2, start_step=8, end_step=14)
    session_hook = SessionHook(out_dir=out_dir, save_all=False, save_config=bounded_config)
    helper_save_config_start_and_end(out_dir, session_hook)
def test_include_regex(out_dir, tf_eager_mode):
    """Verify a regex-filtered custom collection in a Keras ``fit`` helper run.

    ``custom_coll`` is configured with the ``"dense"`` regex and
    `helper_keras_fit` is called with ``run_eagerly=tf_eager_mode``. 12 saved
    names are expected on TF 2.2 and 4 otherwise, each with a value at step 0.
    """
    coll_name = "custom_coll"
    hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=9),
        include_collections=[coll_name],
    )
    hook.get_collection(coll_name).include("dense")
    helper_keras_fit(
        out_dir,
        hook=hook,
        save_config=SaveConfig(save_interval=9),
        steps=["train"],
        run_eagerly=tf_eager_mode,
    )

    trial = create_trial_fast_refresh(out_dir)
    saved_names = trial.tensor_names(collection=coll_name)
    assert len(saved_names) == (12 if is_tf_2_2() else 4)
    for saved_name in saved_names:
        assert trial.tensor(saved_name).value(0) is not None
def test_simple_include_regex(out_dir):
    """Run `helper_test_simple_include_regex` with a hook-level ``"loss:0"`` regex.

    The hook is created with an empty ``include_collections`` list, so only
    the ``include_regex`` argument selects what to save.
    """
    pre_test_clean_up()
    session_hook = SessionHook(
        out_dir=out_dir,
        include_regex=["loss:0"],
        include_collections=[],
        save_config=SaveConfig(save_interval=2),
    )
    helper_test_simple_include_regex(out_dir, session_hook)
示例#18
0
def test_keras_fit_shapes(out_dir):
    """Save shapes at step 0 through `helper_keras_fit` and verify them.

    The hook saves everything with ``ReductionConfig(save_shape=True)``; the
    tensor names at step 0 are printed before `verify_shapes` is called.
    """
    shape_hook = smd.KerasHook(
        out_dir=out_dir,
        save_all=True,
        save_config=SaveConfig(save_steps=[0]),
        reduction_config=ReductionConfig(save_shape=True),
    )
    helper_keras_fit(trial_dir=out_dir, hook=shape_hook)
    trial = create_trial_fast_refresh(out_dir)
    print(trial.tensor_names(step=0))
    verify_shapes(out_dir, 0)
示例#19
0
def test_hook_write(out_dir):
    """Run `helper_hook_write` with a save-all hook and check weights were saved.

    The test passes when the "weights" collection of the resulting trial
    contains at least one tensor name.
    """
    pre_test_clean_up()
    # Hook that saves everything, with a save interval of 999.
    session_hook = SessionHook(
        out_dir,
        save_all=True,
        include_collections=None,
        save_config=SaveConfig(save_interval=999),
    )
    helper_hook_write(out_dir, session_hook)
    trial = create_trial_fast_refresh(out_dir)
    print(trial.tensor_names(collection="weights"))
    assert len(trial.tensor_names(collection="weights"))
示例#20
0
def test_keras_gradtape_shapes(out_dir):
    """Save shapes through `helper_keras_gradtape` and verify them.

    The hook saves everything with ``ReductionConfig(save_shape=True)``;
    `verify_shapes` is then called for step indices 0 and 500.
    """
    shape_hook = smd.KerasHook(
        out_dir=out_dir,
        save_all=True,
        save_config=SaveConfig(save_steps=[0]),
        reduction_config=ReductionConfig(save_shape=True),
    )
    helper_keras_gradtape(trial_dir=out_dir, hook=shape_hook)
    for step_index in (0, 500):
        verify_shapes(out_dir, step_index)
def test_multi_collection_match(out_dir):
    """Run `helper_test_multi_collection_match` with overlapping ``"loss:0"`` regexes.

    The same regex is supplied at hook level via ``include_regex`` and on the
    "trial" collection, alongside the "default" collection.
    """
    pre_test_clean_up()
    loss_regex = "loss:0"
    session_hook = SessionHook(
        out_dir=out_dir,
        include_regex=[loss_regex],
        include_collections=["default", "trial"],
        save_config=SaveConfig(save_interval=2),
    )
    session_hook.get_collection("trial").include(loss_regex)
    helper_test_multi_collection_match(out_dir, session_hook)
def test_should_save_tensor_behavior_without_prepare_collections(out_dir):
    """Check `should_save_tensor_or_collection` is falsy on a freshly built hook.

    The hook is only constructed here; no model is registered and no training
    callback is invoked before the query. This can happen if the function is
    called before the callbacks are initialized.
    """
    hook = smd.KerasHook(out_dir, save_config=SaveConfig(save_interval=3), save_all=True)
    for collection_key in (CollectionKeys.GRADIENTS, CollectionKeys.LAYERS):
        assert not hook.should_save_tensor_or_collection("dummy", collection_key)
示例#23
0
def test_tf_keras_shapes(out_dir):
    """Save shapes with ``use_tf_keras=True`` in non-eager mode and verify step 0.

    `train_model` runs the train, eval, predict and train steps with
    ``save_steps=[0, 10]`` and ``ReductionConfig(save_shape=True)``.
    """
    step_sequence = ["train", "eval", "predict", "train"]
    train_model(
        out_dir,
        save_all=True,
        reduction_config=ReductionConfig(save_shape=True),
        use_tf_keras=True,
        save_config=SaveConfig(save_steps=[0, 10]),
        eager=False,
        steps=step_sequence,
    )
    verify_shapes(out_dir, 0)
def test_shapes(out_dir, tf_eager_mode):
    """Save shapes at step 0 via `train_model` and verify them.

    `verify_shapes` is told the run is multi-worker when the returned
    strategy reports more than one replica in sync.
    """
    strategy, _ = train_model(
        out_dir,
        save_all=True,
        save_config=SaveConfig(save_steps=[0]),
        reduction_config=ReductionConfig(save_shape=True),
        steps=["train"],
        eager=tf_eager_mode,
    )
    has_multiple_replicas = strategy.num_replicas_in_sync > 1
    verify_shapes(out_dir, 0, multiworker=has_multiple_replicas)
示例#25
0
def helper_create_hook(out_dir, collections, include_regex=None):
    """Build a KerasHook for `collections`, register `model`, and call `on_train_begin`.

    Args:
        out_dir: First positional argument passed to ``smd.KerasHook``.
        collections: Collection names passed as ``include_collections``.
        include_regex: Optional regex; when truthy it is added to every
            collection in `collections`.

    Returns:
        The hook, after ``register_model(model)`` and ``on_train_begin()``
        have been called. `model` is a name taken from the enclosing module.
    """
    keras_hook = smd.KerasHook(
        out_dir,
        save_config=SaveConfig(save_interval=3),
        include_collections=collections,
    )

    if include_regex:
        for collection_name in collections:
            keras_hook.get_collection(collection_name).include(include_regex)

    keras_hook.register_model(model)
    keras_hook.on_train_begin()
    return keras_hook
def test_one_device(out_dir):
    """Check that no ``events`` directory exists after a OneDeviceStrategy run.

    `train_model` is run on ``/cpu:0`` with the WEIGHTS, BIASES, OUTPUTS and
    GRADIENTS collections included.
    """
    cpu_strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
    requested = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.OUTPUTS,
        CollectionKeys.GRADIENTS,
    ]
    train_model(
        out_dir,
        include_collections=requested,
        save_config=SaveConfig(save_interval=9),
        strategy=cpu_strategy,
        steps=["train"],
    )
    events_dir = os.path.join(out_dir, "events")
    assert not os.path.isdir(events_dir)
示例#27
0
def test_save_all_full(out_dir, hook=None):
    """Run `simple_model` with a save-all hook and check the saved collections.

    Args:
        out_dir: Output directory read back by `create_trial_fast_refresh`.
        hook: Optional hook; when None, a save-all SessionHook with a save
            interval of 2 is created.

    More than 50 tensor names are expected overall, with exactly one each in
    the "weights", "gradients" and "losses" collections.
    """
    tf.reset_default_graph()
    if hook is None:
        hook = SessionHook(
            out_dir=out_dir, save_all=True, save_config=SaveConfig(save_interval=2)
        )

    simple_model(hook)
    trial = create_trial_fast_refresh(out_dir)
    assert len(trial.tensor_names()) > 50
    print(trial.tensor_names(collection="weights"))
    for collection_name in ("weights", "gradients", "losses"):
        assert len(trial.tensor_names(collection=collection_name)) == 1
def test_collection_add(out_dir):
    """Check that the "relu" collection requested from `train_model` is saved.

    `train_model` is called with ``create_relu_collection=True``; the
    collection must then hold exactly two tensor names, both with a non-None
    value at step 0.
    """
    train_model(
        out_dir,
        include_collections=["relu"],
        save_config=SaveConfig(save_interval=9),
        create_relu_collection=True,
        steps=["train"],
    )

    trial = create_trial_fast_refresh(out_dir)
    relu_names = trial.tensor_names(collection="relu")

    assert len(relu_names) == 2
    for position in (0, 1):
        assert trial.tensor(relu_names[position]).value(0) is not None
示例#29
0
def test_clash_with_custom_callback(out_dir):
    """Check the saved tensor count when a "fetch_tensor" callback is also added.

    `train_model` is run with WEIGHTS, BIASES, OUTPUTS and GRADIENTS included
    and ``add_callbacks=["fetch_tensor"]``; the expected total scales with the
    strategy's replica count.
    """
    requested = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.OUTPUTS,
        CollectionKeys.GRADIENTS,
    ]
    strategy = train_model(
        out_dir,
        include_collections=requested,
        save_config=SaveConfig(save_interval=9),
        steps=["train"],
        add_callbacks=["fetch_tensor"],
    )
    trial = create_trial_fast_refresh(out_dir)
    saved_total = len(trial.tensor_names())
    assert saved_total == 6 + 6 + strategy.num_replicas_in_sync * 1 + 3
def test_save_all(out_dir):
    """Check tensor and step counts when `train_model` saves everything.

    The run uses ``save_all=True`` with ``save_steps=[5]``; the expected
    number of tensor names scales with the strategy's replica count and three
    steps are expected in the trial.
    """
    strategy = train_model(
        out_dir,
        include_collections=None,
        save_all=True,
        save_config=SaveConfig(save_steps=[5]),
        steps=["train"],
    )
    trial = create_trial_fast_refresh(out_dir)
    print(trial.tensor_names())
    saved_total = len(trial.tensor_names())
    # weights, grads, optimizer_variables, metrics, losses, outputs
    assert saved_total == (
        6 + 6 + 5 + 3 + 1
        + 3 * strategy.num_replicas_in_sync
        + 2 * strategy.num_replicas_in_sync
    )
    assert len(trial.steps()) == 3