# Example #1
def test_keras_fit_shapes(out_dir):
    """Run one Keras ``fit`` step saving every tensor's shape only, then verify them.

    Registers a KerasHook that saves all tensors at step 0 with a shape-only
    reduction, trains via ``helper_keras_fit``, and checks the recorded shapes.
    """
    shape_only = ReductionConfig(save_shape=True)
    hook = smd.KerasHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_steps=[0]),
        reduction_config=shape_only,
        save_all=True,
    )
    helper_keras_fit(trial_dir=out_dir, hook=hook)
    # Log the tensor names captured at step 0 before asserting on their shapes.
    trial = create_trial_fast_refresh(out_dir)
    print(trial.tensor_names(step=0))
    verify_shapes(out_dir, 0)
# Example #2
def test_keras_gradtape_shapes(out_dir):
    """Train with a GradientTape loop saving shapes only, then verify saved shapes.

    NOTE(review): shapes are verified at steps 0 and 500 even though
    ``save_steps=[0]``; presumably the gradtape helper triggers an extra save —
    confirm against ``helper_keras_gradtape``.
    """
    hook = smd.KerasHook(
        out_dir=out_dir,
        save_config=SaveConfig(save_steps=[0]),
        reduction_config=ReductionConfig(save_shape=True),
        save_all=True,
    )
    helper_keras_gradtape(trial_dir=out_dir, hook=hook)
    for step in (0, 500):
        verify_shapes(out_dir, step)
def test_shapes(out_dir, tf_eager_mode):
    """Train one step under a TF distribution strategy with shape-only saving.

    Verifies the recorded shapes at step 0, passing ``multiworker`` when the
    strategy replicates across more than one worker.
    """
    strategy, _ = train_model(
        out_dir,
        save_config=SaveConfig(save_steps=[0]),
        reduction_config=ReductionConfig(save_shape=True),
        save_all=True,
        steps=["train"],
        eager=tf_eager_mode,
    )
    # More than one replica means the trial contains per-worker shape records.
    verify_shapes(out_dir, 0, multiworker=strategy.num_replicas_in_sync > 1)
# Example #4
def test_tf_keras_shapes(out_dir):
    """Run tf.keras (non-eager) through train/eval/predict saving shapes only.

    Saves at steps 0 and 10 with a shape-only reduction, then verifies the
    shapes recorded at step 0.
    """
    save_cfg = SaveConfig(save_steps=[0, 10])
    reduction_cfg = ReductionConfig(save_shape=True)
    train_model(
        out_dir,
        save_config=save_cfg,
        reduction_config=reduction_cfg,
        save_all=True,
        use_tf_keras=True,
        eager=False,
        steps=["train", "eval", "predict", "train"],
    )
    verify_shapes(out_dir, 0)
# Example #5
def test_shapes(out_dir, save_raw_tensor=False):
    """Run a simple TF session model saving shapes (optionally raw tensors too).

    The hook saves weights, gradients and losses every step with a shape-only
    reduction; shapes recorded at step 0 are then verified.
    """
    pre_test_clean_up()
    hook = smd.SessionHook(
        out_dir=out_dir,
        save_config=smd.SaveConfig(save_interval=1),
        reduction_config=smd.ReductionConfig(
            save_shape=True, save_raw_tensor=save_raw_tensor
        ),
        include_collections=["weights", "gradients", "losses"],
    )
    simple_model(hook)
    verify_shapes(out_dir, 0)
# Example #6
def test_save_shapes(out_dir):
    """Train an MXNet Gluon MNIST model saving shapes at steps 0 and 1.

    Verifies the recorded shapes for both saved steps, then removes the trial
    directory.
    """
    hook = t_hook(
        out_dir=out_dir,
        save_config=SaveConfig(save_steps=[0, 1]),
        reduction_config=ReductionConfig(save_shape=True),
        save_all=True,
    )
    run_mnist_gluon_model(hook=hook, num_steps_train=5)
    for step in (0, 1):
        verify_shapes(out_dir, step)
    # Clean up the trial output once assertions pass.
    shutil.rmtree(out_dir)
def test_mnist_shapes(out_dir, on_s3=False):
    """Run the estimator MNIST helper for one step, saving tensor shapes only.

    When ``on_s3`` is true the trial output is redirected to a timestamped
    prefix in the ``smdebug-testing`` bucket instead of the local ``out_dir``.
    """
    if on_s3:
        trial_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f")
        bucket = "smdebug-testing"
        prefix = "outputs/hooks/estimator_modes/" + trial_id
        out_dir = f"s3://{bucket}/{prefix}"
    help_test_mnist(
        out_dir,
        save_config=smd.SaveConfig(save_steps=[0]),
        reduction_config=smd.ReductionConfig(save_shape=True),
        save_all=True,
        num_steps=1,
        steps=None,
    )
    verify_shapes(out_dir, 0)
# Example #8
def test_save_shapes(hook=None, out_dir=None):
    """Train a nested PyTorch model saving tensor shapes only, then verify them.

    Builds a small CNN with nested submodules (to exercise shape capture across
    module hierarchy), registers the hook, trains for 10 steps, and verifies the
    shapes recorded at step 0.

    Parameters:
        hook: an existing smdebug hook to reuse; when ``None`` a shape-only
            hook is created in a fresh ``/tmp`` trial dir and cleaned up after.
        out_dir: trial directory matching ``hook``.
            NOTE(review): if ``hook`` is passed without ``out_dir``,
            ``verify_shapes(None, 0)`` will fail — callers must supply both.
    """

    class ChildA(nn.Module):
        # Wraps ChildB's conv with a ReLU, nested one level deep.
        def __init__(self):
            super(ChildA, self).__init__()
            self.child2 = ChildB()
            self.relu0 = nn.ReLU()

        def forward(self, x):
            return self.relu0(self.child2(x))

    class ChildB(nn.Module):
        # Innermost submodule: a single conv layer.
        def __init__(self):
            super(ChildB, self).__init__()
            self.conv1 = nn.Conv2d(1, 20, 5, 1)

        def forward(self, x):
            return self.conv1(x)

    class NestedNet(nn.Module):
        # LeNet-style CNN whose first stage lives two module levels down.
        def __init__(self):
            super(NestedNet, self).__init__()
            self.child1 = ChildA()
            self.max_pool = nn.MaxPool2d(2, stride=2)
            self.conv2 = nn.Conv2d(20, 50, 5, 1)
            # Removed unused local `relu_module = nn.ReLU()` — it was never
            # assigned to self nor called, so it had no effect on the model.
            self.relu1 = nn.ReLU()
            self.max_pool2 = nn.MaxPool2d(2, stride=2)
            self.fc1 = nn.Linear(4 * 4 * 50, 500)
            self.relu2 = nn.ReLU()
            self.fc2 = nn.Linear(500, 10)

        def forward(self, x):
            x = self.child1(x)
            x = self.max_pool(x)
            x = self.relu1(self.conv2(x))
            x = self.max_pool2(x)
            x = x.view(-1, 4 * 4 * 50)
            x = self.relu2(self.fc1(x))
            x = self.fc2(x)
            return F.log_softmax(x, dim=1)

    hook_created = False
    if hook is None:
        # No hook supplied: create a shape-only hook in a throwaway trial dir.
        global_reduce_config = ReductionConfig(save_shape=True)
        global_save_config = SaveConfig(save_steps=[0])

        run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f")
        out_dir = "/tmp/" + run_id
        hook = t_hook(
            out_dir=out_dir,
            save_config=global_save_config,
            save_all=True,
            reduction_config=global_reduce_config,
        )
        hook_created = True

    model = NestedNet().to(torch.device("cpu"))
    hook.register_module(model)
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    train(model, hook, torch.device("cpu"), optimizer, num_steps=10)
    # different versions seem to output different number of loss tensors
    verify_shapes(out_dir, 0)
    if hook_created:
        # Only delete the trial dir we created ourselves.
        shutil.rmtree(out_dir)