def test_linear_classifier(script_mode: bool):
    """ Works as intended. """
    smd.del_hook()
    tf.reset_default_graph()
    with SagemakerSimulator() as sim:
        # Setup
        train_input_fn, eval_input_fn = get_input_fns()
        x_feature = tf.feature_column.numeric_column("x", shape=(28, 28))
        estimator = tf.compat.v1.estimator.LinearClassifier(
            feature_columns=[x_feature],
            model_dir="/tmp/mnist_linear_classifier",
            n_classes=10)

        # Train
        if script_mode:
            hook = smd.EstimatorHook(out_dir=sim.out_dir)
            estimator.train(input_fn=train_input_fn, steps=100, hooks=[hook])
        else:
            estimator.train(input_fn=train_input_fn, steps=100)

        # Check that hook created and tensors saved
        trial = smd.create_trial(path=sim.out_dir)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."


def test_estimator(script_mode: bool):
    """ Works as intended. """
    smd.del_hook()
    tf.reset_default_graph()
    with SagemakerSimulator() as sim:
        # Setup
        mnist_classifier = get_estimator()
        train_input_fn, eval_input_fn = get_input_fns()

        # Train and evaluate
        train_steps, eval_steps = 80, 20
        if script_mode:
            hook = smd.EstimatorHook(out_dir=sim.out_dir)
            hook.set_mode(mode=smd.modes.TRAIN)
            mnist_classifier.train(input_fn=train_input_fn,
                                   steps=train_steps,
                                   hooks=[hook])
            hook.set_mode(mode=smd.modes.EVAL)
            mnist_classifier.evaluate(input_fn=eval_input_fn,
                                      steps=eval_steps,
                                      hooks=[hook])
        else:
            mnist_classifier.train(input_fn=train_input_fn, steps=train_steps)
            mnist_classifier.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        # Check that hook created and tensors saved
        trial = smd.create_trial(path=sim.out_dir)
        print(trial)
        assert smd.get_hook() is not None, "Hook was not created."
        assert len(trial.steps()) > 0, "Nothing saved at any step."
        assert len(trial.tensor_names()) > 0, "Tensors were not saved."
        assert trial.steps() == [0, train_steps], "Wrong steps were saved for trial."
Example #3
def test_linear_classifier(out_dir, tf_eager_mode, saveall):
    """ Works as intended. """
    if not tf_eager_mode:
        tf.compat.v1.disable_eager_execution()
        tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()
    train_input_fn, eval_input_fn = get_input_fns()
    x_feature = tf.feature_column.numeric_column("x", shape=(28, 28))
    estimator = tf.estimator.LinearClassifier(
        feature_columns=[x_feature], model_dir="/tmp/mnist_linear_classifier", n_classes=10
    )
    hook = smd.EstimatorHook(out_dir=out_dir, save_all=saveall)
    estimator.train(input_fn=train_input_fn, steps=10, hooks=[hook])

    # Check that hook created and tensors saved
    trial = smd.create_trial(path=out_dir)
    tnames = trial.tensor_names()
    assert len(trial.steps()) > 0
    if saveall:
        # Number of tensors in each collection
        # vanilla TF 2.2: all = 214, loss = 2, weights = 1, gradients = 0, biases = 12, optimizer variables = 0, metrics = 0, others = 199
        # AWS-TF 2.2: all = 219, loss = 2, weights = 1, gradients = 2, biases = 12, optimizer variables = 5, metrics = 0, others = 197
        # AWS-TF 2.1: all = 226, loss = 2, weights = 1, gradients = 2, biases = 12, optimizer variables = 5, metrics = 0, others = 204
        assert len(tnames) >= 214
        assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 2
        assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 1
        assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 12
        assert len(trial.tensor_names(collection=CollectionKeys.GRADIENTS)) >= 0
        assert len(trial.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES)) >= 0
    else:
        assert len(tnames) == 2
        assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 2
Example #4
def test_estimator(out_dir, tf_eager_mode, saveall):
    """ Works as intended. """
    if not tf_eager_mode:
        tf.compat.v1.disable_eager_execution()
        tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()
    mnist_classifier = get_estimator()
    train_input_fn, eval_input_fn = get_input_fns()

    # Train and evaluate
    train_steps, eval_steps = 8, 2
    hook = smd.EstimatorHook(out_dir=out_dir, save_all=saveall)
    hook.set_mode(mode=smd.modes.TRAIN)
    mnist_classifier.train(input_fn=train_input_fn, steps=train_steps, hooks=[hook])
    hook.set_mode(mode=smd.modes.EVAL)
    mnist_classifier.evaluate(input_fn=eval_input_fn, steps=eval_steps, hooks=[hook])

    # Check that hook created and tensors saved
    trial = smd.create_trial(path=out_dir)
    tnames = trial.tensor_names()
    assert len(trial.steps()) > 0
    if saveall:
        # Number of tensors in each collection
        # vanilla TF 2.2: all = 300, loss = 1, weights = 4, gradients = 0, biases = 18, optimizer variables = 0, metrics = 0, others = 277
        # AWS-TF 2.2 : all = 300, loss = 1, weights = 4, gradients = 8, biases = 18, optimizer variables = 0, metrics = 0, others = 269
        # AWS-TF 2.1 : all = 309, loss = 1, weights = 4, gradients = 8, biases = 18, optimizer variables = 0, metrics = 0, others = 278
        assert len(tnames) >= 300
        assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
        assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 4
        assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 18
        assert len(trial.tensor_names(collection=CollectionKeys.GRADIENTS)) >= 0
        assert len(trial.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES)) >= 0
    else:
        assert len(tnames) == 1
        assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
Example #5
def test_keras_to_estimator(script_mode):
    """ Works as intended. """
    import tensorflow.compat.v1.keras as keras

    tf.reset_default_graph()
    smd.del_hook()
    keras.backend.clear_session()
    with SagemakerSimulator() as sim:
        model = keras.models.Sequential([
            keras.layers.Dense(16, activation="relu", input_shape=(4, )),
            keras.layers.Dropout(0.2),
            keras.layers.Dense(1, activation="sigmoid"),
        ])

        def input_fn():
            split = tfds.Split.TRAIN
            data_dir = TEST_DATASET_S3_PATH if use_s3_datasets() else None
            dataset = tfds.load("iris",
                                data_dir=data_dir,
                                split=split,
                                as_supervised=True)
            dataset = dataset.map(lambda features, labels:
                                  ({
                                      "dense_input": features
                                  }, labels))
            dataset = dataset.batch(32).repeat()
            return dataset

        model.compile(loss="categorical_crossentropy", optimizer="adam")
        model.summary()

        keras_estimator = tf.keras.estimator.model_to_estimator(
            keras_model=model, model_dir=sim.out_dir)

        if script_mode:
            hook = smd.EstimatorHook(sim.out_dir)
            hook.set_mode(smd.modes.TRAIN)
            keras_estimator.train(input_fn=input_fn, steps=25, hooks=[hook])
            hook.set_mode(smd.modes.EVAL)
            eval_result = keras_estimator.evaluate(input_fn=input_fn,
                                                   steps=10,
                                                   hooks=[hook])
        else:
            keras_estimator.train(input_fn=input_fn, steps=25)
            keras_estimator.evaluate(input_fn=input_fn, steps=10)

        tr = smd.create_trial(sim.out_dir)
        assert len(tr.tensor_names()) == 1
        assert tr.steps() == [0, 25]
        assert len(tr.steps(smd.modes.TRAIN)) == 1
        assert len(tr.steps(smd.modes.EVAL)) == 1
Example #6
def test_keras_to_estimator(out_dir, tf_eager_mode):
    if not tf_eager_mode:
        tf.compat.v1.disable_eager_execution()
        tf.compat.v1.reset_default_graph()

    tf.keras.backend.clear_session()

    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(16, activation="relu", input_shape=(4, )),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ])

    def input_fn():
        split = tfds.Split.TRAIN
        data_dir = TEST_DATASET_S3_PATH if use_s3_datasets() else None
        dataset = tfds.load("iris",
                            data_dir=data_dir,
                            split=split,
                            as_supervised=True)
        dataset = dataset.map(lambda features, labels: ({
            "dense_input": features
        }, labels))
        dataset = dataset.batch(32).repeat()
        return dataset

    model.compile(loss="categorical_crossentropy", optimizer="adam")
    model.summary()

    keras_estimator = tf.keras.estimator.model_to_estimator(keras_model=model,
                                                            model_dir=out_dir)

    hook = smd.EstimatorHook(out_dir)

    hook.set_mode(smd.modes.TRAIN)
    keras_estimator.train(input_fn=input_fn, steps=25, hooks=[hook])

    hook.set_mode(smd.modes.EVAL)
    eval_result = keras_estimator.evaluate(input_fn=input_fn,
                                           steps=10,
                                           hooks=[hook])

    from smdebug.trials import create_trial

    tr = create_trial(out_dir)
    assert len(tr.tensor_names()) == 1
    assert len(tr.steps()) == 2
    assert len(tr.steps(smd.modes.TRAIN)) == 1
    assert len(tr.steps(smd.modes.EVAL)) == 1
Example #7
def helper_train(script_mode=False, sim=None, train_steps=80, eval_steps=20):
    # Setup
    mnist_classifier = get_estimator()
    train_input_fn, eval_input_fn = get_input_fns()

    # Train and evaluate

    if script_mode:
        hook = smd.EstimatorHook(out_dir=sim.out_dir)
        hook.set_mode(mode=smd.modes.TRAIN)
        mnist_classifier.train(input_fn=train_input_fn, steps=train_steps, hooks=[hook])
        hook.set_mode(mode=smd.modes.EVAL)
        mnist_classifier.evaluate(input_fn=eval_input_fn, steps=eval_steps, hooks=[hook])
    else:
        mnist_classifier.train(input_fn=train_input_fn, steps=train_steps)
        mnist_classifier.evaluate(input_fn=eval_input_fn, steps=eval_steps)
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--lr", type=float, default=0.001)
    parser.add_argument("--random_seed", type=bool, default=False)
    parser.add_argument("--out_dir", type=str)
    parser.add_argument("--save_interval", type=int, default=500)
    parser.add_argument("--num_epochs",
                        type=int,
                        default=5,
                        help="Number of epochs to train for")
    parser.add_argument(
        "--num_steps",
        type=int,
        help="Number of steps to train for. If this is passed, it overrides num_epochs",
    )
    parser.add_argument(
        "--num_eval_steps",
        type=int,
        help="Number of steps to evaluate for. If this "
        "is passed, it doesn't evaluate over the full eval set",
    )
    parser.add_argument("--model_dir", type=str, default="/tmp/mnist_model")
    args = parser.parse_args()

    if args.random_seed:
        tf.set_random_seed(2)
        np.random.seed(2)
        random.seed(12)

    ##### Enabling SageMaker Debugger ###########
    # creating hook
    hook = smd.EstimatorHook(
        out_dir=args.out_dir,
        include_collections=["weights", "gradients"],
        save_config=smd.SaveConfig(save_interval=args.save_interval),
    )

    def cnn_model_fn(features, labels, mode):
        """Model function for CNN."""
        # Input Layer
        input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])

        # Convolutional Layer #1
        conv1 = tf.layers.conv2d(
            inputs=input_layer,
            filters=32,
            kernel_size=[5, 5],
            padding="same",
            activation=tf.nn.relu,
        )

        # Pooling Layer #1
        pool1 = tf.layers.max_pooling2d(inputs=conv1,
                                        pool_size=[2, 2],
                                        strides=2)

        # Convolutional Layer #2 and Pooling Layer #2
        conv2 = tf.layers.conv2d(inputs=pool1,
                                 filters=64,
                                 kernel_size=[5, 5],
                                 padding="same",
                                 activation=tf.nn.relu)
        pool2 = tf.layers.max_pooling2d(inputs=conv2,
                                        pool_size=[2, 2],
                                        strides=2)

        # Dense Layer
        pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
        dense = tf.layers.dense(inputs=pool2_flat,
                                units=1024,
                                activation=tf.nn.relu)
        dropout = tf.layers.dropout(
            inputs=dense,
            rate=0.4,
            training=mode == tf.estimator.ModeKeys.TRAIN)

        # Logits Layer
        logits = tf.layers.dense(inputs=dropout, units=10)

        predictions = {
            # Generate predictions (for PREDICT and EVAL mode)
            "classes": tf.argmax(input=logits, axis=1),
            # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
            # `logging_hook`.
            "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
        }

        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)

        # Calculate Loss (for both TRAIN and EVAL modes)
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,
                                                      logits=logits)

        # Configure the Training Op (for TRAIN mode)
        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=args.lr)

            ##### Enabling SageMaker Debugger ###########
            # Wrap your optimizer as follows to help SageMaker Debugger identify gradients
            # This does not change your optimization logic, it returns back the same optimizer
            optimizer = hook.wrap_optimizer(optimizer)

            train_op = optimizer.minimize(
                loss=loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)

        # Add evaluation metrics (for EVAL mode)
        eval_metric_ops = {
            "accuracy":
            tf.metrics.accuracy(labels=labels,
                                predictions=predictions["classes"])
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    # Load training and eval data
    ((train_data, train_labels),
     (eval_data, eval_labels)) = tf.keras.datasets.mnist.load_data()

    train_data = train_data / np.float32(255)
    train_labels = train_labels.astype(np.int32)  # not required

    eval_data = eval_data / np.float32(255)
    eval_labels = eval_labels.astype(np.int32)  # not required

    mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,
                                              model_dir=args.model_dir)

    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        y=train_labels,
        batch_size=128,
        num_epochs=args.num_epochs,
        shuffle=True,
    )

    eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": eval_data},
                                                       y=eval_labels,
                                                       num_epochs=1,
                                                       shuffle=False)

    ##### Enabling SageMaker Debugger ###########
    # Set training mode so SMDebug can classify the steps into training mode
    hook.set_mode(smd.modes.TRAIN)

    ##### Enabling SageMaker Debugger ###########
    # pass hook to hooks parameter of train method
    mnist_classifier.train(input_fn=train_input_fn,
                           steps=args.num_steps,
                           hooks=[hook])

    ##### Enabling SageMaker Debugger ###########
    # Set eval mode so SMDebug can classify the steps into eval mode
    hook.set_mode(smd.modes.EVAL)

    ##### Enabling SageMaker Debugger ###########
    # pass hook to hooks parameter of evaluate method
    mnist_classifier.evaluate(input_fn=eval_input_fn,
                              steps=args.num_eval_steps,
                              hooks=[hook])
Example #9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_dir", type=str, help="S3 path for the model")
    parser.add_argument("--lr",
                        type=float,
                        help="Learning Rate",
                        default=0.001)
    parser.add_argument("--steps",
                        type=int,
                        help="Number of steps to run",
                        default=100)
    parser.add_argument("--scale",
                        type=float,
                        help="Scaling factor for inputs",
                        default=1.0)
    parser.add_argument("--random_seed", type=bool, default=False)
    parser.add_argument("--out_dir", type=str)
    parser.add_argument("--save_interval", type=int, default=500)
    args = parser.parse_args()

    # These random seeds are only intended for testing.
    # For now, the seeds 2, 2, 12 keep the test assertions from failing.
    # If you change them, note that the tensor values saved at certain steps may vary.
    if args.random_seed:
        tf.set_random_seed(2)
        np.random.seed(2)
        random.seed(12)

    hook = smd.EstimatorHook(
        out_dir=args.out_dir,
        include_collections=["weights", "gradients"],
        save_config=smd.SaveConfig(save_interval=args.save_interval),
    )

    # Network definition
    # Note the use of name scopes
    with tf.name_scope("foobar"):
        x = tf.placeholder(shape=(None, 2), dtype=tf.float32)
        w = tf.Variable(initial_value=[[10.0], [10.0]], name="weight1")
    with tf.name_scope("foobaz"):
        w0 = [[1], [1.0]]
        y = tf.matmul(x, w0)
    loss = tf.reduce_mean((tf.matmul(x, w) - y)**2, name="loss")

    hook.add_to_collection("losses", loss)

    global_step = tf.Variable(17, name="global_step", trainable=False)
    increment_global_step_op = tf.assign(global_step, global_step + 1)

    optimizer = tf.train.AdamOptimizer(args.lr)

    # Wrap the optimizer with wrap_optimizer so smdebug can find gradients to save
    optimizer = hook.wrap_optimizer(optimizer)

    # use this wrapped optimizer to minimize loss
    optimizer_op = optimizer.minimize(loss,
                                      global_step=increment_global_step_op)

    # pass the hook to hooks parameter of monitored session
    sess = tf.train.MonitoredSession(hooks=[hook])

    # use this session for running the tensorflow model
    hook.set_mode(smd.modes.TRAIN)
    for i in range(args.steps):
        x_ = np.random.random((10, 2)) * args.scale
        _loss, opt, gstep = sess.run(
            [loss, optimizer_op, increment_global_step_op], {x: x_})
        print(f"Step={i}, Loss={_loss}")

    hook.set_mode(smd.modes.EVAL)
    for i in range(args.steps):
        x_ = np.random.random((10, 2)) * args.scale
        sess.run([loss, increment_global_step_op], {x: x_})