def helper_test_mnist_trial(trial_dir):
    """Verify the step and tensor counts of a saved trial, then delete it.

    The trial is loaded with ``create_trial`` and must contain 3 steps in
    total (2 in TRAIN mode, 1 in EVAL mode) and 13 tensor names.

    Args:
        trial_dir: Location of the trial output. ``is_s3`` decides whether
            it is cleaned up as an S3 prefix or as a local directory.

    Raises:
        AssertionError: If any of the expected counts does not match.
    """
    try:
        tr = create_trial(trial_dir)
        assert len(tr.steps()) == 3
        assert len(tr.steps(mode=smd.modes.TRAIN)) == 2
        assert len(tr.steps(mode=smd.modes.EVAL)) == 1
        assert len(tr.tensor_names()) == 13
    finally:
        # Clean up even when a check fails so that a failing run does not
        # leave trial output behind.
        on_s3, bucket, prefix = is_s3(trial_dir)
        if on_s3:
            delete_s3_prefix(bucket, prefix)
        else:
            shutil.rmtree(trial_dir, ignore_errors=True)
def helper_test_multi_save_configs_trial(trial_dir):
    """Verify the step and tensor counts of a saved trial, then delete it.

    The trial is loaded with ``create_trial`` and must contain 4 steps in
    total (3 in TRAIN mode, 1 in EVAL mode) and exactly 1 tensor name. The
    step lists are printed before the checks to help diagnose failures.

    Args:
        trial_dir: Location of the trial output. ``is_s3`` decides whether
            it is cleaned up as an S3 prefix or as a local directory.

    Raises:
        AssertionError: If any of the expected counts does not match.
    """
    try:
        tr = create_trial(trial_dir)
        print(tr.steps(), tr.steps(mode=smd.modes.TRAIN),
              tr.steps(mode=smd.modes.EVAL))
        assert len(tr.steps()) == 4
        assert len(tr.steps(mode=smd.modes.TRAIN)) == 3
        assert len(tr.steps(mode=smd.modes.EVAL)) == 1
        assert len(tr.tensor_names()) == 1
    finally:
        # Clean up even when a check fails so that a failing run does not
        # leave trial output behind.
        on_s3, bucket, prefix = is_s3(trial_dir)
        if on_s3:
            delete_s3_prefix(bucket, prefix)
        else:
            # ignore_errors keeps a cleanup problem from replacing the
            # original failure raised inside the try block.
            shutil.rmtree(trial_dir, ignore_errors=True)
Exemplo n.º 3
0
def test_multiple_s3_trials(num_trials=4, num_steps=5, num_tensors=5):
    """Create several trials, check each of them, then delete their output.

    Args:
        num_trials: Number of trials to create.
        num_steps: Step count passed to ``help_test_multiple_trials`` and
            ``check_trial``.
        num_tensors: Tensor count passed to ``help_test_multiple_trials``
            and ``check_trial``.
    """
    # Each helper result is indexed as (trial object, trial name, ...).
    data = [
        help_test_multiple_trials(num_steps, num_tensors)
        for _ in range(num_trials)
    ]
    trials = [d[0] for d in data]
    names = [d[1] for d in data]

    try:
        # A plain loop: check_trial is called for its checks only, and its
        # return value is not used.
        for trial_obj in trials:
            check_trial(trial_obj, num_steps=num_steps, num_tensors=num_tensors)
    finally:
        # Delete the folders after the test, even if a check failed.
        for name in names:
            delete_s3_prefix("smdebug-testing", "outputs/" + name)
def test_end_s3_training():
    """Check that ``has_training_ended`` flips to true once a run finishes.

    A fresh, uuid-based output location is used. The test asserts that
    training has not ended there, runs
    ``examples/mxnet/scripts/mnist_gluon_basic_hook_demo.py`` for 10 steps
    with that location as ``--output-uri``, asserts that training has ended,
    and finally deletes the S3 prefix.

    Raises:
        AssertionError: If either ``has_training_ended`` check fails.
        subprocess.CalledProcessError: If the script exits with a non-zero
            status.
    """
    run_id = str(uuid.uuid4())
    bucket = "smdebugcodebuildtest"
    key = "newlogsRunTest/" + run_id
    # NOTE(review): out_dir carries no "s3://" scheme here -- confirm that
    # has_training_ended and the demo script accept the "bucket/key" form.
    out_dir = bucket + "/" + key
    try:
        assert not has_training_ended(out_dir)
        subprocess.check_call([
            sys.executable,
            "examples/mxnet/scripts/mnist_gluon_basic_hook_demo.py",
            "--output-uri",
            out_dir,
            "--num_steps",
            "10",
        ])
        assert has_training_ended(out_dir)
    finally:
        # Remove whatever was written, even when the run or a check failed.
        delete_s3_prefix(bucket, key)
def test_end_s3_training():
    """Check that ``has_training_ended`` flips to true once a run finishes.

    A fresh, uuid-based S3 output location is used. The test asserts that
    training has not ended there, runs
    ``tests/resources/mxnet/mnist_gluon_basic_hook_demo.py`` for 10 steps
    with that location as ``--output-uri``, asserts that training has ended,
    and finally deletes the S3 prefix.

    Raises:
        AssertionError: If either ``has_training_ended`` check fails.
        subprocess.CalledProcessError: If the script exits with a non-zero
            status.
    """
    run_id = str(uuid.uuid4())
    bucket = "smdebug-testing"
    # Use the run id generated above rather than drawing a second uuid.
    key = f"outputs/{run_id}"
    out_dir = f"s3://{bucket}/{key}"
    try:
        assert not has_training_ended(out_dir)
        subprocess.check_call([
            sys.executable,
            "tests/resources/mxnet/mnist_gluon_basic_hook_demo.py",
            "--output-uri",
            out_dir,
            "--num_steps",
            "10",
        ])
        assert has_training_ended(out_dir)
    finally:
        # Remove whatever was written, even when the run or a check failed.
        delete_s3_prefix(bucket, key)
Exemplo n.º 6
0
def test_s3():
    """Write a trial step by step to S3, check it, then delete it.

    ``generate_data`` is called once per step (20 steps, 10 tensors each)
    under a uuid-named trial, after which ``check_s3_trial`` is run on the
    trial location with the same step and tensor counts.
    """
    trial_name = str(uuid.uuid4())
    bucket = "smdebug-testing"
    prefix = "outputs/"
    # S3 URIs always use "/", so build them as strings instead of relying on
    # the platform-dependent os.path.join.
    path = f"s3://{bucket}/{prefix}"
    num_steps = 20
    num_tensors = 10
    try:
        for step in range(num_steps):
            generate_data(
                path=path,
                trial=trial_name,
                # Keep in sync with the count verified below.
                num_tensors=num_tensors,
                step=step,
                tname_prefix="foo",
                worker="algo-1",
                shape=(3, 3, 3),
                rank=0,
            )
        check_s3_trial(path + trial_name,
                       num_steps=num_steps,
                       num_tensors=num_tensors)
    finally:
        # Remove the generated data, even when generation or a check failed.
        delete_s3_prefix(bucket, prefix + trial_name)