def test_get_input_streams(use_test_config_dir):
    assert valohai.inputs("single_image").stream().read(10000)
    assert len(list(valohai.inputs("input_with_archive").streams())) == 2
    for stream in valohai.inputs("input_with_archive").streams():
        assert stream.read(10000)
    assert valohai.inputs("single_image").stream().read()
    assert not valohai.inputs("nonono").stream()

def main():
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step batch_inference.py`
    valohai.prepare(
        step='batch-inference',
        image='tensorflow/tensorflow:2.6.0',
        default_inputs={
            'model': {
                'default': None,
                'optional': False,
            },
            'images': [
                'https://valohaidemo.blob.core.windows.net/mnist/four-inverted.png',
                'https://valohaidemo.blob.core.windows.net/mnist/five-inverted.png',
                'https://valohaidemo.blob.core.windows.net/mnist/five-normal.jpg',
            ],
        },
    )

    print('Loading model')
    model_path = valohai.inputs('model').path()
    model = load_model(model_path)

    json_blob = {}
    for image_path in valohai.inputs('images').paths():
        filename = os.path.basename(image_path)
        extension = os.path.splitext(image_path)[1].lower()
        if extension not in ['.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff']:
            print(f'{filename} is not an image file')
            continue
        print(f'Running inference for {filename}')
        try:
            image, inverted = process_image(Image.open(image_path))
            prediction = predict_image(model, image, inverted)
            json_blob[filename] = prediction
            print(filename, prediction)
        except Exception as exc:
            # Store the error message as a string so the blob stays JSON-serializable
            json_blob[filename] = {'error': str(exc)}
            print(f'Unable to process {filename}: {exc}', file=sys.stderr)

    print('Saving predictions')
    suffix = ''
    try:
        suffix = f'-{model_path.split("model-")[1].split(".h5")[0]}'
    except IndexError:
        print(f'Unable to get suffix from {model_path}')
    json_path = valohai.outputs().path(f'predictions{suffix}.json')
    with open(json_path, 'w') as json_file:
        json.dump(json_blob, json_file, sort_keys=True)

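# `load_model` is presumably `tensorflow.keras.models.load_model` (an .h5 model and a
# TensorFlow image suggest Keras); `process_image` and `predict_image` are project
# helpers that are not shown in the excerpt above. Below is a minimal sketch of what
# they might look like for this MNIST example: the 28x28 grayscale preprocessing and
# the exact shape of the returned dictionary are assumptions, not part of the source.
import numpy as np
import tensorflow as tf
from PIL import Image


def process_image(image: Image.Image):
    """Resize to 28x28 grayscale and detect whether the digit needs inverting."""
    image = image.convert('L').resize((28, 28))
    pixels = np.asarray(image) / 255.0
    # MNIST digits are white on black, so a mostly bright image is treated as inverted
    inverted = pixels.mean() > 0.5
    if inverted:
        pixels = 1.0 - pixels
    return pixels.reshape(1, 28, 28), inverted


def predict_image(model, image, inverted):
    """Run the model and return a JSON-serializable prediction blob."""
    # The train-model example compiles the model with from_logits=True, so apply
    # softmax here to turn the raw outputs into probabilities
    probabilities = tf.nn.softmax(model.predict(image)[0]).numpy()
    return {
        'best_guess': int(np.argmax(probabilities)),
        'inverted': bool(inverted),
        'predictions': {str(i): float(p) for i, p in enumerate(probabilities)},
    }
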
def test_zip_no_mangling(use_test_config_dir):
    paths = set(valohai.inputs("input_with_archive").paths())
    for suffix in (
        "1hello.txt",
        "2world.txt",
        "blerp/3katt.txt",
        "blerp/blonk/4blöf.txt",
        "blerp/blonk/asdf.jpg",
    ):
        assert any(p.endswith(suffix) for p in paths)

def test_get_input_paths(use_test_config_dir):
    assert valohai.inputs("single_image").path().endswith("single_image/Example.jpg")
    assert os.path.exists(valohai.inputs("single_image").path())
    assert valohai.inputs("single_image").path(default="unused_default").endswith(
        "single_image/Example.jpg"
    )
    assert not valohai.inputs("nonono").path()
    assert valohai.inputs("nonono").path(default="default_123") == "default_123"
    assert os.path.exists(valohai.inputs("input_with_archive").path())
    for path in valohai.inputs("input_with_archive").paths():
        assert os.path.exists(path)
    assert len(list(valohai.inputs("input_with_archive").paths())) == 2

def test_get_input_streams(use_test_config_dir):
    assert valohai.inputs("single_image").stream().read(10000)
    assert len(list(valohai.inputs("input_with_archive").streams())) == 5
    for stream in valohai.inputs("input_with_archive").streams():
        assert stream.read(10000)
    assert valohai.inputs("single_image").stream().read()
    assert not valohai.inputs("nonono").stream()
    assert len(list(valohai.inputs("images_in_subdirs").streams("hello/**/hello/*.jpg"))) == 2
    assert len(list(valohai.inputs("images_in_subdirs").streams("hello/**/*.jpg"))) == 2
    assert len(list(valohai.inputs("images_in_subdirs").streams("**/*.jpg"))) == 2
    for stream in valohai.inputs("images_in_subdirs").streams("**/*.jpg"):
        assert stream.read(10000)

def main():
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step preprocess_dataset.py`
    valohai.prepare(
        step='preprocess-dataset',
        image='python:3.9',
        default_inputs={
            'dataset': 'https://valohaidemo.blob.core.windows.net/mnist/mnist.npz',
        },
    )

    # Read input files from the Valohai inputs directory
    # This enables Valohai to version your training data
    # and cache the data for quick experimentation
    print('Loading data')
    with np.load(valohai.inputs('dataset').path(), allow_pickle=True) as file:
        x_train, y_train = file['x_train'], file['y_train']
        x_test, y_test = file['x_test'], file['y_test']

    print('Preprocessing data')
    x_train, x_test = x_train / 255.0, x_test / 255.0

    # Write output files to the Valohai outputs directory
    # This enables Valohai to version your data
    # and upload the outputs to the default data store
    print('Saving preprocessed data')
    path = valohai.outputs().path('preprocessed_mnist.npz')
    np.savez_compressed(path, x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test)

import numpy as np
import valohai as vh

vh.prepare(step='Preprocess data')

# Read input files from the Valohai inputs directory
# This enables Valohai to version your training data
# and cache the data for quick experimentation
with np.load(vh.inputs('mnist').path(), allow_pickle=True) as file:
    x_train, y_train = file['x_train'], file['y_train']
    x_test, y_test = file['x_test'], file['y_test']

x_train, x_test = x_train / 255.0, x_test / 255.0

# Write output files to the Valohai outputs directory
# This enables Valohai to version your data
# and upload the outputs to the default data store
path = vh.outputs('dataset').path('preprocessed_mnist.npz')
np.savez(path, x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test)

import numpy as np
import tensorflow as tf
import valohai as vh

vh.prepare(step='Train model')

# Read input files from the Valohai inputs directory
# This enables Valohai to version your training data
# and cache the data for quick experimentation
with np.load(vh.inputs('preprocessed_mnist').path(), allow_pickle=True) as f:
    x_train, y_train = f['x_train'], f['y_train']
    x_test, y_test = f['x_test'], f['y_test']

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax'),
])

optimizer = tf.keras.optimizers.Adam(
    learning_rate=vh.parameters('learning_rate').value)

model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Print metrics out as JSON
# This enables Valohai to version your metadata
# and for you to use it to compare experiments

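# The excerpt above stops at the metrics comment. A minimal sketch of how it could
# continue: print each epoch's metrics as single-line JSON (which Valohai picks up
# from stdout as metadata) and then fit the model. The `epochs` parameter name is an
# assumption; it is not defined in the excerpt.
import json


def log_metadata(epoch, logs):
    """Print the current epoch's metrics as one JSON line for Valohai to collect."""
    print(json.dumps({
        'epoch': epoch,
        'loss': float(logs['loss']),
        'accuracy': float(logs['accuracy']),
    }))


callback = tf.keras.callbacks.LambdaCallback(on_epoch_end=log_metadata)
model.fit(x_train, y_train,
          epochs=vh.parameters('epochs').value,  # assumed parameter name
          callbacks=[callback])
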
def test_get_input_paths(use_test_config_dir):
    assert valohai.inputs("single_image").path().endswith("single_image/Example.jpg")
    assert os.path.exists(valohai.inputs("single_image").path())
    assert os.path.exists(valohai.inputs("single_image").dir_path())
    assert os.path.isdir(valohai.inputs("single_image").dir_path())
    assert valohai.inputs("single_image").path(default="unused_default").endswith(
        "single_image/Example.jpg"
    )
    assert valohai.inputs("single_image").path("Example.jpg").endswith(
        "single_image/Example.jpg"
    )
    assert valohai.inputs("single_image").path("*.jpg").endswith("single_image/Example.jpg")
    assert valohai.inputs("single_image").path("E*").endswith("single_image/Example.jpg")
    assert valohai.inputs("single_image").path("*").endswith("single_image/Example.jpg")
    assert not valohai.inputs("single_image").path("notbefound*")

    assert next(valohai.inputs("single_image").paths("Example.jpg")).endswith(
        "single_image/Example.jpg"
    )
    assert next(valohai.inputs("single_image").paths("*.jpg")).endswith(
        "single_image/Example.jpg"
    )
    assert next(valohai.inputs("single_image").paths("E*")).endswith("single_image/Example.jpg")
    assert next(valohai.inputs("single_image").paths("*")).endswith("single_image/Example.jpg")
    assert len(list(valohai.inputs("single_image").paths("notbefound*"))) == 0

    assert not valohai.inputs("nonono").path()
    assert valohai.inputs("nonono").path(default="default_123") == "default_123"

    assert os.path.exists(valohai.inputs("input_with_archive").path())
    assert len(list(valohai.inputs("input_with_archive").paths())) == 5
    assert len(list(valohai.inputs("input_with_archive").paths("**/*.txt"))) == 2
    assert len(list(valohai.inputs("input_with_archive").paths("**/a*.jpg"))) == 1
    assert next(valohai.inputs("input_with_archive").paths("**/a*.jpg")).endswith(
        "blerp/blonk/asdf.jpg"
    )
    assert next(valohai.inputs("input_with_archive").paths("**/asdf.jpg")).endswith(
        "blerp/blonk/asdf.jpg"
    )
    assert next(valohai.inputs("input_with_archive").paths("blerp/blonk/asdf.jpg")).endswith(
        "blerp/blonk/asdf.jpg"
    )
    for path in valohai.inputs("input_with_archive").paths():
        assert os.path.exists(path)
    assert len(list(valohai.inputs("input_with_archive").paths())) == 5
    for path in valohai.inputs("input_with_archive").paths("**/*.jpg"):
        assert os.path.exists(path)

    assert next(valohai.inputs("images_in_subdirs").paths("hello/label1/hello/*.jpg")).endswith(
        "label1/hello/foo.jpg"
    )
    assert next(valohai.inputs("images_in_subdirs").paths("hello/label2/hello/*.jpg")).endswith(
        "label2/hello/foo.jpg"
    )
    assert len(list(valohai.inputs("images_in_subdirs").paths("hello/**/hello/*.jpg"))) == 2
    assert len(list(valohai.inputs("images_in_subdirs").paths("hello/**/*.jpg"))) == 2
    assert len(list(valohai.inputs("images_in_subdirs").paths("**/*.jpg"))) == 2
    for path in valohai.inputs("images_in_subdirs").paths("**/*.jpg"):
        assert os.path.exists(path)

def main():
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step train_model.py`
    valohai.prepare(
        step='train-model',
        image='tensorflow/tensorflow:2.6.0',
        default_inputs={
            'dataset': 'https://valohaidemo.blob.core.windows.net/mnist/preprocessed_mnist.npz',
        },
        default_parameters={
            'learning_rate': 0.001,
            'epochs': 5,
        },
    )

    # Read input files from the Valohai inputs directory
    # This enables Valohai to version your training data
    # and cache the data for quick experimentation
    input_path = valohai.inputs('dataset').path()
    with np.load(input_path, allow_pickle=True) as f:
        x_train, y_train = f['x_train'], f['y_train']
        x_test, y_test = f['x_test'], f['y_test']

    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10),
    ])

    optimizer = tf.keras.optimizers.Adam(
        learning_rate=valohai.parameters('learning_rate').value)
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

    # Print metrics out as JSON
    # This enables Valohai to version your metadata
    # and for you to use it to compare experiments
    callback = tf.keras.callbacks.LambdaCallback(on_epoch_end=log_metadata)
    model.fit(x_train, y_train,
              epochs=valohai.parameters('epochs').value,
              callbacks=[callback])

    # Evaluate the model and print out the test metrics as JSON
    test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)
    with valohai.logger() as logger:
        logger.log('test_accuracy', test_accuracy)
        logger.log('test_loss', test_loss)

    # Write output files to the Valohai outputs directory
    # This enables Valohai to version your data
    # and upload the outputs to the default data store
    suffix = uuid.uuid4()
    output_path = valohai.outputs().path(f'model-{suffix}.h5')
    model.save(output_path)

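# The train-model step above passes a `log_metadata` callback to `model.fit`, but its
# definition is not part of the excerpt. A minimal sketch, assuming the usual pattern
# of logging each epoch's Keras metrics through valohai.logger() (the same logger the
# excerpt uses for the test metrics); the exact metric names are an assumption:
def log_metadata(epoch, logs):
    """Log the current epoch's metrics as Valohai metadata."""
    with valohai.logger() as logger:
        logger.log('epoch', epoch)
        logger.log('accuracy', logs['accuracy'])
        logger.log('loss', logs['loss'])
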
def test_prepare(tmpdir, monkeypatch):
    inputs_dir = str(tmpdir.mkdir("inputs"))
    monkeypatch.setenv("VH_INPUTS_DIR", inputs_dir)

    local_file = tmpdir.mkdir("sub").join("hello.txt")
    local_file.write("tiku ja taku ja joku")

    data_dir = tmpdir.mkdir("data")
    local_data = data_dir.join("data1.dat")
    local_data.write("I'm a big data")
    local_data2 = data_dir.join("data2.dat")
    local_data2.write("I'm a huge data")

    parameters = {
        "iambool": True,
        "mestringy": "asdf",
        "integerboi": 123,
        "floaty": 0.0001,
        "makemetrue": False,
        "makemefalse": True,
        "makemeqwer": "asdf",
        "makeme321": 123,
        "makemenegative": 0.0001,
    }

    inputs = {
        "example": "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz",
        "overrideme": "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz",
        "myimages": [
            "https://upload.wikimedia.org/wikipedia/commons/8/84/Example.svg",
            "https://upload.wikimedia.org/wikipedia/commons/0/01/Example_Wikipedia_sandbox_move_UI.png",
        ],
        "localdata_as_list": [str(local_data), str(local_data2)],
        "localdata_with_wildcard": os.path.join(str(data_dir), "*.dat"),
    }

    with monkeypatch.context() as m:
        args = [
            "",
            "--makemetrue=true",
            "--makemefalse=false",
            "--makemeqwer=qwer",
            "--makeme321=321",
            "--makemenegative=-0.123",
            "--some_totally_random_parameter_to_ignore=666",
            f"--overrideme={str(local_file)}",
        ]
        m.setattr(sys, "argv", args)
        valohai.prepare(step="test", default_parameters=parameters, default_inputs=inputs)

        assert valohai.parameters("iambool").value is True
        assert valohai.parameters("mestringy").value == "asdf"
        assert valohai.parameters("integerboi").value == 123
        assert valohai.parameters("floaty").value == 0.0001
        assert valohai.parameters("makemetrue").value is True
        assert valohai.parameters("makemefalse").value is False
        assert valohai.parameters("makemeqwer").value == "qwer"
        assert valohai.parameters("makeme321").value == 321
        assert valohai.parameters("makemenegative").value < 0.0

        assert (get_input_info("example").files[0].uri ==
                "https://valohai-mnist.s3.amazonaws.com/t10k-images-idx3-ubyte.gz")
        assert (get_input_info("myimages").files[0].uri ==
                "https://upload.wikimedia.org/wikipedia/commons/8/84/Example.svg")
        assert (get_input_info("myimages").files[1].uri ==
                "https://upload.wikimedia.org/wikipedia/commons/0/01/Example_Wikipedia_sandbox_move_UI.png")
        assert not get_input_info("overrideme").files[0].uri
        assert os.path.isfile(get_input_info("overrideme").files[0].path)

        assert sum(1 for _ in valohai.inputs("localdata_as_list").paths()) == 2
        assert sum(1 for _ in valohai.inputs("localdata_with_wildcard").paths()) == 2
        for p in valohai.inputs("localdata_as_list").paths():
            assert os.path.isfile(p)
        for p in valohai.inputs("localdata_with_wildcard").paths():
            assert os.path.isfile(p)

def main():
    # valohai.prepare enables us to update the valohai.yaml configuration file with
    # the Valohai command-line client by running `valohai yaml step compare_predictions.py`
    valohai.prepare(
        step='compare-predictions',
        image='python:3.9',
        default_inputs={
            'predictions': {
                'default': None,
                'optional': False,
            },
            'models': [],
        },
    )

    # Here we have some simple example logic to compare predictions and figure out
    # which predictions are the best, so this varies from use case to use case
    BestModel = namedtuple('BestModel', 'prediction, average_best_guess, model')
    best_of_best = BestModel(prediction=None, average_best_guess=None, model=None)
    average_best_guesses = dict()
    for prediction_path in valohai.inputs('predictions').paths():
        filename = os.path.basename(prediction_path)
        extension = os.path.splitext(prediction_path)[1].lower()
        if extension != '.json':
            print(f'{filename} is not a JSON file')
            continue

        with open(prediction_path, 'r') as file:
            blob = json.load(file)

        best_guess_probabilities = []
        for sample_filename, prediction in blob.items():
            best_guess = str(prediction['best_guess'])
            probability = prediction['predictions'][best_guess]
            best_guess_probabilities.append(float32(probability))

        average_best_guess = sum(best_guess_probabilities) / len(best_guess_probabilities)
        average_best_guesses[filename] = average_best_guess
        print(f'{filename} => {average_best_guess} (average best guess probability)')

        suffix = filename.split('predictions-')[1].split('.json')[0]
        model_filename = f"model-{suffix}.h5"

        if not best_of_best.average_best_guess or average_best_guess > best_of_best.average_best_guess:
            best_of_best = BestModel(
                prediction=filename,
                average_best_guess=average_best_guess,
                model=model_filename,
            )

    print(
        f'The best model is the one that generated {best_of_best.prediction} '
        f'({best_of_best.average_best_guess})'
    )

    # Copy the model that produced the best predictions (if one was found) to outputs
    if best_of_best.model:
        model_path = next(
            (model for model in valohai.inputs('models').paths()
             if best_of_best.model in model),
            '',
        )
        if model_path:
            shutil.copy(model_path, valohai.outputs().path(best_of_best.model))

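# For reference, each predictions JSON file consumed by the compare-predictions loop
# above maps image filenames to a best guess and per-class probabilities, matching
# what the batch-inference step writes out. The concrete numbers below are purely
# illustrative:
example_blob = {
    'five-normal.jpg': {
        'best_guess': 5,
        'predictions': {
            '0': 0.01, '1': 0.00, '2': 0.02, '3': 0.03, '4': 0.01,
            '5': 0.88, '6': 0.01, '7': 0.01, '8': 0.02, '9': 0.01,
        },
    },
}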