Example #1
def train_random_forest(ntrees):
    with kiwi.start_run():
        rf = H2ORandomForestEstimator(ntrees=ntrees)
        train_cols = [n for n in wine.col_names if n != "quality"]
        rf.train(train_cols,
                 "quality",
                 training_frame=train,
                 validation_frame=test)

        kiwi.log_param("ntrees", ntrees)

        kiwi.log_metric("rmse", rf.rmse())
        kiwi.log_metric("r2", rf.r2())
        kiwi.log_metric("mae", rf.mae())

        kiwi.h2o.log_model(rf, "model")
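The snippet assumes wine, train, and test H2O frames prepared earlier in the file. A minimal sketch of that preparation, assuming the canonical UCI wine-quality CSV and a 70/30 split (the URL and ratio are assumptions, not part of the original example):

import h2o
from h2o.estimators.random_forest import H2ORandomForestEstimator

h2o.init()
# Assumed data source; the original file may load the data differently.
wine = h2o.import_file(
    "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv")
train, test = wine.split_frame(ratios=[0.7], seed=42)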
Example #2
def test_log_param():
    with start_run() as active_run:
        run_id = active_run.info.run_id
        kiwi.log_param("name_1", "a")
        kiwi.log_param("name_2", "b")
        kiwi.log_param("nested/nested/name", 5)
    finished_run = tracking.MlflowClient().get_run(run_id)
    # Validate params
    assert finished_run.data.params == {
        "name_1": "a",
        "name_2": "b",
        "nested/nested/name": "5"
    }
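Note that the integer logged under nested/nested/name comes back as the string "5": MLflow-style tracking stores all params as strings, which is exactly what this assertion verifies.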
Example #3
    l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5

    with kiwi.start_run():
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        predicted_qualities = lr.predict(test_x)

        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print("  RMSE: %s" % rmse)
        print("  MAE: %s" % mae)
        print("  R2: %s" % r2)

        kiwi.log_param("alpha", alpha)
        kiwi.log_param("l1_ratio", l1_ratio)
        kiwi.log_metric("rmse", rmse)
        kiwi.log_metric("r2", r2)
        kiwi.log_metric("mae", mae)

        tracking_url_type_store = urlparse(kiwi.get_tracking_uri()).scheme

        # Model registry does not work with file store
        if tracking_url_type_store != "file":

            # Register the model
            # There are other ways to use the Model Registry, which depends on the use case,
            # please refer to the doc for more information:
            # https://mlflow.org/docs/latest/model-registry.html#api-workflow
            kiwi.sklearn.log_model(
                lr, "model", registered_model_name="ElasticnetWineModel")
        else:
            kiwi.sklearn.log_model(lr, "model")
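The eval_metrics helper called above is not shown in this snippet. A minimal sketch consistent with the (rmse, mae, r2) tuple the call site expects, using scikit-learn (an assumption about the helper, not its verified source):

import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def eval_metrics(actual, pred):
    # RMSE, MAE and R^2, in the order the caller unpacks them
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2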
Example #4
    test_accuracy = 100.0 * correct / len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset), test_accuracy))
    step = (epoch + 1) * len(train_loader)
    log_scalar('test_loss', test_loss, step)
    log_scalar('test_accuracy', test_accuracy, step)

def log_scalar(name, value, step):
    """Log a scalar value to both MLflow and TensorBoard"""
    writer.add_scalar(name, value, step)
    kiwi.log_metric(name, value)

with kiwi.start_run():
    # Log our parameters into mlflow
    for key, value in vars(args).items():
        kiwi.log_param(key, value)

    # Create a SummaryWriter to write TensorBoard events locally
    output_dir = tempfile.mkdtemp()
    writer = SummaryWriter(output_dir)
    print("Writing TensorBoard events locally to %s\n" % output_dir)

    # Perform the training
    for epoch in range(1, args.epochs + 1):
        train(epoch)
        test(epoch)

    # Upload the TensorBoard event logs as a run artifact
    print("Uploading TensorBoard events as a run artifact...")
    kiwi.log_artifacts(output_dir, artifact_path="events")
    print("\nLaunch TensorBoard with:\n\ntensorboard --logdir=%s" %
Example #5
    def objective(args):
        # start a run
        with kiwi.start_run(experiment_id=current_experiment_id):
            # register hyperparams
            for key, value in args.items():
                kiwi.log_param(key, value)

            # Define model
            model = RankedNetworkCNNModule(args['learning_rate'],
                                           dataset.get_embeddings(),
                                           hidden_dim=args['hidden'],
                                           output_labels=2)

            # Train (obviously)
            trainer = pl.Trainer(max_epochs=15, logger=KiwiLogger())
            trainer.fit(model, train_loader, val_loader)

            # Evaluation on held-out test-set
            with torch.no_grad():
                model.eval()
                results = pd.DataFrame(columns=['labels', 'predictions'])
                for batch_idx, batch in enumerate(test_loader):
                    y_hat = model(batch['a'], batch['b'])

                    # DataFrame.append was removed in pandas 2.0; pd.concat is
                    # the behavior-preserving replacement.
                    results = pd.concat([
                        results,
                        pd.DataFrame({
                            'labels': batch['label'].flatten(),
                            'predictions': y_hat.detach().argmax(axis=1)
                        })
                    ], ignore_index=True)
                results.to_csv()

                # With a nice confusion matrix
                confusion_matrix(y_pred=results['predictions'].values,
                                 y_true=results['labels'].values,
                                 classes=[0, 1])

                cm = ConfusionMatrix(
                    actual_vector=results['labels'].values,
                    predict_vector=results['predictions'].values)

                output_test_results = "cm.txt"
                cm.save_stat(output_test_results)

                output_test_predictions_file = "test_predictions.txt"
                np.savetxt(output_test_predictions_file,
                           results['predictions'].values,
                           delimiter=",")

                kiwi.log_metric(key="test_acc", value=cm.Overall_ACC)
                kiwi.log_metric(key="test_f1_micro", value=cm.F1_Micro)
                kiwi.log_metric(key="test_f1_macro", value=cm.F1_Macro)
                kiwi.log_metric(key="test_ci_pm",
                                value=cm.CI95[1] - cm.Overall_ACC)
                kiwi.log_metric(key="test_ci_pm",
                                value=cm.CI95[1] - cm.Overall_ACC)
                kiwi.log_artifact(output_test_predictions_file)
                kiwi.log_artifact(output_test_results + ".pycm")

            return cm.Overall_ACC
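The objective returns raw accuracy, but optimizers such as hyperopt's fmin minimize their objective, so a driver would typically negate it. An illustrative sketch (the search-space bounds are assumptions):

from hyperopt import fmin, tpe, hp

space = {
    'learning_rate': hp.loguniform('learning_rate', -7, -2),
    'hidden': hp.choice('hidden', [64, 128, 256]),
}
# fmin minimizes, so negate the accuracy returned by objective
best = fmin(fn=lambda args: -objective(args), space=space,
            algo=tpe.suggest, max_evals=20)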
Example #6
def log_params(parameters):
    for k, v in parameters.items():
        kiwi.log_param(k, v)
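A usage sketch of this helper; each key/value pair becomes one kiwi.log_param call inside the active run (upstream MLflow offers mlflow.log_params for the same batch operation):

log_params({"learning_rate": 0.01, "epochs": 10, "optimizer": "sgd"})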
Example #7
                'experiement': [rand()],
                'run': [rand()],
                'because': [rand()],
                'we': [rand()],
                'need': [rand()],
                'to': [rand()],
                'check': [rand()],
                'how': [rand()],
                'it': [rand()],
                'handles': [rand()],
            }
            log_metrics(metrics)

        with kiwi.start_run(run_name='sort_child.py', nested=True):
            kiwi.log_metric('test_metric', 1)
            kiwi.log_param('test_param', 1)

        with kiwi.start_run(run_name='sort_child.py', nested=True):
            kiwi.log_metric('test_metric', 2)
            kiwi.log_param('test_param', 2)

    # Grandchildren
    with kiwi.start_run(run_name='parent'):
        with kiwi.start_run(run_name='child', nested=True):
            with kiwi.start_run(run_name='grandchild', nested=True):
                pass
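    # nested=True attaches each new run as a child of the currently active
    # run, which is how the parent/child/grandchild hierarchy is recorded.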

    # Loop
    loop_1_run_id = None
    loop_2_run_id = None
    with kiwi.start_run(run_name='loop-1') as run_1:
Example #8
def test_search_runs():
    kiwi.set_experiment("exp-for-search")
    # Create a run and verify that the current active experiment is the one we just set
    logged_runs = {}
    with kiwi.start_run() as active_run:
        logged_runs["first"] = active_run.info.run_id
        kiwi.log_metric("m1", 0.001)
        kiwi.log_metric("m2", 0.002)
        kiwi.log_metric("m1", 0.002)
        kiwi.log_param("p1", "a")
        kiwi.set_tag("t1", "first-tag-val")
    with kiwi.start_run() as active_run:
        logged_runs["second"] = active_run.info.run_id
        kiwi.log_metric("m1", 0.008)
        kiwi.log_param("p2", "aa")
        kiwi.set_tag("t2", "second-tag-val")

    def verify_runs(runs, expected_set):
        assert ({r.info.run_id for r in runs} ==
                {logged_runs[r] for r in expected_set})

    experiment_id = MlflowClient().get_experiment_by_name(
        "exp-for-search").experiment_id

    # 2 runs in this experiment
    assert len(MlflowClient().list_run_infos(experiment_id,
                                             ViewType.ACTIVE_ONLY)) == 2

    # 2 runs have metric "m1" > 0.0001
    runs = MlflowClient().search_runs([experiment_id], "metrics.m1 > 0.0001")
    verify_runs(runs, ["first", "second"])

    # 1 run has metric "m1" > 0.002
    runs = MlflowClient().search_runs([experiment_id], "metrics.m1 > 0.002")
    verify_runs(runs, ["second"])

    # no runs with metric "m1" > 0.1
    runs = MlflowClient().search_runs([experiment_id], "metrics.m1 > 0.1")
    verify_runs(runs, [])

    # 1 run with metric "m2" > 0
    runs = MlflowClient().search_runs([experiment_id], "metrics.m2 > 0")
    verify_runs(runs, ["first"])

    # 1 run each with param "p1" and "p2"
    runs = MlflowClient().search_runs([experiment_id], "params.p1 = 'a'",
                                      ViewType.ALL)
    verify_runs(runs, ["first"])
    runs = MlflowClient().search_runs([experiment_id], "params.p2 != 'a'",
                                      ViewType.ALL)
    verify_runs(runs, ["second"])
    runs = MlflowClient().search_runs([experiment_id], "params.p2 = 'aa'",
                                      ViewType.ALL)
    verify_runs(runs, ["second"])

    # 1 run each with tag "t1" and "t2"
    runs = MlflowClient().search_runs([experiment_id],
                                      "tags.t1 = 'first-tag-val'",
                                      ViewType.ALL)
    verify_runs(runs, ["first"])
    runs = MlflowClient().search_runs([experiment_id], "tags.t2 != 'qwerty'",
                                      ViewType.ALL)
    verify_runs(runs, ["second"])
    runs = MlflowClient().search_runs([experiment_id],
                                      "tags.t2 = 'second-tag-val'",
                                      ViewType.ALL)
    verify_runs(runs, ["second"])

    # delete "first" run
    MlflowClient().delete_run(logged_runs["first"])
    runs = MlflowClient().search_runs([experiment_id], "params.p1 = 'a'",
                                      ViewType.ALL)
    verify_runs(runs, ["first"])

    runs = MlflowClient().search_runs([experiment_id], "params.p1 = 'a'",
                                      ViewType.DELETED_ONLY)
    verify_runs(runs, ["first"])

    runs = MlflowClient().search_runs([experiment_id], "params.p1 = 'a'",
                                      ViewType.ACTIVE_ONLY)
    verify_runs(runs, [])
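The filter strings above use MLflow's search grammar: a metrics., params., or tags. prefix, a comparison operator, and a literal. Clauses can also be joined with and, e.g.:

# Conjunctions are supported by the filter grammar (or is not).
runs = MlflowClient().search_runs(
    [experiment_id], "metrics.m1 > 0.001 and params.p1 = 'a'")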
Example #9
import os
import shutil
import sys
import random
import tempfile

import kiwi
from kiwi import (log_metric, log_param, log_artifacts, get_artifact_uri,
                  active_run, get_tracking_uri, log_artifact)

if __name__ == "__main__":
    print("Running {} with tracking URI {}".format(sys.argv[0],
                                                   get_tracking_uri()))
    log_param("param1", 5)
    log_metric("foo", 5)
    log_metric("foo", 6)
    log_metric("foo", 7)
    log_metric("random_int", random.randint(0, 100))
    run_id = active_run().info.run_id
    # Get run metadata & data from the tracking server
    service = kiwi.tracking.MlflowClient()
    run = service.get_run(run_id)
    print("Metadata & data for run with UUID %s: %s" % (run_id, run))
    local_dir = tempfile.mkdtemp()
    message = "test artifact written during run %s within artifact URI %s\n" \
              % (active_run().info.run_id, get_artifact_uri())
    try:
        file_path = os.path.join(local_dir, "some_output_file.txt")
        with open(file_path, "w") as handle:
            handle.write(message)
        log_artifacts(local_dir, "some_subdir")
    finally:
        # Clean up the temp directory whether or not logging succeeded
        shutil.rmtree(local_dir)
Example #10
def log_hyperparams(self, params):
    for key, value in vars(params).items():
        kiwi.log_param(key, value)
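The self parameter shows this was lifted from a logger class, and vars() means it expects an argparse-style Namespace. An illustrative call (logger is a hypothetical instance of that class):

from argparse import Namespace

logger.log_hyperparams(Namespace(lr=0.01, epochs=10))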
Example #11
import os
from random import random, randint

from kiwi import log_metric, log_param, log_artifacts

if __name__ == "__main__":
    print("Running mlflow_tracking.py")

    log_param("param1", randint(0, 100))

    log_metric("foo", random())
    log_metric("foo", random() + 1)
    log_metric("foo", random() + 2)

    if not os.path.exists("outputs"):
        os.makedirs("outputs")
    with open("outputs/test.txt", "w") as f:
        f.write("hello world!")

    log_artifacts("outputs")
Example #12
File: train.py Project: iPieter/kiwi
def train(image_files,
          labels,
          domain,
          image_width=224,
          image_height=224,
          epochs=1,
          batch_size=16,
          test_ratio=0.2,
          seed=None):
    """
    Train VGG16 model on provided image files. This will create a new MLflow run and log all
    parameters, metrics and the resulting model with MLflow. The resulting model is an instance
    of KerasImageClassifierPyfunc - a custom python function model that embeds all necessary
    preprocessing together with the VGG16 Keras model. The resulting model can be applied
    directly to base64-encoded image data.

    :param image_height: Height of the input image in pixels.
    :param image_width: Width of the input image in pixels.
    :param image_files: List of image files to be used for training.
    :param labels: List of labels for the image files.
    :param domain: Dictionary representing the domain of the response.
                   Provides mapping label-name -> label-id.
    :param epochs: Number of epochs to train the model for.
    :param batch_size: Batch size used during training.
    :param test_ratio: Fraction of dataset to be used for validation. This data will not be used
                       during training.
    :param seed: Random seed. Used e.g. when splitting the dataset into train / validation.
    """
    assert len(set(labels)) == len(domain)

    input_shape = (image_width, image_height, 3)

    with kiwi.start_run() as run:
        kiwi.log_param("epochs", str(epochs))
        kiwi.log_param("batch_size", str(batch_size))
        kiwi.log_param("validation_ratio", str(test_ratio))
        if seed is not None:  # 0 is a valid seed, so compare against None
            kiwi.log_param("seed", str(seed))

        def _read_image(filename):
            with open(filename, "rb") as f:
                return f.read()

        with tf.Graph().as_default() as g:
            with tf.Session(graph=g).as_default():
                dims = input_shape[:2]
                x = np.array([
                    decode_and_resize_image(_read_image(x), dims)
                    for x in image_files
                ])
                y = np_utils.to_categorical(np.array(labels),
                                            num_classes=len(domain))
                train_size = 1 - test_ratio
                x_train, x_valid, y_train, y_valid = train_test_split(
                    x, y, random_state=seed, train_size=train_size)
                model = _create_model(input_shape=input_shape,
                                      classes=len(domain))
                model.compile(optimizer=keras.optimizers.SGD(decay=1e-5,
                                                             nesterov=True,
                                                             momentum=.9),
                              loss=keras.losses.categorical_crossentropy,
                              metrics=["accuracy"])
                sorted_domain = sorted(domain.keys(), key=lambda x: domain[x])
                model.fit(x=x_train,
                          y=y_train,
                          validation_data=(x_valid, y_valid),
                          epochs=epochs,
                          batch_size=batch_size,
                          callbacks=[
                              MLflowLogger(model=model,
                                           x_train=x_train,
                                           y_train=y_train,
                                           x_valid=x_valid,
                                           y_valid=y_valid,
                                           artifact_path="model",
                                           domain=sorted_domain,
                                           image_dims=input_shape)
                          ])
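The MLflowLogger callback wired into model.fit is project-specific. A minimal sketch of the general idea, a Keras callback forwarding per-epoch metrics to the tracker (everything below is an assumption about its internals, not the actual implementation):

import keras

class SimpleKiwiLogger(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        # Forward each Keras metric (loss, acc, val_*) to the tracking server
        for name, value in (logs or {}).items():
            kiwi.log_metric(name, float(value), step=epoch)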