Example #1
def model_main(activation='relu', width=1000, depth=2, lr=0.5, **kwargs):
  """Main method; this sequences the basic steps often

  - create the model and optimizer,
  - train and record metrics,
  - plot all metrics.

  """
  print(f'Building model with width = {width}, learning rate = {lr}')

  model = build_model(activation, width, depth)
  optimizer = tf.optimizers.SGD(lr)

  with uv.start_run() as r:

    MLFlowReporter().report_params({
        **kwargs,
        **{
            "depth": depth,
            "width": width,
            "lr": lr,
            "activation": activation
        }
    })

    train_and_log(model, optimizer, **kwargs)
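
The example above assumes two project helpers, build_model and train_and_log, that are not shown. A minimal stand-in sketch (the Keras layers, the metric name, and the module-level uv.report call are assumptions, not part of the original example):

import tensorflow as tf
import uv

def build_model(activation='relu', width=1000, depth=2):
  # hypothetical stand-in: a small fully-connected classifier
  layers = [tf.keras.layers.Flatten()]
  for _ in range(depth):
    layers.append(tf.keras.layers.Dense(width, activation=activation))
  layers.append(tf.keras.layers.Dense(10))
  return tf.keras.Sequential(layers)

def train_and_log(model, optimizer, steps=10, **kwargs):
  # hypothetical stand-in: report one scalar per step; a real loop would
  # compute a training loss and apply gradients with the optimizer
  for step in range(steps):
    loss = 0.0  # placeholder value
    uv.report(step=step, k='loss', v=loss)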
Example #2
def test_report_param(mock_pubsub, reporter):
  with tempfile.TemporaryDirectory() as tmpdir:
    mlf.set_tracking_uri(f'file:{tmpdir}/foo')
    _reset_experiment()

    mlflow_cfg = {
        'experiment_name': 'foo',
        'run_name': 'bar',
        'artifact_location': '/foo/bar',
    }

    with uv.start_run(**mlflow_cfg) as active_run, uv.active_reporter(
        reporter()) as r:
      assert r is not None

      param = {'a': 3.14159}
      r.report_param(k='a', v=param['a'])

      assert mlf.active_run() == active_run

      # we need to access the run via a client to inspect most params/tags/metrics
      client = mlf.tracking.MlflowClient()
      run = client.get_run(active_run.info.run_id)
      assert run is not None

      for k, v in param.items():
        p = run.data.params
        assert k in p
        assert p[k] == str(v)
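
These tests rely on fixtures and helpers defined elsewhere in the test module (mock_pubsub, reporter, _reset_experiment). A hypothetical sketch of their shape, with assumed bodies and an assumed import path for MLFlowReporter:

import mlflow as mlf
import pytest

def _reset_experiment():
  # assumed helper: point MLflow back at the default experiment between tests
  mlf.set_experiment('Default')

@pytest.fixture
def reporter():
  # the tests call reporter(), so the fixture presumably yields a constructor
  from uv.mlflow.reporter import MLFlowReporter  # assumed import path
  return MLFlowReporter

@pytest.fixture
def mock_pubsub(monkeypatch):
  # assumed fixture: stub out Pub/Sub so no network calls are made
  ...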
Example #3
def test_report_invalid(mock_pubsub, reporter, value):
    with tempfile.TemporaryDirectory() as tmpdir:
        mlf.set_tracking_uri(f'file:{tmpdir}/foo')
        _reset_experiment()

        mlflow_cfg = {
            'experiment_name': 'foo',
            'run_name': 'bar',
            'artifact_location': '/foo/bar',
        }

        with uv.start_run(**mlflow_cfg) as active_run, uv.active_reporter(
                reporter()) as r:
            assert r is not None

            steps = [{
                'step': 1,
                'm': {
                    'a': value,
                }
            }]

            for p in steps:
                for k, v in p['m'].items():
                    with pytest.raises(ValueError):
                        r.report(step=p['step'], k=k, v=v)
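
test_report_invalid takes value as an argument, so it is presumably parametrized over inputs the reporter rejects with ValueError. A hypothetical parametrization (the concrete invalid values are guesses):

@pytest.mark.parametrize('value', ['not-a-number', None, [1, 2, 3]])
def test_report_invalid(mock_pubsub, reporter, value):
    ...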
Example #4
def test_report(mock_pubsub, reporter):
    with tempfile.TemporaryDirectory() as tmpdir:
        mlf.set_tracking_uri(f'file:{tmpdir}/foo')
        _reset_experiment()

        mlflow_cfg = {
            'experiment_name': 'foo',
            'run_name': 'bar',
            'artifact_location': '/foo/bar',
        }

        with uv.start_run(**mlflow_cfg) as active_run, uv.active_reporter(
                reporter()) as r:
            assert r is not None

            steps = [{
                'step': 1,
                'm': {
                    'a': 3,
                    'b': 3.141
                }
            }, {
                'step': 2,
                'm': {
                    'a': 6,
                    'b': 6.282
                }
            }]

            for p in steps:
                for k, v in p['m'].items():
                    r.report(step=p['step'], k=k, v=v)

            assert mlf.active_run() == active_run

            # we need to access the run via a client to inspect most params/tags/metrics
            client = mlf.tracking.MlflowClient()
            run = client.get_run(active_run.info.run_id)
            assert run is not None

            metrics = run.data.metrics

            metric_data = {}
            # check that the metrics are in the run data
            for k, v in steps[0]['m'].items():
                assert k in metrics
                metric_data[k] = {
                    x.step: x.value
                    for x in client.get_metric_history(active_run.info.run_id,
                                                       k)
                }

            for s in steps:
                cur_step = s['step']
                for k, v in s['m'].items():
                    assert metric_data[k][cur_step] == v
Example #5
def _run_experiments(experiment_name: str):
    '''Performs runs for each of our parameter settings, creating the
    associated mlflow objects and logging parameters.'''

    for i, p in enumerate(PARAMETERS):
        with uv.start_run(
                experiment_name=experiment_name,
                run_name=f'run_{i}',
        ):
            uv.report_params(p)
            _compute(**p)
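
PARAMETERS and _compute are assumed to be module-level definitions in the same script. A minimal illustrative sketch (the names and values are made up; metric logging here uses plain mlflow.log_metric, which works because uv.start_run leaves an MLflow run active):

import mlflow as mlf

# hypothetical parameter grid; each dict becomes one logged run
PARAMETERS = [
    {'gain': 0.5, 'steps': 10},
    {'gain': 1.0, 'steps': 10},
]

def _compute(gain, steps):
    # hypothetical workload: log one metric value per step
    for step in range(steps):
        mlf.log_metric('value', gain * step, step=step)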
Example #6
def test_report_params(mock_pubsub, reporter):
    with tempfile.TemporaryDirectory() as tmpdir:
        mlf.set_tracking_uri(f'file:{tmpdir}/foo')
        _reset_experiment()

        mlflow_cfg = {
            'experiment_name': 'foo',
            'run_name': 'bar',
            'artifact_location': '/foo/bar',
        }

        with uv.start_run(**mlflow_cfg) as active_run, uv.active_reporter(
                reporter()) as r:
            assert r is not None

            params = {
                'a': 3,
                'b': 'string_param',
                INVALID_KEY: INVALID_PARAM_VALUE,
            }
            r.report_params(params)

            assert mlf.active_run() == active_run

            # we need to access the run via a client to inspect most params/tags/metrics
            client = mlf.tracking.MlflowClient()
            run = client.get_run(active_run.info.run_id)
            assert run is not None

            for k, v in params.items():
                if k == INVALID_KEY:
                    k = SANITIZED_KEY
                if v == INVALID_PARAM_VALUE:
                    v = SANITIZED_PARAM_VALUE
                p = run.data.params
                assert k in p
                assert p[k] == str(v)
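
INVALID_KEY / SANITIZED_KEY and INVALID_PARAM_VALUE / SANITIZED_PARAM_VALUE are constants from the test module and are not shown. Purely illustrative values (the exact sanitization rules and length limit are assumptions; MLflow restricts the characters allowed in param keys and the length of param values):

# illustrative only; the real constants live in the test module
INVALID_KEY = 'bad?key'            # '?' is not a valid MLflow param-key character
SANITIZED_KEY = 'bad_key'          # a sanitizer would substitute an allowed character
INVALID_PARAM_VALUE = 'x' * 1000   # longer than the assumed value-length limit
SANITIZED_PARAM_VALUE = INVALID_PARAM_VALUE[:500]  # truncated to the assumed limit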
Example #7
def test_start_run(monkeypatch):

  with tempfile.TemporaryDirectory() as tmpdir:

    mlf.set_tracking_uri(f'file:{tmpdir}/foo')

    # no run should be active initially
    assert mlf.active_run() is None

    # test default args
    with uv.start_run() as r:
      active_run = mlf.active_run()
      assert active_run is not None
      assert active_run == r

    # test explicit experiment name, run name, artifact location
    cfg = {
        'experiment_name': 'foo',
        'run_name': 'bar',
        'artifact_location': 'gs://foo/bar',
    }

    with uv.start_run(**cfg) as r:
      active_run = mlf.active_run()
      assert active_run is not None
      assert active_run == r

      assert r.data.tags['mlflow.runName'] == cfg['run_name']
      assert mlf.get_experiment_by_name(cfg['experiment_name']) is not None
      assert mlf.get_artifact_uri().startswith(cfg['artifact_location'])

    # test env var experiment name, run name, artifact location
    cfg = {
        'MLFLOW_EXPERIMENT_NAME': 'env_foo',
        'MLFLOW_RUN_NAME': 'env_bar',
        'MLFLOW_ARTIFACT_ROOT': 'gs://env/foo/bar'
    }

    for k, v in cfg.items():
      monkeypatch.setenv(k, v)

    with uv.start_run() as r:
      active_run = mlf.active_run()
      assert active_run is not None
      assert active_run == r

      assert r.data.tags['mlflow.runName'] == cfg['MLFLOW_RUN_NAME']
      assert mlf.get_experiment_by_name(
          cfg['MLFLOW_EXPERIMENT_NAME']) is not None
      assert mlf.get_artifact_uri().startswith(cfg['MLFLOW_ARTIFACT_ROOT'])

    for k, v in cfg.items():
      monkeypatch.delenv(k)

    # test env var tags
    cfg = {
        'tag0': 'foo',
        'tag1': 'bar',
    }

    for k, v in cfg.items():
      monkeypatch.setenv(f'ENVVAR_{k}', v)

    with uv.start_run() as r:
      client = mlf.tracking.MlflowClient()
      tags = client.get_run(r.info.run_id).data.tags
      for k, v in cfg.items():
        assert k in tags, pp.pformat(tags)
        assert tags[k] == v, pp.pformat(tags)

    for k in cfg:
      monkeypatch.delenv(f'ENVVAR_{k}')

    # test CAIP tags
    monkeypatch.setenv('CLOUD_ML_JOB_ID', 'foo_cloud_job')

    with uv.start_run() as r:
      client = mlf.tracking.MlflowClient()
      tags = client.get_run(r.info.run_id).data.tags
      assert 'cloud_ml_job_details' in tags, pp.pformat(tags)
      assert tags['cloud_ml_job_details'] == (
          'https://console.cloud.google.com/ai-platform/jobs/foo_cloud_job')

      assert 'cloud_ml_job_id' in tags, pp.pformat(tags)
      assert tags['cloud_ml_job_id'] == 'foo_cloud_job'

    monkeypatch.delenv('CLOUD_ML_JOB_ID')
Example #8
def main(**kwargs):

    with uv.start_run(), uv.active_reporter(MLFlowReporter()):
        uv.report_params(kwargs)
        _run_training(**kwargs)
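
This is the minimal end-to-end pattern: open a run, install an MLFlowReporter, log the hyperparameters, and hand off to a project-specific _run_training. A hypothetical invocation (the real script likely parses CLI flags instead):

if __name__ == '__main__':
    main(batch_size=32, learning_rate=1e-3, epochs=5)  # illustrative hyperparameters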
Example #9
def test_start_run(monkeypatch):

  _reset_experiment()

  with tempfile.TemporaryDirectory() as tmpdir:

    mlf.set_tracking_uri(f'file:{tmpdir}/foo')

    # no run should be active initially
    assert mlf.active_run() is None

    # test default args
    with uv.start_run() as r:
      active_run = mlf.active_run()
      assert active_run is not None
      assert active_run == r

    # test explicit experiment name, run name, artifact location
    cfg = {
        'experiment_name': 'experiment_0',
        'run_name': 'bar',
        'artifact_location': '/foo/bar',
    }

    with uv.start_run(**cfg) as r:
      active_run = mlf.active_run()
      assert active_run is not None
      assert active_run == r

      assert r.data.tags['mlflow.runName'] == cfg['run_name']
      assert mlf.get_experiment_by_name(cfg['experiment_name']) is not None
      assert mlf.get_artifact_uri().startswith(cfg['artifact_location'])

    # test env var experiment name, run name, path-based artifact location
    cfg = {
        'MLFLOW_EXPERIMENT_NAME': 'env_foo',
        'MLFLOW_RUN_NAME': 'env_bar',
        'MLFLOW_ARTIFACT_ROOT': '/tmp/foo/bar'
    }

    for k, v in cfg.items():
      monkeypatch.setenv(k, v)

    with uv.start_run() as r:
      active_run = mlf.active_run()
      assert active_run is not None
      assert active_run == r

      assert r.data.tags['mlflow.runName'] == cfg['MLFLOW_RUN_NAME']
      assert mlf.get_experiment_by_name(
          cfg['MLFLOW_EXPERIMENT_NAME']) is not None
      assert mlf.get_artifact_uri().startswith(cfg['MLFLOW_ARTIFACT_ROOT'])

    for k, v in cfg.items():
      monkeypatch.delenv(k)

    # test env var tags
    cfg = {
        'tag0': 'foo',
        'tag1': 'bar',
    }

    for k, v in cfg.items():
      monkeypatch.setenv(f'ENVVAR_{k}', v)

    with uv.start_run() as r:
      client = mlf.tracking.MlflowClient()
      tags = client.get_run(r.info.run_id).data.tags
      for k, v in cfg.items():
        assert k in tags, pp.pformat(tags)
        assert tags[k] == v, pp.pformat(tags)

    for k in cfg:
      monkeypatch.delenv(f'ENVVAR_{k}')

    # test CAIP tags
    monkeypatch.setenv('CLOUD_ML_JOB_ID', 'foo_cloud_job')

    with uv.start_run() as r:
      client = mlf.tracking.MlflowClient()
      tags = client.get_run(r.info.run_id).data.tags
      assert 'cloud_ml_job_details' in tags, pp.pformat(tags)
      assert tags['cloud_ml_job_details'] == (
          'https://console.cloud.google.com/ai-platform/jobs/foo_cloud_job')

      assert 'cloud_ml_job_id' in tags, pp.pformat(tags)
      assert tags['cloud_ml_job_id'] == 'foo_cloud_job'

    monkeypatch.delenv('CLOUD_ML_JOB_ID')

    # test case where no gcp project is set with gcs artifact store
    def mock_default(scopes=None, request=None, quota_project_id=None):
      return (google.auth.credentials.AnonymousCredentials(), None)

    monkeypatch.setattr('google.auth.default', mock_default)

    cfg = {
        'experiment_name': 'experiment_1',
        'run_name': 'bar',
        'artifact_location': 'gs://foo/bar',
    }

    with uv.start_run(**cfg) as r:
      active_run = mlf.active_run()
      assert active_run is not None
      assert active_run == r

      assert r.data.tags['mlflow.runName'] == cfg['run_name']
      assert mlf.get_experiment_by_name(cfg['experiment_name']) is not None
      assert mlf.get_artifact_uri().startswith(
          cfg['artifact_location']), mlf.get_artifact_uri()
      assert os.environ.get('GOOGLE_CLOUD_PROJECT') is not None

    # test case where gcp project is set with gcs artifact storage
    def mock_default(scopes=None, request=None, quota_project_id=None):
      return (google.auth.credentials.AnonymousCredentials(), 'test_project')

    monkeypatch.setattr('google.auth.default', mock_default)

    cfg = {
        'experiment_name': 'experiment_2',
        'run_name': 'bar',
        'artifact_location': 'gs://foo/bar',
    }

    with uv.start_run(**cfg) as r:
      active_run = mlf.active_run()
      assert active_run is not None
      assert active_run == r

      assert r.data.tags['mlflow.runName'] == cfg['run_name']
      assert mlf.get_experiment_by_name(cfg['experiment_name']) is not None
      assert mlf.get_artifact_uri().startswith(
          cfg['artifact_location']), mlf.get_artifact_uri()

    # test using existing experiment with different artifact location
    #   - this should use original artifact location
    cfg = {
        'experiment_name': 'experiment_2',
        'run_name': 'bar2',
        'artifact_location': '/a/b/c',
    }

    with uv.start_run(**cfg) as r:
      active_run = mlf.active_run()
      assert active_run is not None
      assert active_run == r

      assert r.data.tags['mlflow.runName'] == cfg['run_name']
      assert mlf.get_experiment_by_name(cfg['experiment_name']) is not None
      assert not mlf.get_artifact_uri().startswith(
          cfg['artifact_location']), mlf.get_artifact_uri()
Example #10
def test_log_metrics(mock_pubsub):
  '''tests the log_metrics() function in mlflow_subscriber'''

  # test exception handling by passing invalid message and mlflow client
  log_metrics(None, None, True)

  with tempfile.TemporaryDirectory() as tmpdir:
    mlf.set_tracking_uri(f'file:{tmpdir}/foo')
    _reset_experiment()

    mlflow_cfg = {
        'experiment_name': 'foo',
        'run_name': 'bar',
        'artifact_location': '/foo/bar',
    }

    with uv.start_run(**mlflow_cfg), uv.active_reporter(
        MLFlowPubsubReporter('p', 't')) as r:
      active_run = mlf.active_run()
      steps = [{
          'step': 1,
          'm': {
              'a': 3,
              'b': 3.141
          }
      }, {
          'step': 2,
          'm': {
              'a': 6,
              'b': 6.282
          }
      }]

      client = mlf.tracking.MlflowClient()

      assert len(_MSG_QUEUE) == 0

      for s in steps:
        r.report_all(**s)

      assert len(_MSG_QUEUE) == len(steps)
      for m in _MSG_QUEUE:
        log_metrics(client, m, True)

      run = client.get_run(active_run.info.run_id)
      assert run is not None

      metrics = run.data.metrics

      metric_data = {}
      # check that the metrics are in the run data
      for k, v in steps[0]['m'].items():
        assert k in metrics
        metric_data[k] = {
            x.step: x.value
            for x in client.get_metric_history(active_run.info.run_id, k)
        }

      for s in steps:
        cur_step = s['step']
        for k, v in s['m'].items():
          assert metric_data[k][cur_step] == v
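
_MSG_QUEUE, mock_pubsub, and log_metrics come from the Pub/Sub reporter's test module: the fixture is assumed to capture messages published by MLFlowPubsubReporter in _MSG_QUEUE instead of sending them, and log_metrics replays a captured message against an MlflowClient. A hypothetical sketch of the capture fixture:

_MSG_QUEUE = []

@pytest.fixture
def mock_pubsub(monkeypatch):
  # hypothetical fixture: collect published messages locally
  _MSG_QUEUE.clear()

  def fake_publish(self, topic, data, **attrs):
    _MSG_QUEUE.append(data)

  monkeypatch.setattr('google.cloud.pubsub_v1.PublisherClient.publish',
                      fake_publish, raising=False)
  yield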
Example #11
def main(_):
    """Builds and trains a sentiment classification RNN."""

    # prevent tf from accessing GPU
    tf.config.experimental.set_visible_devices([], "GPU")

    # Get and save config
    config = argparser.parse_args('main')
    logging.info(json.dumps(config, indent=2))

    with uv.start_run(
            experiment_name=config['save']['mlflow_expname'],
            run_name=config['save']['mlflow_runname']), uv.active_reporter(
                MLFlowReporter()):

        reporters.save_config(config)

        uv.report_params(reporters.flatten(config))

        prng_key = random.PRNGKey(config['run']['seed'])

        # Load data.
        vocab_size, train_dset, test_dset = data.get_dataset(config['data'])

        # Build network.
        cell = model_utils.get_cell(config['model']['cell_type'],
                                    num_units=config['model']['num_units'])

        init_fun, apply_fun, _, _ = network.build_rnn(
            vocab_size, config['model']['emb_size'], cell,
            config['model']['num_outputs'])

        loss_fun, acc_fun = optim_utils.loss_and_accuracy(
            apply_fun, config['model'], config['optim'])

        _, initial_params = init_fun(
            prng_key,
            (config['data']['batch_size'], config['data']['max_pad']))

        initial_params = model_utils.initialize(initial_params,
                                                config['model'])

        # get optimizer
        opt, get_params, opt_state, step_fun = optim_utils.optimization_suite(
            initial_params, loss_fun, config['optim'])

        ## Scope setup
        # Reporter setup
        data_store = {}
        reporter = reporters.build_reporters(config['save'], data_store)
        # Static state for scope
        static_state = {
            'acc_fun': acc_fun,
            'loss_fun': loss_fun,
            'param_extractor': get_params,
            'test_set': test_dset
        }

        oscilloscope = m.MetricCallback(static_state)

        def interval_trigger(interval):
            def function_to_return(x):
                return x % interval == 0

            return function_to_return

        oscilloscope.add_measurement({
            'name': 'test_acc',
            'trigger': interval_trigger(config['save']['measure_test']),
            'function': measurements.measure_test_acc
        })
        oscilloscope.add_measurement({
            'name': 'shuffled_test_acc',
            'trigger': interval_trigger(config['save']['measure_test']),
            'function': measurements.measure_shuffled_acc
        })
        oscilloscope.add_measurement({
            'name': 'train_acc',
            'trigger': interval_trigger(config['save']['measure_train']),
            'function': measurements.measure_batch_acc
        })
        oscilloscope.add_measurement({
            'name': 'train_loss',
            'trigger': interval_trigger(config['save']['measure_train']),
            'function': measurements.measure_batch_loss
        })
        oscilloscope.add_measurement({
            'name': 'l2_norm',
            'trigger': interval_trigger(config['save']['measure_test']),
            'function': measurements.measure_l2_norm
        })
        # Train
        global_step = 0
        loss = np.nan
        for epoch in range(config['optim']['num_epochs']):

            for batch_num, batch in enumerate(tfds.as_numpy(train_dset)):
                dynamic_state = {
                    'opt_state': opt_state,
                    'batch_train_loss': loss,
                    'batch': batch
                }

                step_measurements = oscilloscope.measure(
                    int(global_step), dynamic_state)
                if step_measurements is not None:
                    reporter.report_all(int(global_step), step_measurements)

                global_step, opt_state, loss = step_fun(
                    global_step, opt_state, batch)

                if global_step % config['save']['checkpoint_interval'] == 0:
                    params = get_params(opt_state)
                    np_params = np.asarray(params, dtype=object)
                    reporters.save_dict(config, np_params,
                                        f'checkpoint_{global_step}')

        final_measurements = oscilloscope.measure(
            int(global_step),
            dynamic_state,
            measurement_list=['test_acc', 'shuffled_test_acc'])
        reporter.report_all(int(global_step), final_measurements)

        final_params = {
            'params': np.asarray(get_params(opt_state), dtype=object)
        }
        reporters.save_dict(config, final_params, 'final_params')
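
Everything in this example is driven by a nested config dict returned by argparser.parse_args('main'). Its full schema is not shown; the keys accessed above imply roughly the following shape (the values are illustrative only):

# illustrative config shape inferred from the keys accessed in main()
config = {
    'save': {
        'mlflow_expname': 'sentiment_rnn',
        'mlflow_runname': 'run_0',
        'measure_test': 100,
        'measure_train': 10,
        'checkpoint_interval': 1000,
    },
    'run': {'seed': 0},
    'data': {'batch_size': 64, 'max_pad': 100},
    'model': {'cell_type': 'lstm', 'num_units': 256, 'emb_size': 128, 'num_outputs': 2},
    'optim': {'num_epochs': 3},
}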