Example #1
import os

from substra import runner  # assumed import; the test patches substra.runner.*


# create_dir, create_file and docker_run_side_effect are test helpers defined
# elsewhere in the test module (see the sketch after this example); mocker is
# the pytest-mock fixture.
def test_runner_folders(tmp_path, mocker):
    algo_path = create_dir('algo', root=tmp_path)
    train_opener_path = create_file('train/opener.py', root=tmp_path)
    test_opener_path = create_file('test/opener.py', root=tmp_path)
    metrics_path = create_dir('metrics', root=tmp_path)
    train_data_samples_path = create_dir('train_data_samples', root=tmp_path)
    test_data_samples_path = create_dir('test_data_samples_path',
                                        root=tmp_path)
    sandbox_path = create_dir('sandbox', root=tmp_path)

    mocker.patch('substra.runner.docker')
    mocker.patch('substra.runner._docker_run',
                 side_effect=docker_run_side_effect(sandbox_path))
    runner.compute(algo_path=algo_path,
                   train_opener_file=train_opener_path,
                   test_opener_file=test_opener_path,
                   metrics_path=metrics_path,
                   train_data_path=train_data_samples_path,
                   test_data_path=test_data_samples_path,
                   rank=0,
                   inmodels=[],
                   fake_data_samples=False,
                   compute_path=sandbox_path)

    paths = (os.path.join(tmp_path, 'sandbox/model/model'), )
    for p in paths:
        assert os.path.exists(p)
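
The create_dir, create_file and docker_run_side_effect helpers used in these tests are not part of substra; they are utilities assumed to live elsewhere in the test module. A minimal sketch of what they might look like (an assumption, not the project's actual helpers):

import os


def create_dir(path, root):
    # Create (and return the path of) a directory under the test's tmp_path.
    full_path = os.path.join(root, path)
    os.makedirs(full_path, exist_ok=True)
    return full_path


def create_file(path, root, content=''):
    # Create (and return the path of) a file under the test's tmp_path,
    # creating intermediate directories as needed.
    full_path = os.path.join(root, path)
    os.makedirs(os.path.dirname(full_path), exist_ok=True)
    with open(full_path, 'w') as f:
        f.write(content)
    return full_path


def docker_run_side_effect(sandbox_path):
    # Stand-in for the patched _docker_run: instead of running a container,
    # just create the sandbox/model/model file the assertions look for.
    def _side_effect(*args, **kwargs):
        model_dir = os.path.join(sandbox_path, 'model')
        os.makedirs(model_dir, exist_ok=True)
        open(os.path.join(model_dir, 'model'), 'w').close()
    return _side_effect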
Example #2
import os
import zipfile

from substra import runner  # assumed import; the test patches substra.runner.*


def test_runner_archives(mocker, tmp_path):
    train_opener_path = create_file('train/opener.py', root=tmp_path)
    test_opener_path = create_file('test/opener.py', root=tmp_path)
    train_data_samples_path = create_dir('train_data_samples', root=tmp_path)
    test_data_samples_path = create_dir('test_data_samples_path',
                                        root=tmp_path)
    sandbox_path = create_dir('sandbox', root=tmp_path)

    algo_path = create_file('algo.zip', root=tmp_path)
    with zipfile.ZipFile(algo_path, 'w') as z:
        z.write(create_file('Dockerfile', root=tmp_path))
        z.write(create_file('algo.py', root=tmp_path))

    metrics_path = create_file('metrics.zip', root=tmp_path)
    with zipfile.ZipFile(metrics_path, 'w') as z:
        z.write(create_file('Dockerfile', root=tmp_path))
        z.write(create_file('metrics.py', root=tmp_path))

    mocker.patch('substra.runner.docker')
    mocker.patch('substra.runner._docker_run',
                 side_effect=docker_run_side_effect(sandbox_path))
    runner.compute(algo_path=algo_path,
                   train_opener_file=train_opener_path,
                   test_opener_file=test_opener_path,
                   metrics_path=metrics_path,
                   train_data_path=train_data_samples_path,
                   test_data_path=test_data_samples_path,
                   rank=0,
                   inmodels=[],
                   fake_data_samples=False,
                   compute_path=sandbox_path)

    paths = (os.path.join(tmp_path, 'sandbox/model/model'), )
    for p in paths:
        assert os.path.exists(p)
Example #3
import click

from substra import runner  # assumed import path for the runner module


# Click command decorators, if any, are omitted from this excerpt.
def run_local(algo, train_opener, test_opener, metrics, rank,
              train_data_samples, test_data_samples, inmodels,
              fake_data_samples):
    """Run local.

    Train and test the algo located in ALGO (directory or archive) locally.

    This command can be used to check that the objective, dataset and algo
    asset implementations are compatible with one another.

    It executes 3 tasks sequentially in Docker:

    \b
    - train the algo on the train data samples
    - test the resulting model on the test data samples
    - compute the model performance

    \b
    It will create several output files:
    - sandbox/model/model
    - sandbox/pred_test/perf.json
    - sandbox/pred_test/pred
    """
    if fake_data_samples and (train_data_samples or test_data_samples):
        raise click.BadOptionUsage(
            '--fake-data-samples',
            'Options --train-data-samples and --test-data-samples cannot '
            'be used if --fake-data-samples is activated')
    if not fake_data_samples and not train_data_samples and not test_data_samples:
        raise click.BadOptionUsage(
            '--fake-data-samples',
            'Missing option: either --fake-data-samples or both '
            '--train-data-samples and --test-data-samples')
    if not fake_data_samples and train_data_samples and not test_data_samples:
        raise click.BadOptionUsage('--test-data-samples',
                                   'Missing option --test-data-samples')
    if not fake_data_samples and not train_data_samples and test_data_samples:
        raise click.BadOptionUsage('--train-data-samples',
                                   'Missing option --train-data-samples')

    try:
        runner.compute(algo_path=algo,
                       train_opener_file=train_opener,
                       test_opener_file=test_opener,
                       metrics_path=metrics,
                       train_data_path=train_data_samples,
                       test_data_path=test_data_samples,
                       fake_data_samples=fake_data_samples,
                       rank=rank,
                       inmodels=inmodels)
    except runner.PathTraversalException as e:
        raise click.ClickException(
            f'Archive "{e.archive_path}" includes at least 1 file or folder '
            f'located outside the archive root folder: "{e.issue_path}"')
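
The four checks at the top of run_local make --fake-data-samples mutually exclusive with --train-data-samples/--test-data-samples, and require both of the latter when fake data is not used. A minimal sketch of exercising that logic, assuming run_local from the excerpt above can be called as a plain function (no click decorators are shown here); the invalid option combination fails before runner.compute is ever reached:

import click
import pytest


def test_run_local_rejects_fake_and_real_data():
    # Combining --fake-data-samples with --train-data-samples must raise.
    with pytest.raises(click.BadOptionUsage):
        run_local(algo='algo',
                  train_opener='train/opener.py',
                  test_opener='test/opener.py',
                  metrics='metrics',
                  rank=0,
                  train_data_samples='train_data_samples',
                  test_data_samples=None,
                  inmodels=[],
                  fake_data_samples=True)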
Example #4
import pytest

from substra import runner  # assumed import; see the earlier examples


def test_runner_invalid_archives(tmp_path):
    algo_path = create_file('algo.zip', root=tmp_path)
    train_opener_path = create_file('train/opener.py', root=tmp_path)
    test_opener_path = create_file('test/opener.py', root=tmp_path)
    metrics_path = create_file('metrics.zip', root=tmp_path)
    train_data_samples_path = create_dir('train_data_samples', root=tmp_path)
    test_data_samples_path = create_dir('test_data_samples_path',
                                        root=tmp_path)
    sandbox_path = create_dir('sandbox', root=tmp_path)

    with pytest.raises(ValueError, match="Archive must be zip or tar.gz"):
        runner.compute(algo_path=algo_path,
                       train_opener_file=train_opener_path,
                       test_opener_file=test_opener_path,
                       metrics_path=metrics_path,
                       train_data_path=train_data_samples_path,
                       test_data_path=test_data_samples_path,
                       rank=0,
                       inmodels=[],
                       fake_data_samples=False,
                       compute_path=sandbox_path)
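
Example #4 passes empty files named algo.zip and metrics.zip; since they cannot be read as zip or tar.gz archives, runner.compute rejects them with the ValueError shown. By contrast, a valid tar.gz algo archive could be built as follows (a sketch assuming the same create_file helper as above, mirroring the Dockerfile and algo.py contents of the zip built in Example #2):

import tarfile


def make_algo_targz(tmp_path):
    # Build a small but valid .tar.gz archive containing a Dockerfile and
    # an algo.py, the same two files Example #2 zips into algo.zip.
    algo_path = create_file('algo.tar.gz', root=tmp_path)
    with tarfile.open(algo_path, 'w:gz') as tar:
        tar.add(create_file('Dockerfile', root=tmp_path), arcname='Dockerfile')
        tar.add(create_file('algo.py', root=tmp_path), arcname='algo.py')
    return algo_path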