Example No. 1
def test_wide_deep(notebooks, tmp):
    notebook_path = notebooks["wide_deep"]

    params = {
        "MOVIELENS_DATA_SIZE": "100k",
        "EPOCHS": 1,
        "EVALUATE_WHILE_TRAINING": False,
        "MODEL_DIR": tmp,
        "EXPORT_DIR_BASE": tmp,
        "RATING_METRICS": ["rmse", "mae"],
        "RANKING_METRICS": ["ndcg_at_k", "precision_at_k"],
    }
    pm.execute_notebook(
        notebook_path, OUTPUT_NOTEBOOK, kernel_name=KERNEL_NAME, parameters=params
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index("name")["value"]

    # Model performance is highly dependent on the initial random weights
    # when the number of epochs is small and the dataset is small.
    # Therefore, in the smoke-test context, only check that model training works,
    # using the minimum performance bounds below:
    assert results["rmse"] < 2.0
    assert results["mae"] < 2.0
    assert results["ndcg_at_k"] > 0.0
    assert results["precision_at_k"] > 0.0
Example No. 2
def test_fastai(notebooks):
    notebook_path = notebooks["fastai"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k", EPOCHS=1),
    )
def test_lightgbm(notebooks):
    notebook_path = notebooks["lightgbm_quickstart"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(
            MAX_LEAF=32,
            MIN_DATA=20,
            NUM_OF_TREES=10,
            TREE_LEARNING_RATE=0.15,
            EARLY_STOPPING_ROUNDS=20,
            METRIC="auc",
        ),
    )
def task_wrapper(*args, **kwargs):
    '''Skip notebook execution if the task has already finished (marked by a flag file).'''
    start = datetime.now()
    fin_flag_path = os.path.join(os.path.dirname(kwargs['output']), 'fin_flag')
    if os.path.exists(fin_flag_path):
        print("Task has been finished before.")
    else:
        pm.execute_notebook(*args, **kwargs)
        open(fin_flag_path, 'a').close()  # touch to indicate the task has been finished
    return datetime.now() - start
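
A hypothetical invocation of the wrapper (paths illustrative). Everything is forwarded to pm.execute_notebook, and the output notebook must be passed as the keyword output, since the completion flag is written beside it; this assumes a papermill version whose execute_notebook accepts that keyword, as the wrapper expects:

# Hypothetical usage of task_wrapper; 'output' must be a keyword argument
# because the fin_flag file is created next to the output notebook.
elapsed = task_wrapper(
    "train.ipynb",                    # input notebook (illustrative)
    output="runs/exp1/output.ipynb",  # completion flag: runs/exp1/fin_flag
    parameters=dict(EPOCHS=1),
)
print("Elapsed:", elapsed)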
Example No. 5
def test_ncf_deep_dive(notebooks):
    notebook_path = notebooks["ncf_deep_dive"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(
            TOP_K=10, MOVIELENS_DATA_SIZE="100k", EPOCHS=1, BATCH_SIZE=2048
        ),
    )
def test_mmlspark_lightgbm_criteo_runs(notebooks):
    notebook_path = notebooks["mmlspark_lightgbm_criteo"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(
            DATA_SIZE="sample",
            NUM_ITERATIONS=10,
            EARLY_STOPPING_ROUND=2,
        )
    )
Example No. 7
def test_notebook_dkn(notebooks):
    notebook_path = notebooks["dkn_quickstart"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(epoch=1),
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index("name")["value"]

    assert results["res"]["auc"] == pytest.approx(0.4707, rel=TOL, abs=ABS_TOL)
    assert results["res"]["acc"] == pytest.approx(0.5725, rel=TOL, abs=ABS_TOL)
def test_template_runs(notebooks):
    notebook_path = notebooks["template"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        parameters=dict(PM_VERSION=pm.__version__),
        kernel_name=KERNEL_NAME,
    )
    nb = pm.read_notebook(OUTPUT_NOTEBOOK)
    df = nb.dataframe
    assert df.shape[0] == 2
    check_version = df.loc[df["name"] == "checked_version", "value"].values[0]
    assert check_version is True
def test_spark_tuning(notebooks):
    notebook_path = notebooks["spark_tuning"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(
            NUMBER_CORES="*",
            NUMBER_ITERATIONS=3,
            RANK=[5, 5],
            REG=[0.1, 0.01]
        )
    )
Example No. 10
def test_fastai_integration(notebooks, size, epochs, expected_values):
    notebook_path = notebooks["fastai"]
    pm.execute_notebook(notebook_path, OUTPUT_NOTEBOOK, kernel_name=KERNEL_NAME)
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE=size, EPOCHS=epochs),
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index("name")["value"]

    for key, value in expected_values.items():
        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
Example No. 11
def test_ncf_smoke(notebooks):
    notebook_path = notebooks["ncf"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k", EPOCHS=1, BATCH_SIZE=256),
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index("name")["value"]

    # There is too much variability to assert approximate equality, so just check upper bounds
    assert results["map"] < 0.05
    assert results["ndcg"] < 0.20
    assert results["precision"] < 0.17
    assert results["recall"] < 0.10
def test_mmlspark_lightgbm_criteo_smoke(notebooks):
    notebook_path = notebooks["mmlspark_lightgbm_criteo"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(
            DATA_SIZE="sample",
            NUM_ITERATIONS=50,
            EARLY_STOPPING_ROUND=10
        )
    )
    nb = pm.read_notebook(OUTPUT_NOTEBOOK)
    results = nb.dataframe.set_index("name")["value"]
    assert results["auc"] == pytest.approx(0.68895, rel=TOL, abs=ABS_TOL)
Example No. 13
def test_fastai(notebooks):
    notebook_path = notebooks["fastai"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k", EPOCHS=1),
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index("name")["value"]

    assert results["rmse"] == pytest.approx(0.959352, rel=TOL, abs=ABS_TOL)
    assert results["mae"] == pytest.approx(0.766504, rel=TOL, abs=ABS_TOL)
    assert results["rsquared"] == pytest.approx(0.287902, rel=TOL, abs=ABS_TOL)
    assert results["exp_var"] == pytest.approx(0.289008, rel=TOL, abs=ABS_TOL)
    assert results["map"] == pytest.approx(0.024379, rel=TOL, abs=ABS_TOL)
    assert results["ndcg"] == pytest.approx(0.148380, rel=TOL, abs=ABS_TOL)
    assert results["precision"] == pytest.approx(0.138494, rel=TOL, abs=ABS_TOL)
    assert results["recall"] == pytest.approx(0.058747, rel=TOL, abs=ABS_TOL)
Example No. 14
def test_is_jupyter():
    # Test on the terminal
    assert is_jupyter() is False
    assert is_databricks() is False

    # Test on Jupyter notebook
    path = os.path.join("tests", "unit", "test_notebook_utils.ipynb")
    pm.execute_notebook(
        path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
    )
    nb = pm.read_notebook(OUTPUT_NOTEBOOK)
    df = nb.dataframe
    result_is_jupyter = df.loc[df["name"] == "is_jupyter", "value"].values[0]
    assert result_is_jupyter is True
    result_is_databricks = df.loc[df["name"] == "is_databricks", "value"].values[0]
    assert result_is_databricks is False
Example No. 15
def test_wide_deep(notebooks, size, epochs, expected_values, tmp):
    notebook_path = notebooks["wide_deep"]

    params = {
        "MOVIELENS_DATA_SIZE": size,
        "EPOCHS": epochs,
        "EVALUATE_WHILE_TRAINING": False,
        "MODEL_DIR": tmp,
        "EXPORT_DIR_BASE": tmp,
        "RATING_METRICS": ["rmse", "mae", "rsquared", "exp_var"],
        "RANKING_METRICS": ["ndcg_at_k", "map_at_k", "precision_at_k", "recall_at_k"],
    }
    pm.execute_notebook(
        notebook_path, OUTPUT_NOTEBOOK, kernel_name=KERNEL_NAME, parameters=params
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index("name")["value"]

    for key, value in expected_values.items():
        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
Example No. 16
def test_notebook_xdeepfm(notebooks):
    notebook_path = notebooks["xdeepfm_quickstart"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(
            EPOCHS_FOR_SYNTHETIC_RUN=20,
            EPOCHS_FOR_CRITEO_RUN=1,
            BATCH_SIZE_SYNTHETIC=128,
            BATCH_SIZE_CRITEO=512,
        ),
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index("name")["value"]

    assert results["res_syn"]["auc"] == pytest.approx(0.982, rel=TOL, abs=ABS_TOL)
    assert results["res_syn"]["logloss"] == pytest.approx(0.2306, rel=TOL, abs=ABS_TOL)
    assert results["res_real"]["auc"] == pytest.approx(0.628, rel=TOL, abs=ABS_TOL)
    assert results["res_real"]["logloss"] == pytest.approx(0.5589, rel=TOL, abs=ABS_TOL)
def test_als_pyspark_integration(notebooks):
    notebook_path = notebooks["als_pyspark"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="1m"),
    )
    nb = pm.read_notebook(OUTPUT_NOTEBOOK)
    results = nb.dataframe.set_index("name")["value"]
    start_or_get_spark("ALS PySpark").stop()

    assert results["map"] == pytest.approx(0.00201, rel=TOL, abs=ABS_TOL)
    assert results["ndcg"] == pytest.approx(0.02516, rel=TOL, abs=ABS_TOL)
    assert results["precision"] == pytest.approx(0.03172, rel=TOL, abs=ABS_TOL)
    assert results["recall"] == pytest.approx(0.009302, rel=TOL, abs=ABS_TOL)
    assert results["rmse"] == pytest.approx(0.8621, rel=TOL, abs=ABS_TOL)
    assert results["mae"] == pytest.approx(0.68023, rel=TOL, abs=ABS_TOL)
    assert results["exp_var"] == pytest.approx(0.4094, rel=TOL, abs=ABS_TOL)
    assert results["rsquared"] == pytest.approx(0.4038, rel=TOL, abs=ABS_TOL)
def test_als_pyspark_smoke(notebooks):
    notebook_path = notebooks["als_pyspark"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k"),
    )
    nb = pm.read_notebook(OUTPUT_NOTEBOOK)
    results = nb.dataframe.set_index("name")["value"]
    start_or_get_spark("ALS PySpark").stop()

    assert results["map"] == pytest.approx(0.0052, rel=TOL, abs=ABS_TOL)
    assert results["ndcg"] == pytest.approx(0.0463, rel=TOL, abs=ABS_TOL)
    assert results["precision"] == pytest.approx(0.0487, rel=TOL, abs=ABS_TOL)
    assert results["recall"] == pytest.approx(0.0177, rel=TOL, abs=ABS_TOL)
    assert results["rmse"] == pytest.approx(0.9636, rel=TOL, abs=ABS_TOL)
    assert results["mae"] == pytest.approx(0.7508, rel=TOL, abs=ABS_TOL)
    assert results["exp_var"] == pytest.approx(0.2672, rel=TOL, abs=ABS_TOL)
    assert results["rsquared"] == pytest.approx(0.2611, rel=TOL, abs=ABS_TOL)
def test_ic_22_notebook_run(
    classification_notebooks,
    subscription_id,
    resource_group,
    workspace_name,
    workspace_region,
):
    notebook_path = classification_notebooks[
        "22_deployment_on_azure_kubernetes_service"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        parameters=dict(
            PM_VERSION=pm.__version__,
            subscription_id=subscription_id,
            resource_group=resource_group,
            workspace_name=workspace_name,
            workspace_region=workspace_region,
        ),
        kernel_name=KERNEL_NAME,
    )
Example No. 20
    def test_boston(self):
        notebook_path = "tasks/random-forest-regressor/Experiment.ipynb"

        papermill.execute_notebook(
            notebook_path,
            "/dev/null",
            parameters=dict(
                dataset="/tmp/data/boston.csv",
                target="medv",

                filter_type="remover",
                model_features="",

                one_hot_features="",

                n_estimators=10,
                criterion="mse",
                max_depth=None,
                max_features="auto",
            ),
        )
Example No. 21
def test_notebooks_advanced(isolated_filesystem):
    notebooks = glob.glob("advanced/*.ipynb")
    notebooks += glob.glob("advanced/Split Neural Network/*.ipynb")
    for notebook in notebooks:
        list_name = Path("examples/tutorials/") / notebook
        if list_name in not_excluded_notebooks:
            not_excluded_notebooks.remove(list_name)
            res = pm.execute_notebook(notebook,
                                      "/dev/null",
                                      parameters={"epochs": 1},
                                      timeout=300)
            assert isinstance(res, nbformat.notebooknode.NotebookNode)
Example No. 22
def test_fastai_smoke(notebooks, output_notebook, kernel_name):
    notebook_path = notebooks["fastai"]
    pm.execute_notebook(
        notebook_path,
        output_notebook,
        kernel_name=kernel_name,
        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k", EPOCHS=1),
    )
    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index(
        "name")["data"]

    assert results["rmse"] == pytest.approx(0.959352, rel=TOL, abs=ABS_TOL)
    assert results["mae"] == pytest.approx(0.766504, rel=TOL, abs=ABS_TOL)
    assert results["rsquared"] == pytest.approx(0.287902, rel=TOL, abs=ABS_TOL)
    assert results["exp_var"] == pytest.approx(0.289008, rel=TOL, abs=ABS_TOL)
    assert results["map"] == pytest.approx(0.024379, rel=TOL, abs=ABS_TOL)
    assert results["ndcg"] == pytest.approx(0.148380, rel=TOL, abs=ABS_TOL)
    assert results["precision"] == pytest.approx(0.138494,
                                                 rel=TOL,
                                                 abs=ABS_TOL)
    assert results["recall"] == pytest.approx(0.058747, rel=TOL, abs=ABS_TOL)
Example No. 23
def test_notebooks_basic(isolated_filesystem, notebook):
    """Test Notebooks in the tutorial root folder."""
    notebook = notebook.split("/")[-1]
    list_name = Path("examples/tutorials/") / notebook
    tested_notebooks.append(str(list_name))
    res = pm.execute_notebook(
        notebook,
        "/dev/null",
        parameters={"epochs": 1, "n_test_batches": 5, "n_train_items": 64, "n_test_items": 64},
        timeout=300,
    )
    assert isinstance(res, nbformat.notebooknode.NotebookNode)
Example No. 24
    def test_experiment_titanic(self):
        notebook_path = "tasks/mlp-classifier/Experiment.ipynb"

        papermill.execute_notebook(
            notebook_path,
            "/dev/null",
            parameters=dict(
                dataset="/tmp/data/titanic.csv",
                target="Survived",
                filter_type="remover",
                model_features="",
                one_hot_features="",
                hidden_layer_sizes=100,
                activation="relu",
                solver="adam",
                learning_rate="constant",
                max_iter=200,
                shuffle=True,
                method="predict_proba",
            ),
        )
Example No. 25
def test_dkn_quickstart_integration(notebooks):
    notebook_path = notebooks["dkn_quickstart"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(epochs=5, batch_size=500),
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index(
        "name")["value"]

    assert results["res"]["auc"] == pytest.approx(0.5651, rel=TOL, abs=ABS_TOL)
    assert results["res"]["mean_mrr"] == pytest.approx(0.1639,
                                                       rel=TOL,
                                                       abs=ABS_TOL)
    assert results["res"]["ndcg@5"] == pytest.approx(0.1735,
                                                     rel=TOL,
                                                     abs=ABS_TOL)
    assert results["res"]["ndcg@10"] == pytest.approx(0.2301,
                                                      rel=TOL,
                                                      abs=ABS_TOL)
Example No. 26
def test_execute_script(tmpdir):
    tmp_py = str(tmpdir.join('script.py'))
    tmp_ipynb = str(tmpdir.join('executed.ipynb'))

    with open(tmp_py, 'w') as fp:
        fp.write("""# %% {"tags": ["parameters"]}
# This cell defines the default value for parameters
integer = 1
text = "default"

# %%
print('Parameters are {}'.format({'integer': integer, 'text': text}))
""")

    pm.execute_notebook('txt://' + tmp_py,
                        tmp_ipynb,
                        parameters=dict(integer=2))

    assert os.path.isfile(tmp_ipynb)
    nb = nbformat.read(tmp_ipynb, as_version=4)
    assert len(nb.cells) == 3
Example No. 27
    def test_experiment_titanic(self):
        notebook_path = "tasks/svc/Experiment.ipynb"

        papermill.execute_notebook(
            notebook_path,
            "/dev/null",
            parameters=dict(
                dataset="/tmp/data/titanic.csv",
                target="Survived",
                filter_type="remover",
                model_features="",
                one_hot_features="",
                C=1.0,
                kernel="rbf",
                degree=3,
                gamma="auto",
                probability=True,
                max_iter=-1,
                method="predict_proba",
            ),
        )
Example No. 28
    def execute(self, **kwargs) -> Any:
        """
        TODO: Figure out how to share FlyteContext ExecutionParameters with the notebook kernel (as notebook kernel
             is executed in a separate python process)
        For Spark, the notebooks today need to use the new_session or just getOrCreate session and get a handle to the
        singleton
        """
        logger.info(
            f"Hijacking the call for task-type {self.task_type}, to call notebook."
        )
        # Execute Notebook via Papermill.
        pm.execute_notebook(self._notebook_path,
                            self.output_notebook_path,
                            parameters=kwargs)  # type: ignore

        outputs = self.extract_outputs(self.output_notebook_path)
        self.render_nb_html(self.output_notebook_path,
                            self.rendered_output_path)

        m = {}
        if outputs:
            m = outputs.literals
        output_list = []
        for k, type_v in self.python_interface.outputs.items():
            if k == self._IMPLICIT_OP_NOTEBOOK:
                output_list.append(self.output_notebook_path)
            elif k == self._IMPLICIT_RENDERED_NOTEBOOK:
                output_list.append(self.rendered_output_path)
            elif k in m:
                v = TypeEngine.to_python_value(
                    ctx=FlyteContext.current_context(),
                    lv=m[k],
                    expected_python_type=type_v)
                output_list.append(v)
            else:
                # Use type_v here; v is unbound when the key is missing from the outputs.
                raise RuntimeError(
                    f"Expected output {k} of type {type_v} not found in the notebook outputs"
                )

        return tuple(output_list)
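
For context, a notebook-backed task of this kind is usually declared through the flytekitplugins-papermill wrapper; a sketch under that assumption (task name, path, and interface are illustrative, not taken from the snippet above):

from flytekit import kwtypes
from flytekitplugins.papermill import NotebookTask

# Declares the Python interface that execute() above maps notebook outputs onto,
# in addition to the two implicit outputs (executed notebook and rendered HTML).
nb_task = NotebookTask(
    name="simple.notebook.task",     # illustrative
    notebook_path="analysis.ipynb",  # illustrative
    inputs=kwtypes(n=int),
    outputs=kwtypes(result=float),
)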
Example No. 29
def test_wide_deep(notebooks, tmp):
    notebook_path = notebooks["wide_deep"]

    model_dir = os.path.join(tmp, "wide_deep_0")
    os.mkdir(model_dir)
    params = {
        'MOVIELENS_DATA_SIZE': '100k',
        'EPOCHS': 0,
        'EVALUATE_WHILE_TRAINING': False,
        'MODEL_DIR': model_dir,
        'EXPORT_DIR_BASE': model_dir,
        'RATING_METRICS': ['rmse'],
        'RANKING_METRICS': ['ndcg_at_k'],
    }
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=params,
    )

    # Test w/o item features
    model_dir = os.path.join(tmp, "wide_deep_1")
    os.mkdir(model_dir)
    params = {
        'MOVIELENS_DATA_SIZE': '100k',
        'EPOCHS': 0,
        'ITEM_FEAT_COL': None,
        'EVALUATE_WHILE_TRAINING': True,
        'MODEL_DIR': model_dir,
        'EXPORT_DIR_BASE': model_dir,
        'RATING_METRICS': ['rsquared'],
        'RANKING_METRICS': ['map_at_k'],
    }
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=params,
    )
Example No. 30
def drive_hovm_difference(settings, analysis_name):
    driver_settings = settings[analysis_name].copy()
    current_params = create_current_params(settings)
    if analysis_name != 'hovm_difference_clim':
        check_num_paths(settings, min_number=2)
        current_params = fill_input(current_params, settings, fill_type = 'reference')
    else:
        check_num_paths(settings, min_number=1)
        current_params = fill_input(current_params, settings, fill_type = 'climatology')
    webpage = {}
    image_count = 0
    for region_name, region in driver_settings.items():
        for variable_name, variable in region.items():

            current_params["region"] = region_name
            current_params["variable"] = variable_name
            current_params.update(variable)
            region_name_underscore = region_name.replace(' ', '_')

            ofile = f"{settings['workflow_name']}_{analysis_name}_{region_name_underscore}_{variable_name}.png"
            ofile_nb = f"{settings['workflow_name']}_{analysis_name}_{region_name_underscore}_{variable_name}.ipynb"
            current_params["ofile"] = os.path.join(settings['ofolder_figures'], ofile)

            pm.execute_notebook(
                f"{templates_nb_path}/hovm_difference.ipynb",
                os.path.join(settings['ofolder_notebooks'], ofile_nb),
                parameters=current_params,
                nest_asyncio=True,
            )
            webpage[f"image_{image_count}"] = {}
            webpage[f"image_{image_count}"][
                "name"
            ] = f"{variable_name.capitalize()} for {region_name}"
            webpage[f"image_{image_count}"]["path"] = os.path.join('./figures/', ofile)
            webpage[f"image_{image_count}"]["path_nb"] = os.path.join('./notebooks/', ofile_nb)
            webpage[f"image_{image_count}"][
                "short_name"
            ] = f"{settings['workflow_name']}_{analysis_name}_{region_name_underscore}_{variable_name}"
            image_count += 1
    return webpage
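
For one region/variable pair, the returned mapping has the following shape (all values below are hypothetical, assuming a workflow named wf):

# Illustrative shape of the dict built above; values are hypothetical.
webpage = {
    "image_0": {
        "name": "Temperature for Global Ocean",
        "path": "./figures/wf_hovm_difference_Global_Ocean_temperature.png",
        "path_nb": "./notebooks/wf_hovm_difference_Global_Ocean_temperature.ipynb",
        "short_name": "wf_hovm_difference_Global_Ocean_temperature",
    },
}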
Example No. 32
    def test_experiment_hymenoptera(self):
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=dict(
                dataset="/tmp/data/hymenoptera.zip",
                arch_list=["resnet18", "resnet50", "vgg16"],
                aug_polices=["fa_reduced_cifar10",
                             "fa_resnet50_rimagenet", "fa_reduced_svhn"],
                dataset_id="hymenoptera",
                checkpoint_path="/tmp/data/models-output/",
                output_graphs="/tmp/data/eval-images/",
                top_predictions=1,

                batch=12,
                epochs=1,
                lr=0.001,
                gamma=0.1,
                step_size=7,
                momentum=0.1,
            ),
        )
Example No. 33
def test_deep_and_unified_understanding(notebooks):
    notebook_path = notebooks["deep_and_unified_understanding"]
    pm.execute_notebook(notebook_path,
                        OUTPUT_NOTEBOOK,
                        kernel_name=KERNEL_NAME)

    result = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict
    sigma_numbers = [
        0.00317593, 0.00172284, 0.00634005, 0.00164305, 0.00317159
    ]
    sigma_bert = [
        0.1735696, 0.14028822, 0.14590865, 0.2263149, 0.20640415, 0.21249843,
        0.18685372, 0.14112663, 0.25824168, 0.22399105, 0.2393731, 0.12868434,
        0.27386534, 0.35876372
    ]

    np.testing.assert_array_almost_equal(result["sigma_numbers"],
                                         sigma_numbers,
                                         decimal=3)
    np.testing.assert_array_almost_equal(result["sigma_bert"],
                                         sigma_bert,
                                         decimal=1)
Example No. 34
    def exec(self, context: Dict, rundir: str):
        """Execute the notebook using papermill in the given workflow context.

        Parameters
        ----------
        context: dict
            Mapping of parameter names to their current value in the workflow
            execution state. These are the global variables in the execution
            context.
        rundir: string
            Directory for the workflow run that contains all the run files.
        """
        import papermill as pm
        # Prepare parameters for running the notebook using papermill.
        kwargs = self._get_parameters(context=context)
        # Change working directory temporarily to the given rundir.
        cwd = os.getcwd()
        os.chdir(rundir)
        try:
            pm.execute_notebook(self.notebook, self.output, parameters=kwargs)
        finally:
            os.chdir(cwd)
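
The chdir/try/finally guard ensures the working directory is restored even when notebook execution raises; the same pattern can be packaged as a reusable context manager (helper name is illustrative):

import os
from contextlib import contextmanager

@contextmanager
def working_directory(path):
    """Temporarily make `path` the process working directory."""
    prev = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(prev)

# Equivalent to the try/finally above:
#   with working_directory(rundir):
#       pm.execute_notebook(self.notebook, self.output, parameters=kwargs)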
Example No. 35
    def _compile_sample(self):

        os.chdir(self._work_dir)
        print('Run the sample tests...')

        # For presubmit check, do not do any image injection as for now.
        # Notebook samples need to be papermilled first.
        if self._test_name == 'lightweight_component':
            pm.execute_notebook(
                input_path='Lightweight Python components - basics.ipynb',
                output_path='%s.ipynb' % self._test_name,
                parameters=dict(EXPERIMENT_NAME='%s-test' % self._test_name))
        elif self._test_name == 'dsl_static_type_checking':
            pm.execute_notebook(input_path='DSL Static Type Checking.ipynb',
                                output_path='%s.ipynb' % self._test_name,
                                parameters={})
        else:
            subprocess.call([
                'dsl-compile', '--py',
                '%s.py' % self._test_name, '--output',
                '%s.yaml' % self._test_name
            ])
Example No. 36
    def test_experiment_hotel_bookings(self):
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=dict(dataset="/tmp/data/hotel_bookings.csv",
                            features_to_filter=[
                                "reservation_status_date", "arrival_date_year"
                            ]),
        )

        papermill.execute_notebook(
            "Deployment.ipynb",
            "/dev/null",
        )
        data = datasets.hotel_bookings_testdata_full()
        with server.Server() as s:
            response = s.test(data=data)

        names = response["names"]
        ndarray = response["ndarray"]
        self.assertEqual(len(ndarray[0]), 30)  # 32 features - 2 removed
        self.assertEqual(len(names), 30)
def test_tc_dac_bert_ar(notebooks, tmp):
    notebook_path = notebooks["tc_dac_bert_ar"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(
            NUM_GPUS=1,
            DATA_FOLDER=tmp,
            BERT_CACHE_DIR=tmp,
            BATCH_SIZE=32,
            NUM_EPOCHS=1,
            TRAIN_SIZE=0.8,
            NUM_ROWS=15000,
            RANDOM_STATE=0,
        ),
    )
    result = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict
    assert pytest.approx(result["accuracy"], 0.93, abs=ABS_TOL)
    assert pytest.approx(result["precision"], 0.91, abs=ABS_TOL)
    assert pytest.approx(result["recall"], 0.91, abs=ABS_TOL)
    assert pytest.approx(result["f1"], 0.91, abs=ABS_TOL)
Example No. 38
def test_wide_deep_integration(notebooks, size, steps, expected_values, seed, tmp):
    notebook_path = notebooks["wide_deep"]

    params = {
        "MOVIELENS_DATA_SIZE": size,
        "STEPS": steps,
        "EVALUATE_WHILE_TRAINING": False,
        "MODEL_DIR": tmp,
        "EXPORT_DIR_BASE": tmp,
        "RATING_METRICS": ["rmse", "mae", "rsquared", "exp_var"],
        "RANKING_METRICS": ["ndcg_at_k", "map_at_k", "precision_at_k", "recall_at_k"],
        "RANDOM_SEED": seed,
    }
    pm.execute_notebook(
        notebook_path, OUTPUT_NOTEBOOK, kernel_name=KERNEL_NAME, parameters=params
    )
    results = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.dataframe.set_index("name")[
        "data"
    ]

    for key, value in expected_values.items():
        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
Example No. 39
def test_entailment_multinli_bert(notebooks, tmp):
    notebook_path = notebooks["entailment_multinli_transformers"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        parameters={
            "MODEL_NAME": "bert-base-uncased",
            "TO_LOWER": True,
            "TRAIN_DATA_USED_FRACTION": 0.05,
            "DEV_DATA_USED_FRACTION": 0.05,
            "NUM_EPOCHS": 1,
            "CACHE_DIR": tmp
        },
        kernel_name=KERNEL_NAME,
    )
    result = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict
    assert pytest.approx(result["matched_precision"], 0.76, abs=ABS_TOL)
    assert pytest.approx(result["matched_recall"], 0.76, abs=ABS_TOL)
    assert pytest.approx(result["matched_f1"], 0.76, abs=ABS_TOL)
    assert pytest.approx(result["mismatched_precision"], 0.76, abs=ABS_TOL)
    assert pytest.approx(result["mismatched_recall"], 0.76, abs=ABS_TOL)
    assert pytest.approx(result["mismatched_f1"], 0.76, abs=ABS_TOL)
def test_surprise_svd_smoke(notebooks):
    notebook_path = notebooks["surprise_svd_deep_dive"]
    pm.execute_notebook(notebook_path,
                        OUTPUT_NOTEBOOK,
                        kernel_name=KERNEL_NAME)
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(MOVIELENS_DATA_SIZE="100k"),
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index(
        "name")["value"]

    assert results["rmse"] == pytest.approx(0.96, rel=TOL, abs=ABS_TOL)
    assert results["mae"] == pytest.approx(0.75, rel=TOL, abs=ABS_TOL)
    assert results["rsquared"] == pytest.approx(0.29, rel=TOL, abs=ABS_TOL)
    assert results["exp_var"] == pytest.approx(0.29, rel=TOL, abs=ABS_TOL)
    assert results["map"] == pytest.approx(0.013, rel=TOL, abs=ABS_TOL)
    assert results["ndcg"] == pytest.approx(0.1, rel=TOL, abs=ABS_TOL)
    assert results["precision"] == pytest.approx(0.095, rel=TOL, abs=ABS_TOL)
    assert results["recall"] == pytest.approx(0.032, rel=TOL, abs=ABS_TOL)
def skip_test_21_notebook_run(classification_notebooks, tiny_ic_data_path):
    """ NOTE - this function is intentionally prefixed with 'skip' so that
    pytests bypasses this function
    """
    notebook_path = classification_notebooks[
        "21_deployment_on_azure_container_instances"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        parameters=dict(PM_VERSION=pm.__version__,
                        DATA_PATH=tiny_ic_data_path),
        kernel_name=KERNEL_NAME,
    )
    try:
        os.remove("myenv.yml")
    except OSError:
        pass
    try:
        os.remove("score.py")
    except OSError:
        pass

    try:
        os.remove("output.ipynb")
    except OSError:
        pass

    # There should be only one file, but its name may change
    file_list = glob.glob("./*.pkl")
    for file_path in file_list:
        try:
            os.remove(file_path)
        except OSError:
            pass

    # TODO should use temp folder for safe cleanup. Notebook should accept the folder paths via papermill param.
    shutil.rmtree(os.path.join(os.getcwd(), "azureml-models"))
    shutil.rmtree(os.path.join(os.getcwd(), "models"))
    shutil.rmtree(os.path.join(os.getcwd(), "outputs"))
Example No. 42
def test_notebooks_mnist_02():
    notebook_mnist_02 = data_centric_mnist_path.joinpath(
        "02-FL-mnist-train-model.ipynb")

    res = pm.execute_notebook(
        str(notebook_mnist_02),
        os.devnull,
        dict(grid_address="http://localhost:" + GRID_NETWORK_PORT,
             N_EPOCHS=2,
             N_TEST=2),
    )

    assert isinstance(res, nbformat.notebooknode.NotebookNode)
Example No. 43
def test_sar_single_node_smoke(notebooks):
    notebook_path = notebooks["sar_single_node"]
    pm.execute_notebook(notebook_path,
                        OUTPUT_NOTEBOOK,
                        kernel_name=KERNEL_NAME)
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k"),
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index(
        "name")["value"]

    assert results["map"] == pytest.approx(0.105815262, rel=TOL, abs=ABS_TOL)
    assert results["ndcg"] == pytest.approx(0.373197255, rel=TOL, abs=ABS_TOL)
    assert results["precision"] == pytest.approx(0.326617179,
                                                 rel=TOL,
                                                 abs=ABS_TOL)
    assert results["recall"] == pytest.approx(0.175956743,
                                              rel=TOL,
                                              abs=ABS_TOL)
Example No. 44
def test_bidaf_deep_dive(
    notebooks, subscription_id, resource_group, workspace_name, workspace_region
):
    notebook_path = notebooks["bidaf_deep_dive"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        parameters={
            "NUM_EPOCHS": 1,
            "config_path": None,
            "PROJECT_FOLDER": "examples/question_answering/bidaf-question-answering",
            "SQUAD_FOLDER": "examples/question_answering/squad",
            "LOGS_FOLDER": "examples/question_answering/",
            "BIDAF_CONFIG_PATH": "examples/question_answering/",
            "subscription_id": subscription_id,
            "resource_group": resource_group,
            "workspace_name": workspace_name,
            "workspace_region": workspace_region,
        },
    )
    result = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict["validation_EM"]
    assert result == pytest.approx(0.5, abs=ABS_TOL)
Example No. 45
def test_notebooks_basic_translations(isolated_filesystem, translated_notebook):  # pragma: no cover
    """Test Notebooks in the tutorial translations folder."""
    notebook = "/".join(translated_notebook.split("/")[-2:])
    notebook = f"translations/{notebook}"
    list_name = Path(f"examples/tutorials/{notebook}")
    tested_notebooks.append(str(list_name))
    res = pm.execute_notebook(
        notebook,
        "/dev/null",
        parameters={"epochs": 1, "n_test_batches": 5, "n_train_items": 64, "n_test_items": 64},
        timeout=400,
    )
    assert isinstance(res, nbformat.notebooknode.NotebookNode)
Example No. 46
def test_ncf_deep_dive_integration(
    notebooks, size, epochs, batch_size, expected_values, seed
):
    notebook_path = notebooks["ncf_deep_dive"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(
            TOP_K=10,
            MOVIELENS_DATA_SIZE=size,
            EPOCHS=epochs,
            BATCH_SIZE=batch_size,
            SEED=seed,
        ),
    )
    results = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.dataframe.set_index("name")[
        "data"
    ]

    for key, value in expected_values.items():
        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
Example No. 47
def test_fastai(notebooks):
    notebook_path = notebooks["fastai"]
    pm.execute_notebook(notebook_path,
                        OUTPUT_NOTEBOOK,
                        kernel_name=KERNEL_NAME)
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k", EPOCHS=1),
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index(
        "name")["value"]

    assert results["rmse"] == pytest.approx(0.959352, TOL)
    assert results["mae"] == pytest.approx(0.766504, TOL)
    assert results["rsquared"] == pytest.approx(0.287902, TOL)
    assert results["exp_var"] == pytest.approx(0.289008, TOL)
    assert results["map"] == pytest.approx(0.024379, TOL)
    assert results["ndcg"] == pytest.approx(0.148380, TOL)
    assert results["precision"] == pytest.approx(0.138494, TOL)
    assert results["recall"] == pytest.approx(0.058747, TOL)
Example No. 48
def test_minilm_abstractive_summarization(notebooks, tmp):
    notebook_path = notebooks["minilm_abstractive_summarization"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(
            QUICK_RUN=True,
            NUM_GPUS=0,
            TOP_N=2,
            WARMUP_STEPS=5,
            MAX_STEPS=50,
            GRADIENT_ACCUMULATION_STEPS=1,
            TEST_PER_GPU_BATCH_SIZE=2,
            BEAM_SIZE=3,
            CLEANUP_RESULTS=True,
        ),
    )
    result = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.data_dict
    assert pytest.approx(result["rouge_1_f_score"], 0.1, abs=ABS_TOL)
    assert pytest.approx(result["rouge_2_f_score"], 0.05, abs=ABS_TOL)
    assert pytest.approx(result["rouge_l_f_score"], 0.1, abs=ABS_TOL)
Example No. 49
    def test_experiment_boston(self):
        papermill.execute_notebook(
            "Experiment.ipynb",
            "/dev/null",
            parameters=dict(
                dataset="/tmp/data/boston.csv",
                target="medv",
                norm="l2",
            ),
        )

        papermill.execute_notebook(
            "Deployment.ipynb",
            "/dev/null",
        )
        data = datasets.boston_testdata()
        with server.Server() as s:
            response = s.test(data=data)
        names = response["names"]
        ndarray = response["ndarray"]
        self.assertEqual(len(ndarray[0]), 13)  # 13 features
        self.assertEqual(len(names), 13)
def test_vw_deep_dive_runs(notebooks):
    notebook_path = notebooks["vowpal_wabbit_deep_dive"]
    pm.execute_notebook(notebook_path, OUTPUT_NOTEBOOK, kernel_name=KERNEL_NAME)
def test_als_pyspark_runs(notebooks):
    notebook_path = notebooks["als_pyspark"]
    pm.execute_notebook(notebook_path, OUTPUT_NOTEBOOK, kernel_name=KERNEL_NAME)
def test_sar_single_node_runs(notebooks):
    notebook_path = notebooks["sar_single_node"]
    pm.execute_notebook(notebook_path, OUTPUT_NOTEBOOK, kernel_name=KERNEL_NAME)
def test_data_split_runs(notebooks):
    notebook_path = notebooks["data_split"]
    pm.execute_notebook(notebook_path, OUTPUT_NOTEBOOK, kernel_name=KERNEL_NAME)
def test_evaluation_runs(notebooks):
    notebook_path = notebooks["evaluation"]
    pm.execute_notebook(notebook_path, OUTPUT_NOTEBOOK, kernel_name=KERNEL_NAME)
Example No. 55
if params['TOP_K'] <= 0:
    raise ValueError("Top K should be larger than 0")

if params['MODEL_TYPE'] not in {'wide', 'deep', 'wide_deep'}:
    raise ValueError("Model type should be either 'wide', 'deep', or 'wide_deep'")

if params['DATA_DIR'] is None:
    raise ValueError("Datastore path should be given")

print("Args:")
for k, v in params.items():
    _log(k, v)


print("Run", NOTEBOOK_NAME)

pm.execute_notebook(
    NOTEBOOK_NAME,
    OUTPUT_NOTEBOOK,
    parameters=params,
    kernel_name='python3'
)
nb = pm.read_notebook(OUTPUT_NOTEBOOK)

for m, v in nb.data.items():
    _log(m, v)

# clean-up
os.remove(OUTPUT_NOTEBOOK)
shutil.rmtree(params['MODEL_DIR'], ignore_errors=True)
def test_surprise_deep_dive_runs(notebooks):
    notebook_path = notebooks["surprise_svd_deep_dive"]
    pm.execute_notebook(notebook_path, OUTPUT_NOTEBOOK, kernel_name=KERNEL_NAME)
def test_baseline_deep_dive_runs(notebooks):
    notebook_path = notebooks["baseline_deep_dive"]
    pm.execute_notebook(notebook_path, OUTPUT_NOTEBOOK, kernel_name=KERNEL_NAME)