Example #1
0
File: nb.py Project: ydataai/kale
def validate_notebook(request,
                      source_notebook_path,
                      notebook_metadata_overrides=None):
    """Validate notebook metadata.

    Instantiating NotebookProcessor performs the validation; an invalid
    notebook raises from the constructor, so reaching the return
    statement means validation succeeded.
    """
    NotebookProcessor(source_notebook_path, notebook_metadata_overrides)
    return True
Example #2
0
def _prepare_transformer_assets(fn: Callable, assets: Dict = None):
    """Marshal a transformer function and its dependencies to disk.

    Saves the function, the objects it depends on and the source
    notebook under TRANSFORMER_ASSETS_DIR so a serving container can
    later restore them.

    Args:
        fn: The preprocessing/transformer function to serialize.
        assets: Optional mapping of variable *names* to the objects
            the function depends on.

    Raises:
        ValueError: If ``assets`` is provided but is not a dictionary.
        RuntimeError: If ``fn`` references names that are neither
            defined by the notebook nor provided in ``assets``.
    """
    notebook_path = jputils.get_notebook_path()
    processor = NotebookProcessor(nb_path=notebook_path, skip_validation=True)
    fn_source = astutils.get_function_source(fn, strip_signature=False)
    # Names used by `fn` that the notebook's imports/functions do not
    # define must be supplied explicitly as assets.
    missing_names = flakeutils.pyflakes_report(
        processor.get_imports_and_functions() + "\n" + fn_source)
    # FIX: check `is None` instead of falsiness, so a falsy non-dict
    # (e.g. an empty list) is rejected below instead of silently
    # replaced by an empty dict.
    if assets is None:
        assets = dict()
    if not isinstance(assets, dict):
        # FIX: the exception was previously created but never raised.
        raise ValueError("Please provide preprocessing assets as a dictionary"
                         " mapping variables *names* to their objects")
    missing_assets = [name not in assets for name in missing_names]
    if any(missing_assets):
        # FIX: typos in the original message ("abjects" -> "objects",
        # "add the" -> "add them").
        raise RuntimeError(
            "The following objects are a dependency for the"
            " provided preprocessing function. Please add them"
            " to the `preprocessing_assets` dictionary: %s" %
            [a for a, m in zip(missing_names, missing_assets) if m])
    # save function and assets
    utils.clean_dir(TRANSFORMER_ASSETS_DIR)
    marshal.set_data_dir(TRANSFORMER_ASSETS_DIR)
    marshal.save(fn, TRANSFORMER_FN_ASSET_NAME)
    for asset_name, asset_value in assets.items():
        marshal.save(asset_value, asset_name)
    # save notebook as well
    shutil.copy(
        notebook_path,
        os.path.join(TRANSFORMER_ASSETS_DIR, TRANSFORMER_SRC_NOTEBOOK_NAME))
Example #3
0
def test_notebook_to_dsl(random_string, notebook_path, dsl_path):
    """Test code generation end to end from notebook to DSL."""
    # Pin the random suffix so generated names are deterministic.
    random_string.return_value = "rnd"

    overrides = {"abs_working_dir": "/kale"}
    pipeline = NotebookProcessor(notebook_path, overrides).to_pipeline()
    dsl_script_path = Compiler(pipeline).compile()

    # FIX: use context managers so the file handles are closed
    # deterministically instead of leaking until GC.
    with open(dsl_path) as f:
        expected_result = f.read()
    with open(dsl_script_path) as f:
        result = f.read()
    assert result == expected_result
Example #4
0
def compile_notebook(request, source_notebook_path,
                     notebook_metadata_overrides=None, debug=False):
    """Compile the notebook to KFP DSL.

    Returns a dict with the relative path of the compiled pipeline
    package and the pipeline's metadata.
    """
    nb_processor = NotebookProcessor(source_notebook_path,
                                     notebook_metadata_overrides)
    compiled_pipeline = nb_processor.to_pipeline()
    dsl_script = Compiler(compiled_pipeline).compile()
    # FIXME: Why were we tapping into the Kale logger?
    # instance = Kale(source_notebook_path, notebook_metadata_overrides, debug)
    # instance.logger = request.log if hasattr(request, "log") else logger

    package_path = kfputils.compile_pipeline(
        dsl_script, compiled_pipeline.config.pipeline_name)

    return {
        "pipeline_package_path": os.path.relpath(package_path),
        "pipeline_metadata": compiled_pipeline.config.to_dict(),
    }
Example #5
0
File: nb.py Project: ydataai/kale
def get_pipeline_metrics(request, source_notebook_path):
    """Get the pipeline metrics tagged in the notebook."""
    # Prefer the request-scoped logger when available.
    log = getattr(request, "log", logger)
    try:
        nb_path = os.path.expanduser(source_notebook_path)
        processor = NotebookProcessor(nb_path, skip_validation=True)
        metrics_source = processor.get_pipeline_metrics_source()
        if metrics_source == '':
            raise ValueError("No pipeline metrics found. Please tag a cell"
                             " of the notebook with the `pipeline-metrics`"
                             " tag.")
        # get a dict from the 'pipeline-metrics' cell source code
        metrics = astutils.parse_metrics_print_statements(metrics_source)
    except ValueError as e:
        log.exception("Failed to parse pipeline metrics")
        raise RPCInternalError(details=str(e), trans_id=request.trans_id)
    log.info("Pipeline metrics: {}".format(metrics))
    return metrics
Example #6
0
File: nb.py Project: ydataai/kale
def get_pipeline_parameters(request, source_notebook_path):
    """Get the pipeline parameters tagged in the notebook."""
    # Prefer the request-scoped logger when available.
    log = getattr(request, "log", logger)
    try:
        nb_path = os.path.expanduser(source_notebook_path)
        processor = NotebookProcessor(nb_path, skip_validation=True)
        params_source = processor.get_pipeline_parameters_source()
        if params_source == '':
            raise ValueError("No pipeline parameters found. Please tag a cell"
                             " of the notebook with the `pipeline-parameters`"
                             " tag.")
        # get a dict from the 'pipeline parameters' cell source code
        params_dict = astutils.parse_assignments_expressions(params_source)
    except ValueError as e:
        log.exception("Value Error during parsing of pipeline parameters")
        raise RPCInternalError(details=str(e), trans_id=request.trans_id)
    # Flatten the dict into [[name, <parsed parts>], ...] rows so the JS
    # side can parse it more easily.
    params = list()
    for name, spec in params_dict.items():
        params.append([name, *spec])
    log.info("Pipeline parameters:")
    table = tabulate(params, headers=["name", "type", "value"])
    for row in table.split("\n"):
        log.info(row)
    return params
Example #7
0
    def _load_transformer_assets(self):
        """Restore the marshalled transformer function, init code and assets.

        Loads from serveutils.TRANSFORMER_ASSETS_DIR (presumably written
        by the companion `_prepare_transformer_assets` step — confirm):
        the transformer function itself, the notebook's imports/functions
        as executable init code, and every remaining marshalled asset,
        binding all of them into this module's globals.
        """
        marshal.set_data_dir(serveutils.TRANSFORMER_ASSETS_DIR)
        log.info("Loading transformer function...")
        _fn = marshal.load(serveutils.TRANSFORMER_FN_ASSET_NAME)
        # create a new function monkey patching the original function's
        # __globals__. The marshalled function would not be scoped under
        # the current module, thus its __globals__ dict would be empty.
        # In this way we create the same function but binding it to the
        # module's globals().
        self.fn = types.FunctionType(_fn.__code__, globals(), _fn.__name__,
                                     _fn.__defaults__, _fn.__closure__)

        log.info("Processing source notebook for imports and functions...")
        processor = NotebookProcessor(nb_path=os.path.join(
            serveutils.TRANSFORMER_ASSETS_DIR,
            serveutils.TRANSFORMER_SRC_NOTEBOOK_NAME),
                                      skip_validation=True)
        self.init_code = processor.get_imports_and_functions()
        log.info("Initialization code:\n%s" % self.init_code)
        log.info("Running initialization code...")
        # NOTE(review): exec runs arbitrary code extracted from the
        # notebook in this module's globals — the notebook is trusted
        # here by design; verify the assets dir cannot be tampered with.
        exec(self.init_code, globals())

        log.info("Loading transformer's assets...")
        for file in os.listdir(serveutils.TRANSFORMER_ASSETS_DIR):
            # Skip the two files that are not marshalled assets: the
            # source notebook and the transformer function itself.
            if file in [
                    serveutils.TRANSFORMER_SRC_NOTEBOOK_NAME,
                    serveutils.TRANSFORMER_FN_ASSET_NAME
            ]:
                continue
            # The marshal mechanism works by looking at the name of the files
            # without extensions.
            basename = os.path.splitext(file)[0]  # remove extension
            self.assets[basename] = marshal.load(basename)
        log.info("Assets successfully loaded: %s" % self.assets.keys())
        log.info("Initializing assets...")
        # Expose each asset as a module-level name so the transformer
        # function (bound to globals() above) can resolve it.
        for asset_name, asset_value in self.assets.items():
            globals()[asset_name] = asset_value
Example #8
0
def main():
    """Entry-point of CLI command.

    Parses the CLI arguments, compiles the source notebook into a KFP
    pipeline package, and optionally uploads and/or runs the pipeline
    on the configured KFP host.
    """
    parser = argparse.ArgumentParser(description=ARGS_DESC,
                                     formatter_class=RawTextHelpFormatter)
    general_group = parser.add_argument_group('General')
    general_group.add_argument('--nb',
                               type=str,
                               help='Path to source JupyterNotebook',
                               required=True)
    # use store_const instead of store_true because we want None instead
    # of False in case the flag is missing (None means "no override")
    general_group.add_argument('--upload_pipeline',
                               action='store_const',
                               const=True)
    general_group.add_argument('--run_pipeline',
                               action='store_const',
                               const=True)
    general_group.add_argument('--debug', action='store_true')

    # All arguments in this group override the corresponding fields of
    # the notebook's Kale metadata.
    metadata_group = parser.add_argument_group('Notebook Metadata Overrides',
                                               METADATA_GROUP_DESC)
    metadata_group.add_argument('--experiment_name',
                                type=str,
                                help='Name of the created experiment')
    metadata_group.add_argument('--pipeline_name',
                                type=str,
                                help='Name of the deployed pipeline')
    metadata_group.add_argument('--pipeline_description',
                                type=str,
                                help='Description of the deployed pipeline')
    metadata_group.add_argument('--docker_image',
                                type=str,
                                help='Docker base image used to build the '
                                'pipeline steps')
    metadata_group.add_argument('--kfp_host',
                                type=str,
                                help='KFP endpoint. Provide address as '
                                '<host>:<port>.')
    metadata_group.add_argument('--storage-class-name',
                                type=str,
                                help='The storage class name for the created'
                                ' volumes')
    metadata_group.add_argument('--volume-access-mode',
                                type=str,
                                help='The access mode for the created volumes')

    args = parser.parse_args()

    # get the notebook metadata args group
    # NOTE(review): this reaches into argparse private attributes
    # (_action_groups, _group_actions) to recover which args belong to
    # the overrides group — there is no public API for this; it may
    # break on an argparse internals change.
    mt_overrides_group = next(
        filter(lambda x: x.title == 'Notebook Metadata Overrides',
               parser._action_groups))
    # get the single args of that group (only those actually provided,
    # since unset store_const/str args default to None)
    mt_overrides_group_dict = {
        a.dest: getattr(args, a.dest, None)
        for a in mt_overrides_group._group_actions
        if getattr(args, a.dest, None) is not None
    }

    # FIXME: We are removing the `debug` arg. This shouldn't be an issue
    processor = NotebookProcessor(args.nb, mt_overrides_group_dict)
    pipeline = processor.run()
    dsl_script_path = Compiler(pipeline).compile()
    pipeline_name = pipeline.config.pipeline_name
    pipeline_package_path = kfputils.compile_pipeline(dsl_script_path,
                                                      pipeline_name)

    if args.upload_pipeline:
        kfputils.upload_pipeline(pipeline_package_path=pipeline_package_path,
                                 pipeline_name=pipeline_name,
                                 host=pipeline.config.kfp_host)

    if args.run_pipeline:
        run_name = kfputils.generate_run_name(pipeline_name)
        kfputils.run_pipeline(run_name=run_name,
                              experiment_name=pipeline.config.experiment_name,
                              pipeline_package_path=pipeline_package_path,
                              host=pipeline.config.kfp_host)
Example #9
0
def notebook_processor(dummy_nb_config):
    """Return a notebook processor over a dummy in-memory notebook.

    `_read_notebook` is stubbed out so no file is touched: the processor
    operates on a fresh, empty v4 notebook instead.
    """
    def _fake_read(_):
        return nbformat.v4.new_notebook()

    with patch.object(NotebookProcessor, '_read_notebook', _fake_read):
        return NotebookProcessor("path/to/nb", dummy_nb_config)