Example #1
def docker_build(uid, gid, image, docker_args):
    """Build a Docker image for the project."""

    uid, gid = get_uid_gid(uid, gid)
    project_path = get_project_context("project_path")
    image = image or str(project_path.name)

    template_path = Path(__file__).parent / "template"
    verbose = get_project_context("verbose")
    copy_template_files(
        project_path,
        template_path,
        ["Dockerfile", ".dockerignore", ".dive-ci"],
        verbose,
    )

    combined_args = compose_docker_run_args(
        required_args=[
            ("--build-arg", "KEDRO_UID={0}".format(uid)),
            ("--build-arg", "KEDRO_GID={0}".format(gid)),
        ],
        # add image tag only if it is not already supplied by the user
        optional_args=[("-t", image)],
        user_args=docker_args,
    )
    command = ["docker", "build"] + combined_args + [str(project_path)]
    call(command)
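For context, a minimal sketch of what `compose_docker_run_args` is assumed to do with the three keyword arguments used above (the name `compose_docker_run_args_sketch` and its behaviour are assumptions, not the actual kedro-docker implementation): required flag/value pairs are always emitted, optional pairs are emitted only when the user has not already passed the same flag, and the raw user arguments are appended last.

from typing import List, Sequence, Tuple


def compose_docker_run_args_sketch(
    required_args: Sequence[Tuple[str, str]] = (),
    optional_args: Sequence[Tuple[str, str]] = (),
    user_args: Sequence[str] = (),
) -> List[str]:
    """Illustrative only: flatten flag/value pairs into a flat CLI argument list."""
    args: List[str] = []
    for flag, value in required_args:
        args += [flag, value]
    for flag, value in optional_args:
        # Keep an optional pair only when the user did not already supply the flag.
        if flag not in user_args:
            args += [flag, value]
    return args + list(user_args)


# compose_docker_run_args_sketch(
#     required_args=[("--build-arg", "KEDRO_UID=1000")],
#     optional_args=[("-t", "my-project")],
#     user_args=["--no-cache"],
# )
# -> ["--build-arg", "KEDRO_UID=1000", "-t", "my-project", "--no-cache"]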
Example #2
def _get_pipeline_catalog_from_kedro14(env):
    try:
        pipeline = get_project_context("create_pipeline")()
        get_config = get_project_context("get_config")
        conf = get_config(str(Path.cwd()), env)
        create_catalog = get_project_context("create_catalog")
        catalog = create_catalog(config=conf)
        return pipeline, catalog
    except (ImportError, KeyError):
        raise KedroCliError(ERROR_PROJECT_ROOT)
Example #3
def docker_init(spark):
    """Initialize a Dockerfile for the project."""
    project_path = get_project_context("project_path")

    template_path = Path(__file__).parent / "template"
    verbose = get_project_context("verbose")
    docker_file_version = "spark" if spark else "simple"
    docker_file = f"Dockerfile.{docker_file_version}"
    copy_template_files(
        project_path,
        template_path,
        [docker_file, ".dockerignore", ".dive-ci"],
        verbose,
    )
Example #4
def create():
    """Create an Airflow DAG for a project"""

    src_file = Path(__file__).parent / "dag_template.py"
    dest_file = _get_dag_filename()
    dest_file.parent.mkdir(parents=True, exist_ok=True)
    template = Template(src_file.read_text(encoding="utf-8"),
                        keep_trailing_newline=True)

    try:
        from kedro.context import (  # noqa:F401 pylint: disable=unused-import
            load_context,
        )

        context_compatibility_mode = False
    except ImportError:  # pragma: no coverage
        context_compatibility_mode = True

    dest_file.write_text(
        template.render(
            project_name=get_project_context("project_name"),
            project_path=get_project_context("project_path"),
            context_compatibility_mode=context_compatibility_mode,
        ),
        encoding="utf-8",
    )

    secho("")
    secho("An Airflow DAG has been generated in:", fg="green")
    secho(str(dest_file))
    secho("This file should be copied to your Airflow DAG folder.",
          fg="yellow")
    secho("The Airflow configuration can be customized by editing this file.",
          fg="green")
    secho("")
    secho(
        "This file also contains the path to the config directory, this directory will need to "
        "be available to Airflow and any workers.",
        fg="yellow",
    )
    secho("")
    secho(
        "Additionally all data sets must have an entry in the data catalog.",
        fg="yellow",
    )
    secho(
        "And all local paths in both the data catalog and log config must be absolute paths.",
        fg="yellow",
    )
    secho("")
Example #5
def argokedro(image, templates_folder, ytt, namespace):
    """Creates an argo pipeline yaml
    """
    pc = cli.get_project_context()
    pipeline = pc.pipeline
    project_name = pc.project_name
    parameters = pc.catalog.load("parameters")
    pretty_params = transform_parameters(parameters)
    dependencies = pipeline.node_dependencies
    deps_dict = get_deps_dict(dependencies)
    tags = get_tags(pipeline)
    tagged_deps_dict = update_deps_dict_with_tags(deps_dict, tags)
    kedro_dict = {
        "tasks": tagged_deps_dict,
        "image": image,
        "project_name": project_name,
        "parameters": pretty_params,
        "namespace": namespace,
    }
    kedro_yaml = generate_yaml(kedro_dict)
    if ytt:
        kedro_yaml = ytt_add_values_part(kedro_yaml)
        copy_template(templates_folder, ytt)
        logging.info(f"YTT template saved in {templates_folder} folder")
    save_yaml(kedro_yaml, templates_folder)
    logging.info(f"Kedro template saved in {templates_folder} folder")
    if ytt:
        click.secho(FINISHED_MESSAGE_YTT)
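The `get_deps_dict` helper is not shown above; a minimal sketch, assuming the Argo template only needs each node's name and the names of its upstream nodes (the exact output format here is an assumption):

def get_deps_dict_sketch(node_dependencies):
    """Illustrative only: map each node name to the names of the nodes it depends on.

    `node_dependencies` is expected to be a mapping of Node -> set of upstream Nodes,
    which is what kedro's Pipeline.node_dependencies returns.
    """
    return {
        node.name: {"name": node.name, "dep": sorted(dep.name for dep in deps)}
        for node, deps in node_dependencies.items()
    }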
Example #6
def import_line(name):
    """generate an import line for something in the project_context"""
    func = get_project_context(name)
    res = "from {} import {}".format(func.__module__, func.__name__)
    if func.__name__ != name:
        res = "{} as {}".format(res, name)
    return res
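For illustration, assuming the project exposes `create_pipeline` from a module named `my_project.pipeline` (both names hypothetical), the helper would return strings such as:

# import_line("create_pipeline")
# -> "from my_project.pipeline import create_pipeline"
#
# If the object's __name__ differs from the requested key, an alias is appended:
# -> "from my_project.pipeline import make_pipeline as create_pipeline"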
Example #7
def _mount_info() -> Dict[str, Union[str, Tuple]]:
    project_path = get_project_context("project_path")
    res = dict(
        host_root=str(project_path),
        container_root="/home/kedro",
        mount_volumes=DOCKER_DEFAULT_VOLUMES,
    )
    return res
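A hedged sketch of how this mapping might later be turned into `docker run -v` arguments; `DOCKER_DEFAULT_VOLUMES` is assumed to be an iterable of project sub-directories such as ("conf", "data", "logs"), and the helper name below is made up:

from typing import Dict, List, Tuple, Union


def mount_volume_args(info: Dict[str, Union[str, Tuple]]) -> List[str]:
    """Illustrative only: build one `-v host:container` pair per mounted sub-directory."""
    args: List[str] = []
    for volume in info["mount_volumes"]:
        host = f"{info['host_root']}/{volume}"
        container = f"{info['container_root']}/{volume}"
        args += ["-v", f"{host}:{container}"]
    return args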
Example #8
def nodes():
    """Serve the pipeline data."""
    pipeline = get_project_context("create_pipeline")()
    return jsonify([{
        "name": n.name,
        "inputs": [ds.split("@")[0] for ds in n.inputs],
        "outputs": [ds.split("@")[0] for ds in n.outputs],
        "tags": list(n.tags),
    } for n in pipeline.nodes])
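For reference, the endpoint returns a JSON list with one entry per node; a hand-written sample (node and dataset names are made up):

# [
#     {
#         "name": "preprocess_companies_node",
#         "inputs": ["companies"],
#         "outputs": ["preprocessed_companies"],
#         "tags": ["preprocessing"]
#     }
# ]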
Example #9
def create():
    """Create an Airflow DAG for a project"""

    src_file = Path(__file__).parent / "dag_template.py"
    dest_file = _get_dag_filename()
    dest_file.parent.mkdir(parents=True, exist_ok=True)
    template = Template(src_file.read_text(encoding="utf-8"),
                        keep_trailing_newline=True)
    dest_file.write_text(
        template.render(
            project_name=get_project_context("project_name"),
            import_get_config=import_line("get_config"),
            import_create_catalog=import_line("create_catalog"),
            import_create_pipeline=import_line("create_pipeline"),
            project_path=get_project_context("project_path"),
        ),
        encoding="utf-8",
    )

    secho("")
    secho("An Airflow DAG has been generated in:", fg="green")
    secho(str(dest_file))
    secho("This file should be copied to your Airflow DAG folder.",
          fg="yellow")
    secho("The Airflow configuration can be customized by editing this file.",
          fg="green")
    secho("")
    secho(
        "This file also contains the path to the config directory, this directory will need to "
        "be available to Airflow and any workers.",
        fg="yellow",
    )
    secho("")
    secho(
        "Additionally all data sets must have an entry in the data catalog.",
        fg="yellow",
    )
    secho(
        "And all local paths in both the data catalog and log config must be absolute paths.",
        fg="yellow",
    )
    secho("")
Example #10
def nodes_json():
    """Serve the pipeline data."""
    def pretty_name(name):
        name = name.replace("-", " ").replace("_", " ")
        parts = [n[0].upper() + n[1:] for n in name.split()]
        return " ".join(parts)

    pipeline = get_project_context("create_pipeline")()

    nodes = []
    edges = []
    namespace_tags = defaultdict(set)
    all_tags = set()

    for node in sorted(pipeline.nodes):
        task_id = "task/" + node.name.replace(" ", "")
        nodes.append({
            "type": "task",
            "id": task_id,
            "name": getattr(node, "short_name", node.name),
            "full_name": str(node),
            "tags": sorted(node.tags),
        })
        all_tags.update(node.tags)
        for data_set in node.inputs:
            namespace = data_set.split("@")[0]
            edges.append({"source": "data/" + namespace, "target": task_id})
            namespace_tags[namespace].update(node.tags)
        for data_set in node.outputs:
            namespace = data_set.split("@")[0]
            edges.append({"source": task_id, "target": "data/" + namespace})
            namespace_tags[namespace].update(node.tags)

    for namespace, tags in sorted(namespace_tags.items()):
        nodes.append({
            "type": "data",
            "id": "data/" + namespace,
            "name": pretty_name(namespace),
            "full_name": namespace,
            "tags": sorted(tags),
            "is_parameters": bool("param" in namespace.lower()),
        })

    tags = []
    for tag in sorted(all_tags):
        tags.append({"id": tag, "name": pretty_name(tag)})

    return jsonify(
        {"snapshots": [{
            "nodes": nodes,
            "edges": edges,
            "tags": tags
        }]})
Example #11
def get_data_from_kedro():
    """ Get pipeline data from Kedro and format it appropriately """
    def pretty_name(name):
        name = name.replace("-", " ").replace("_", " ")
        parts = [n[0].upper() + n[1:] for n in name.split()]
        return " ".join(parts)

    pipeline = get_project_context("create_pipeline")()

    nodes = []
    edges = []
    namespace_tags = defaultdict(set)
    all_tags = set()

    for node in sorted(pipeline.nodes, key=lambda n: n.name):
        task_id = _hash(str(node))
        nodes.append({
            "type": "task",
            "id": task_id,
            "name": getattr(node, "short_name", node.name),
            "full_name": getattr(node, "_func_name", str(node)),
            "tags": sorted(node.tags),
        })
        all_tags.update(node.tags)
        for data_set in node.inputs:
            namespace = data_set.split("@")[0]
            edges.append({"source": _hash(namespace), "target": task_id})
            namespace_tags[namespace].update(node.tags)
        for data_set in node.outputs:
            namespace = data_set.split("@")[0]
            edges.append({"source": task_id, "target": _hash(namespace)})
            namespace_tags[namespace].update(node.tags)

    for namespace, tags in sorted(namespace_tags.items()):
        is_param = bool("param" in namespace.lower())
        nodes.append({
            "type": "parameters" if is_param else "data",
            "id": _hash(namespace),
            "name": pretty_name(namespace),
            "full_name": namespace,
            "tags": sorted(tags),
        })

    tags = []
    for tag in sorted(all_tags):
        tags.append({"id": tag, "name": pretty_name(tag)})

    return {"nodes": nodes, "edges": edges, "tags": tags}
Example #12
def docker_build(ctx, uid, gid, spark, base_image, image, docker_args):
    """Build a Docker image for the project."""
    uid, gid = get_uid_gid(uid, gid)
    project_path = get_project_context("project_path")
    image = image or str(project_path.name)

    ctx.invoke(docker_init, spark=spark)

    combined_args = compose_docker_run_args(
        required_args=[
            ("--build-arg", f"KEDRO_UID={uid}"),
            ("--build-arg", f"KEDRO_GID={gid}"),
            ("--build-arg", f"BASE_IMAGE={base_image}"),
        ],
        # add image tag only if it is not already supplied by the user
        optional_args=[("-t", image)],
        user_args=docker_args,
    )
    command = ["docker", "build"] + combined_args + [str(project_path)]
    call(command)
Example #13
def _call_viz(
    host=None,
    port=None,
    browser=None,
    load_file=None,
    save_file=None,
    pipeline_name=None,
    env=None,
):
    global data  # pylint: disable=global-statement,invalid-name

    if load_file:
        data = _load_from_file(load_file)
    else:
        if match(kedro.__version__, ">=0.15.0"):
            from kedro.context import KedroContextError

            try:
                context = get_project_context("context", env=env)
                pipeline = _get_pipeline_from_context(context, pipeline_name)
            except KedroContextError:
                raise KedroCliError(ERROR_PROJECT_ROOT)
            catalog = context.catalog

        else:
            # Kedro 0.14.*
            if pipeline_name:
                raise KedroCliError(ERROR_PIPELINE_FLAG_NOT_SUPPORTED)
            pipeline, catalog = _get_pipeline_catalog_from_kedro14(env)

        data = format_pipeline_data(pipeline, catalog)

    if save_file:
        Path(save_file).write_text(json.dumps(data, indent=4, sort_keys=True))
    else:
        if browser:
            webbrowser.open_new("http://127.0.0.1:{:d}/".format(port))
        app.run(host=host, port=port)
Example #14
def profile(name):
    """ Kedro plugin for utilizing Pandas Profiling """
    conf_dict = kedro_conf_path()
    catalog_df = get_catalog_details(conf_dict)
    project_path = get_project_context("project_path")

    if name is None:
        print(catalog_df)
    else:
        data_path = catalog_df.at[name, "filepath"]
        data = pd_reader(project_path / data_path)

        print(f"Profiling {name} DataSet...")

        profile = data.profile_report(title=f"DataSet {name} - Profile Report",
                                      pool_size=0)

        output_path = project_path / "data" / "08_reporting" / f"{name}.html"
        profile.to_file(output_file=output_path)

        print(f"{name.title()} profile printed to {output_path}")

        return None
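The `pd_reader` helper is not shown above; a minimal sketch, assuming it simply dispatches on the file extension found in the catalog entry:

from pathlib import Path

import pandas as pd


def pd_reader(path: Path) -> pd.DataFrame:
    """Illustrative only: choose a pandas reader based on the file suffix."""
    readers = {".csv": pd.read_csv, ".parquet": pd.read_parquet, ".json": pd.read_json}
    try:
        return readers[path.suffix.lower()](path)
    except KeyError:
        raise ValueError(f"Unsupported file type: {path.suffix}") from None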
Example #15
def test_get_context_without_project_path(self, mocked_load_context):
    dummy_context = get_project_context("context")
    mocked_load_context.assert_called_once_with(Path.cwd())
    assert isinstance(dummy_context, DummyContext)
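The fixtures behind this and the following tests are not shown; a minimal sketch of what they are assumed to look like, with `DummyContext` as a bare stand-in and `load_context` patched via pytest-mock (the patch target is an assumption):

import pytest


class DummyContext:
    """Bare stand-in for a real KedroContext in these tests."""


@pytest.fixture
def mocked_load_context(mocker):
    # The exact patch target depends on where get_project_context looks up load_context.
    return mocker.patch("kedro.cli.cli.load_context", return_value=DummyContext())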
Example #16
def test_verbose(self):
    assert not get_project_context("verbose")
Example #17
def test_project_path(self):
    key = "project_path"
    pattern = self._deprecation_msg(key)
    with warns(DeprecationWarning, match=pattern):
        assert get_project_context(key) == "dummy_path"
Example #18
def test_template_version(self):
    key = "template_version"
    pattern = self._deprecation_msg(key)
    with warns(DeprecationWarning, match=pattern):
        assert get_project_context(key) == "dummy_version"
Example #19
def test_create_pipeline(self):
    key = "create_pipeline"
    pattern = self._deprecation_msg(key)
    with warns(DeprecationWarning, match=pattern):
        pipeline = get_project_context(key)
        assert pipeline() == "pipeline"
Example #20
def test_create_catalog(self):
    key = "create_catalog"
    pattern = self._deprecation_msg(key)
    with warns(DeprecationWarning, match=pattern):
        catalog = get_project_context(key)
        assert catalog("config") == "catalog"
Example #21
def test_get_config(self, tmp_path):
    key = "get_config"
    pattern = self._deprecation_msg(key)
    with warns(DeprecationWarning, match=pattern):
        config_loader = get_project_context(key)
        assert config_loader(tmp_path) == "config_loader"
Example #22
def test_context(self):
    dummy_context = get_project_context("context")
    assert isinstance(dummy_context, DummyContext)
Example #23
def _get_dag_filename():
    project_path = get_project_context("project_path")
    project_name = get_project_context("project_name")
    dest_dir = project_path / "airflow_dags"
    return dest_dir / (slugify(project_name, separator="_") + "_dag.py")
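For illustration, with a hypothetical project named "My Project" located at /tmp/my-project, the returned path would be:

# slugify("My Project", separator="_") == "my_project"
# -> /tmp/my-project/airflow_dags/my_project_dag.py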
Example #24
def _image_callback(ctx, param, value):  # pylint: disable=unused-argument
    image = value or str(get_project_context("project_path").name)
    check_docker_image_exists(image)
    return image
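The `check_docker_image_exists` helper is not shown; a hedged sketch, assuming it shells out to the Docker CLI and fails when no local image matches the tag (the helper name and error type below are assumptions):

import subprocess


def check_docker_image_exists_sketch(image: str) -> None:
    """Illustrative only: raise if `docker images -q <image>` returns no image id."""
    result = subprocess.run(
        ["docker", "images", "-q", image],
        capture_output=True,
        text=True,
        check=True,
    )
    if not result.stdout.strip():
        raise RuntimeError(f"Unable to find image `{image}` locally.")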
Example #25
def test_get_context_with_project_path(self, tmpdir, mocked_load_context):
    dummy_project_path = tmpdir.mkdir("dummy_project")
    dummy_context = get_project_context("context", project_path=dummy_project_path)
    mocked_load_context.assert_called_once_with(dummy_project_path)
    assert isinstance(dummy_context, DummyContext)