Example #1
def docker_build(uid, gid, image, docker_args):
    """Build a Docker image for the project."""

    uid, gid = get_uid_gid(uid, gid)
    project_path = get_project_context("project_path")
    image = image or str(project_path.name)

    template_path = Path(__file__).parent / "template"
    verbose = get_project_context("verbose")
    copy_template_files(
        project_path,
        template_path,
        ["Dockerfile", ".dockerignore", ".dive-ci"],
        verbose,
    )

    combined_args = compose_docker_run_args(
        required_args=[
            ("--build-arg", "KEDRO_UID={0}".format(uid)),
            ("--build-arg", "KEDRO_GID={0}".format(gid)),
        ],
        # add image tag only if it is not already supplied by the user
        optional_args=[("-t", image)],
        user_args=docker_args,
    )
    command = ["docker", "build"] + combined_args + [str(project_path)]
    call(command)
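For context, a minimal sketch of what `compose_docker_run_args` is assumed to do with the three keyword arguments used above (the name `compose_docker_run_args_sketch` and its behaviour are assumptions, not the actual kedro-docker implementation): required flag/value pairs are always emitted, optional pairs are emitted only when the user has not already passed the same flag, and the raw user arguments are appended last.

from typing import List, Sequence, Tuple


def compose_docker_run_args_sketch(
    required_args: Sequence[Tuple[str, str]] = (),
    optional_args: Sequence[Tuple[str, str]] = (),
    user_args: Sequence[str] = (),
) -> List[str]:
    """Illustrative only: flatten flag/value pairs into a flat CLI argument list."""
    args: List[str] = []
    for flag, value in required_args:
        args += [flag, value]
    for flag, value in optional_args:
        # Keep an optional pair only when the user did not already supply the flag.
        if flag not in user_args:
            args += [flag, value]
    return args + list(user_args)


# compose_docker_run_args_sketch(
#     required_args=[("--build-arg", "KEDRO_UID=1000")],
#     optional_args=[("-t", "my-project")],
#     user_args=["--no-cache"],
# )
# -> ["--build-arg", "KEDRO_UID=1000", "-t", "my-project", "--no-cache"]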
Example #2
def _get_pipeline_catalog_from_kedro14(env):
    try:
        pipeline = get_project_context("create_pipeline")()
        get_config = get_project_context("get_config")
        conf = get_config(str(Path.cwd()), env)
        create_catalog = get_project_context("create_catalog")
        catalog = create_catalog(config=conf)
        return pipeline, catalog
    except (ImportError, KeyError):
        raise KedroCliError(ERROR_PROJECT_ROOT)
Example #3
def docker_init(spark):
    """Initialize a Dockerfile for the project."""
    project_path = get_project_context("project_path")

    template_path = Path(__file__).parent / "template"
    verbose = get_project_context("verbose")
    docker_file_version = "spark" if spark else "simple"
    docker_file = f"Dockerfile.{docker_file_version}"
    copy_template_files(
        project_path,
        template_path,
        [docker_file, ".dockerignore", ".dive-ci"],
        verbose,
    )
Example #4
def create():
    """Create an Airflow DAG for a project"""

    src_file = Path(__file__).parent / "dag_template.py"
    dest_file = _get_dag_filename()
    dest_file.parent.mkdir(parents=True, exist_ok=True)
    template = Template(src_file.read_text(encoding="utf-8"),
                        keep_trailing_newline=True)

    try:
        from kedro.context import (  # noqa:F401 pylint: disable=unused-import
            load_context,
        )

        context_compatibility_mode = False
    except ImportError:  # pragma: no coverage
        context_compatibility_mode = True

    dest_file.write_text(
        template.render(
            project_name=get_project_context("project_name"),
            project_path=get_project_context("project_path"),
            context_compatibility_mode=context_compatibility_mode,
        ),
        encoding="utf-8",
    )

    secho("")
    secho("An Airflow DAG has been generated in:", fg="green")
    secho(str(dest_file))
    secho("This file should be copied to your Airflow DAG folder.",
          fg="yellow")
    secho("The Airflow configuration can be customized by editing this file.",
          fg="green")
    secho("")
    secho(
        "This file also contains the path to the config directory, this directory will need to "
        "be available to Airflow and any workers.",
        fg="yellow",
    )
    secho("")
    secho(
        "Additionally all data sets must have an entry in the data catalog.",
        fg="yellow",
    )
    secho(
        "And all local paths in both the data catalog and log config must be absolute paths.",
        fg="yellow",
    )
    secho("")
Example #5
def argokedro(image, templates_folder, ytt, namespace):
    """Creates an argo pipeline yaml
    """
    pc = cli.get_project_context()
    pipeline = pc.pipeline
    project_name = pc.project_name
    parameters = pc.catalog.load("parameters")
    pretty_params = transform_parameters(parameters)
    dependencies = pipeline.node_dependencies
    deps_dict = get_deps_dict(dependencies)
    tags = get_tags(pipeline)
    tagged_deps_dict = update_deps_dict_with_tags(deps_dict, tags)
    kedro_dict = {
        "tasks": tagged_deps_dict,
        "image": image,
        "project_name": project_name,
        "parameters": pretty_params,
        "namespace": namespace,
    }
    kedro_yaml = generate_yaml(kedro_dict)
    if ytt:
        kedro_yaml = ytt_add_values_part(kedro_yaml)
        copy_template(templates_folder, ytt)
        logging.info(f"YTT template saved in {templates_folder} folder")
    save_yaml(kedro_yaml, templates_folder)
    logging.info(f"Kedro template saved in {templates_folder} folder")
    if ytt:
        click.secho(FINISHED_MESSAGE_YTT)
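The `get_deps_dict` helper is not shown above; a minimal sketch, assuming the Argo template only needs each node's name and the names of its upstream nodes (the exact output format here is an assumption):

def get_deps_dict_sketch(node_dependencies):
    """Illustrative only: map each node name to the names of the nodes it depends on.

    `node_dependencies` is expected to be a mapping of Node -> set of upstream Nodes,
    which is what kedro's Pipeline.node_dependencies returns.
    """
    return {
        node.name: {"name": node.name, "dep": sorted(dep.name for dep in deps)}
        for node, deps in node_dependencies.items()
    }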
Example #6
def import_line(name):
    """generate an import line for something in the project_context"""
    func = get_project_context(name)
    res = "from {} import {}".format(func.__module__, func.__name__)
    if func.__name__ != name:
        res = "{} as {}".format(res, name)
    return res
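For illustration, assuming the project exposes `create_pipeline` from a module named `my_project.pipeline` (both names hypothetical), the helper would return strings such as:

# import_line("create_pipeline")
# -> "from my_project.pipeline import create_pipeline"
#
# If the object's __name__ differs from the requested key, an alias is appended:
# -> "from my_project.pipeline import make_pipeline as create_pipeline"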
Example #7
def _mount_info() -> Dict[str, Union[str, Tuple]]:
    project_path = get_project_context("project_path")
    res = dict(
        host_root=str(project_path),
        container_root="/home/kedro",
        mount_volumes=DOCKER_DEFAULT_VOLUMES,
    )
    return res
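A hedged sketch of how this mapping might later be turned into `docker run -v` arguments; `DOCKER_DEFAULT_VOLUMES` is assumed to be an iterable of project sub-directories such as ("conf", "data", "logs"), and the helper name below is made up:

from typing import Dict, List, Tuple, Union


def mount_volume_args(info: Dict[str, Union[str, Tuple]]) -> List[str]:
    """Illustrative only: build one `-v host:container` pair per mounted sub-directory."""
    args: List[str] = []
    for volume in info["mount_volumes"]:
        host = f"{info['host_root']}/{volume}"
        container = f"{info['container_root']}/{volume}"
        args += ["-v", f"{host}:{container}"]
    return args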
Example #8
def nodes():
    """Serve the pipeline data."""
    pipeline = get_project_context("create_pipeline")()
    return jsonify([{
        "name": n.name,
        "inputs": [ds.split("@")[0] for ds in n.inputs],
        "outputs": [ds.split("@")[0] for ds in n.outputs],
        "tags": list(n.tags),
    } for n in pipeline.nodes])
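For reference, the endpoint returns a JSON list with one entry per node; a hand-written sample (node and dataset names are made up):

# [
#     {
#         "name": "preprocess_companies_node",
#         "inputs": ["companies"],
#         "outputs": ["preprocessed_companies"],
#         "tags": ["preprocessing"]
#     }
# ]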
Example #9
def create():
    """Create an Airflow DAG for a project"""

    src_file = Path(__file__).parent / "dag_template.py"
    dest_file = _get_dag_filename()
    dest_file.parent.mkdir(parents=True, exist_ok=True)
    template = Template(src_file.read_text(encoding="utf-8"),
                        keep_trailing_newline=True)
    dest_file.write_text(
        template.render(
            project_name=get_project_context("project_name"),
            import_get_config=import_line("get_config"),
            import_create_catalog=import_line("create_catalog"),
            import_create_pipeline=import_line("create_pipeline"),
            project_path=get_project_context("project_path"),
        ),
        encoding="utf-8",
    )

    secho("")
    secho("An Airflow DAG has been generated in:", fg="green")
    secho(str(dest_file))
    secho("This file should be copied to your Airflow DAG folder.",
          fg="yellow")
    secho("The Airflow configuration can be customized by editing this file.",
          fg="green")
    secho("")
    secho(
        "This file also contains the path to the config directory, this directory will need to "
        "be available to Airflow and any workers.",
        fg="yellow",
    )
    secho("")
    secho(
        "Additionally all data sets must have an entry in the data catalog.",
        fg="yellow",
    )
    secho(
        "And all local paths in both the data catalog and log config must be absolute paths.",
        fg="yellow",
    )
    secho("")
Example #10
def nodes_json():
    """Serve the pipeline data."""
    def pretty_name(name):
        name = name.replace("-", " ").replace("_", " ")
        parts = [n[0].upper() + n[1:] for n in name.split()]
        return " ".join(parts)

    pipeline = get_project_context("create_pipeline")()

    nodes = []
    edges = []
    namespace_tags = defaultdict(set)
    all_tags = set()

    for node in sorted(pipeline.nodes):
        task_id = "task/" + node.name.replace(" ", "")
        nodes.append({
            "type": "task",
            "id": task_id,
            "name": getattr(node, "short_name", node.name),
            "full_name": str(node),
            "tags": sorted(node.tags),
        })
        all_tags.update(node.tags)
        for data_set in node.inputs:
            namespace = data_set.split("@")[0]
            edges.append({"source": "data/" + namespace, "target": task_id})
            namespace_tags[namespace].update(node.tags)
        for data_set in node.outputs:
            namespace = data_set.split("@")[0]
            edges.append({"source": task_id, "target": "data/" + namespace})
            namespace_tags[namespace].update(node.tags)

    for namespace, tags in sorted(namespace_tags.items()):
        nodes.append({
            "type": "data",
            "id": "data/" + namespace,
            "name": pretty_name(namespace),
            "full_name": namespace,
            "tags": sorted(tags),
            "is_parameters": bool("param" in namespace.lower()),
        })

    tags = []
    for tag in sorted(all_tags):
        tags.append({"id": tag, "name": pretty_name(tag)})

    return jsonify(
        {"snapshots": [{
            "nodes": nodes,
            "edges": edges,
            "tags": tags
        }]})
Example #11
def get_data_from_kedro():
    """ Get pipeline data from Kedro and format it appropriately """
    def pretty_name(name):
        name = name.replace("-", " ").replace("_", " ")
        parts = [n[0].upper() + n[1:] for n in name.split()]
        return " ".join(parts)

    pipeline = get_project_context("create_pipeline")()

    nodes = []
    edges = []
    namespace_tags = defaultdict(set)
    all_tags = set()

    for node in sorted(pipeline.nodes, key=lambda n: n.name):
        task_id = _hash(str(node))
        nodes.append({
            "type": "task",
            "id": task_id,
            "name": getattr(node, "short_name", node.name),
            "full_name": getattr(node, "_func_name", str(node)),
            "tags": sorted(node.tags),
        })
        all_tags.update(node.tags)
        for data_set in node.inputs:
            namespace = data_set.split("@")[0]
            edges.append({"source": _hash(namespace), "target": task_id})
            namespace_tags[namespace].update(node.tags)
        for data_set in node.outputs:
            namespace = data_set.split("@")[0]
            edges.append({"source": task_id, "target": _hash(namespace)})
            namespace_tags[namespace].update(node.tags)

    for namespace, tags in sorted(namespace_tags.items()):
        is_param = bool("param" in namespace.lower())
        nodes.append({
            "type": "parameters" if is_param else "data",
            "id": _hash(namespace),
            "name": pretty_name(namespace),
            "full_name": namespace,
            "tags": sorted(tags),
        })

    tags = []
    for tag in sorted(all_tags):
        tags.append({"id": tag, "name": pretty_name(tag)})

    return {"nodes": nodes, "edges": edges, "tags": tags}
Example #12
def docker_build(ctx, uid, gid, spark, base_image, image, docker_args):
    """Build a Docker image for the project."""
    uid, gid = get_uid_gid(uid, gid)
    project_path = get_project_context("project_path")
    image = image or str(project_path.name)

    ctx.invoke(docker_init, spark=spark)

    combined_args = compose_docker_run_args(
        required_args=[
            ("--build-arg", f"KEDRO_UID={uid}"),
            ("--build-arg", f"KEDRO_GID={gid}"),
            ("--build-arg", f"BASE_IMAGE={base_image}"),
        ],
        # add image tag only if it is not already supplied by the user
        optional_args=[("-t", image)],
        user_args=docker_args,
    )
    command = ["docker", "build"] + combined_args + [str(project_path)]
    call(command)
Example #13
def _call_viz(
    host=None,
    port=None,
    browser=None,
    load_file=None,
    save_file=None,
    pipeline_name=None,
    env=None,
):
    global data  # pylint: disable=global-statement,invalid-name

    if load_file:
        data = _load_from_file(load_file)
    else:
        if match(kedro.__version__, ">=0.15.0"):
            from kedro.context import KedroContextError

            try:
                context = get_project_context("context", env=env)
                pipeline = _get_pipeline_from_context(context, pipeline_name)
            except KedroContextError:
                raise KedroCliError(ERROR_PROJECT_ROOT)
            catalog = context.catalog

        else:
            # Kedro 0.14.*
            if pipeline_name:
                raise KedroCliError(ERROR_PIPELINE_FLAG_NOT_SUPPORTED)
            pipeline, catalog = _get_pipeline_catalog_from_kedro14(env)

        data = format_pipeline_data(pipeline, catalog)

    if save_file:
        Path(save_file).write_text(json.dumps(data, indent=4, sort_keys=True))
    else:
        if browser:
            webbrowser.open_new("http://127.0.0.1:{:d}/".format(port))
        app.run(host=host, port=port)
Example #14
def profile(name):
    """ Kedro plugin for utilizing Pandas Profiling """
    conf_dict = kedro_conf_path()
    catalog_df = get_catalog_details(conf_dict)
    project_path = get_project_context("project_path")

    if name is None:
        print(catalog_df)
    else:
        data_path = catalog_df.at[name, "filepath"]
        data = pd_reader(project_path / data_path)

        print(f"Profiling {name} DataSet...")

        profile = data.profile_report(title=f"DataSet {name} - Profile Report",
                                      pool_size=0)

        output_path = project_path / "data" / "08_reporting" / f"{name}.html"
        profile.to_file(output_file=output_path)

        print(f"{name.title()} profile printed to {output_path}")

        return None
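The `pd_reader` helper is not shown above; a minimal sketch, assuming it simply dispatches on the file extension found in the catalog entry:

from pathlib import Path

import pandas as pd


def pd_reader(path: Path) -> pd.DataFrame:
    """Illustrative only: choose a pandas reader based on the file suffix."""
    readers = {".csv": pd.read_csv, ".parquet": pd.read_parquet, ".json": pd.read_json}
    try:
        return readers[path.suffix.lower()](path)
    except KeyError:
        raise ValueError(f"Unsupported file type: {path.suffix}") from None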
Example #15
def test_get_context_without_project_path(self, mocked_load_context):
    dummy_context = get_project_context("context")
    mocked_load_context.assert_called_once_with(Path.cwd())
    assert isinstance(dummy_context, DummyContext)
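The fixtures behind this and the following tests are not shown; a minimal sketch of what they are assumed to look like, with `DummyContext` as a bare stand-in and `load_context` patched via pytest-mock (the patch target is an assumption):

import pytest


class DummyContext:
    """Bare stand-in for a real KedroContext in these tests."""


@pytest.fixture
def mocked_load_context(mocker):
    # The exact patch target depends on where get_project_context looks up load_context.
    return mocker.patch("kedro.cli.cli.load_context", return_value=DummyContext())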
Example #16
def test_verbose(self):
    assert not get_project_context("verbose")
Example #17
def test_project_path(self):
    key = "project_path"
    pattern = self._deprecation_msg(key)
    with warns(DeprecationWarning, match=pattern):
        assert get_project_context(key) == "dummy_path"
Example #18
def test_template_version(self):
    key = "template_version"
    pattern = self._deprecation_msg(key)
    with warns(DeprecationWarning, match=pattern):
        assert get_project_context(key) == "dummy_version"
Example #19
def test_create_pipeline(self):
    key = "create_pipeline"
    pattern = self._deprecation_msg(key)
    with warns(DeprecationWarning, match=pattern):
        pipeline = get_project_context(key)
        assert pipeline() == "pipeline"
Example #20
def test_create_catalog(self):
    key = "create_catalog"
    pattern = self._deprecation_msg(key)
    with warns(DeprecationWarning, match=pattern):
        catalog = get_project_context(key)
        assert catalog("config") == "catalog"
Example #21
def test_get_config(self, tmp_path):
    key = "get_config"
    pattern = self._deprecation_msg(key)
    with warns(DeprecationWarning, match=pattern):
        config_loader = get_project_context(key)
        assert config_loader(tmp_path) == "config_loader"
Example #22
def test_context(self):
    dummy_context = get_project_context("context")
    assert isinstance(dummy_context, DummyContext)
Example #23
def _get_dag_filename():
    project_path = get_project_context("project_path")
    project_name = get_project_context("project_name")
    dest_dir = project_path / "airflow_dags"
    return dest_dir / (slugify(project_name, separator="_") + "_dag.py")
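For illustration, with a hypothetical project named "My Project" located at /tmp/my-project, the returned path would be:

# slugify("My Project", separator="_") == "my_project"
# -> /tmp/my-project/airflow_dags/my_project_dag.py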
Example #24
def _image_callback(ctx, param, value):  # pylint: disable=unused-argument
    image = value or str(get_project_context("project_path").name)
    check_docker_image_exists(image)
    return image
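The `check_docker_image_exists` helper is not shown; a hedged sketch, assuming it shells out to the Docker CLI and fails when no local image matches the tag (the helper name and error type below are assumptions):

import subprocess


def check_docker_image_exists_sketch(image: str) -> None:
    """Illustrative only: raise if `docker images -q <image>` returns no image id."""
    result = subprocess.run(
        ["docker", "images", "-q", image],
        capture_output=True,
        text=True,
        check=True,
    )
    if not result.stdout.strip():
        raise RuntimeError(f"Unable to find image `{image}` locally.")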
Example #25
def test_get_context_with_project_path(self, tmpdir, mocked_load_context):
    dummy_project_path = tmpdir.mkdir("dummy_project")
    dummy_context = get_project_context("context", project_path=dummy_project_path)
    mocked_load_context.assert_called_once_with(dummy_project_path)
    assert isinstance(dummy_context, DummyContext)