Пример #1
0
def lineage_graph_command(
    workspace: Workspace,
    output_file: str,
    resource_name: Optional[str],
    snapshot: Optional[str],
    format="html",
    width: int = 1024,
    height: int = 800,
) -> None:
    """Write a simplified lineage graph for one workspace resource.

    The graph is produced by ``make_simplified_lineage_graph_for_resource``
    and a confirmation message is echoed through ``click`` afterwards.

    Args:
        workspace: Workspace to read lineage from. It must be a
            ``SnapshotWorkspaceMixin`` and report ``supports_lineage()``.
        output_file: Destination passed through to the graph generator.
        resource_name: Resource to graph. When ``None``, the first resource
            whose role is ``ResourceRoles.RESULTS`` is used.
        snapshot: Optional snapshot tag or hash. When given, it is resolved
            through ``get_snapshot_by_tag_or_hash`` and the resulting hash
            is forwarded to the graph generator.
        format: Output format, forwarded unchanged to the graph generator.
        width: Graph width, forwarded unchanged to the graph generator.
        height: Graph height, forwarded unchanged to the graph generator.

    Raises:
        ConfigurationError: If the workspace is not a
            ``SnapshotWorkspaceMixin``, does not support lineage, or no
            results resource exists when ``resource_name`` is omitted.
    """
    # Capability checks come first so nothing else is touched on failure.
    if not isinstance(workspace, SnapshotWorkspaceMixin):
        message = "Workspace %s does not support snapshots and lineage" % workspace.name
        raise ConfigurationError(message)
    if not workspace.supports_lineage():
        message = "Workspace %s does not support lineage" % workspace.name
        raise ConfigurationError(message)
    lineage_store = workspace.get_lineage_store()

    # Translate the user-supplied tag/hash into the snapshot's hash value.
    snapshot_hash = None  # type: Optional[str]
    if snapshot is not None:
        snapshot_hash = workspace.get_snapshot_by_tag_or_hash(snapshot).hashval

    if resource_name is None:
        # No explicit resource: fall back to the first results resource.
        resource_name = next(
            (
                candidate
                for candidate in workspace.get_resource_names()
                if workspace.get_resource_role(candidate) == ResourceRoles.RESULTS
            ),
            None,
        )
        if resource_name is None:
            raise ConfigurationError(
                "Did not find a results resource in workspace. If you want to graph the lineage of a non-results resource, use the --resource option."
            )
    else:
        workspace.validate_resource_name(resource_name)

    make_simplified_lineage_graph_for_resource(
        workspace.get_instance(),
        lineage_store,
        resource_name,
        output_file,
        snapshot_hash=snapshot_hash,
        format=format,
        width=width,
        height=height,
    )

    if snapshot is not None:
        summary = "Wrote lineage for %s as of snapshot %s to %s" % (
            resource_name,
            snapshot,
            output_file,
        )
    else:
        summary = "Wrote lineage for %s to %s" % (resource_name, output_file)
    click.echo(summary)
Пример #2
0
    def __init__(
        self,
        step_name: str,
        start_time: datetime.datetime,
        parameters: Dict[str, Any],
        inputs: List[Union[str, ResourceRef]],
        code: List[Union[str, ResourceRef]],
        workspace: Workspace,
        command_line: Optional[List[str]] = None,
        current_directory: Optional[str] = None,
    ):
        """Resolve a step's inputs and code, then create its step lineage.

        Args:
            step_name: Name of the step, forwarded to
                ``StepLineage.make_step_lineage``.
            start_time: Start time of the step, forwarded unchanged.
            parameters: Step parameters, forwarded unchanged.
            inputs: Input locations. ``ResourceRef`` entries are validated
                against the workspace; any other entry is treated as a local
                path and mapped to a resource reference.
            code: Code locations. ``ResourceRef`` entries are validated with
                ``expected_role=ResourceRoles.CODE`` and kept as given; path
                entries are mapped to a resource and reduced to a
                resource-level reference (no subpath), without duplicates.
            workspace: Workspace the step runs in.
            command_line: Optional command line, forwarded unchanged.
            current_directory: Optional directory the step runs from. It is
                mapped to a resource reference when possible; otherwise the
                step is recorded with no run-from directory.
        """
        self.workspace = workspace  # type: Workspace
        self.instance = workspace.get_instance()
        # NOTE: lineage support is not verified here; the workspace is only
        # cast (a static-typing hint, no runtime check) before the lineage
        # store is requested.
        self.store = cast(SnapshotWorkspaceMixin, workspace).get_lineage_store()

        # Inputs: accept explicit references after validation, map raw paths.
        input_refs = []  # type: List[ResourceRef]
        for entry in inputs:
            if not isinstance(entry, ResourceRef):
                resolved_input = workspace.map_local_path_to_resource(entry)
            else:
                workspace.validate_resource_name(entry.name, entry.subpath)
                resolved_input = entry
            input_refs.append(resolved_input)

        # Code: explicit references keep their subpath; raw paths are
        # resolved at the resource level only, so the subpath is dropped and
        # repeated resources are recorded once.
        code_refs = []  # type: List[ResourceRef]
        for entry in code:
            if isinstance(entry, ResourceRef):
                self.workspace.validate_resource_name(
                    entry.name, entry.subpath, expected_role=ResourceRoles.CODE
                )
                code_refs.append(entry)
                continue
            mapped = workspace.map_local_path_to_resource(
                entry, expecting_a_code_resource=True
            )
            resource_level_ref = ResourceRef(mapped.name, None)
            if resource_level_ref not in code_refs:
                code_refs.append(resource_level_ref)

        # The run-from directory is a resource reference when the directory
        # lies inside a resource; otherwise it stays None.
        run_from_directory = None  # type: Optional[ResourceRef]
        if current_directory is not None:
            try:
                if isabs(current_directory):
                    absolute_dir = current_directory
                else:
                    absolute_dir = abspath(expanduser(current_directory))
                run_from_directory = workspace.map_local_path_to_resource(
                    absolute_dir
                )
            except PathNotAResourceError:
                run_from_directory = None

        self.step = StepLineage.make_step_lineage(
            workspace.get_instance(),
            step_name,
            start_time,
            parameters,
            input_refs,
            code_refs,
            self.store,
            command_line=command_line,
            run_from_directory=run_from_directory,
        )
        self.in_progress = True