Пример #1
0
def _find_resource(
        workspace: Workspace,
        role: str,
        name_or_ref: Optional[Union[str, ResourceRef]] = None) -> ResourceRef:
    """Resolve ``name_or_ref`` to a ``ResourceRef`` for ``workspace``.

    Resolution rules:

    * A string that does not begin with ``./`` or ``/`` and equals one of
      the workspace's resource names becomes ``ResourceRef(name)``.
    * Any other string for which ``exists()`` is true is handed to
      ``workspace.map_local_path_to_resource``.
    * A ``ResourceRef`` is validated through
      ``workspace.validate_resource_name`` and returned unchanged.
    * Anything else (normally ``None``) selects the one resource whose role
      equals ``role``.

    Raises:
        LineageError: if a string matches neither a resource name nor an
            existing path, or if zero or several resources have ``role``
            when no resource was specified.
    """
    resource_names = list(workspace.get_resource_names())

    if isinstance(name_or_ref, str):
        path_like = (name_or_ref.startswith("./")
                     or name_or_ref.startswith("/"))
        if (not path_like) and (name_or_ref in resource_names):
            return ResourceRef(name_or_ref)
        if exists(name_or_ref):
            return workspace.map_local_path_to_resource(
                name_or_ref, expecting_a_code_resource=False)
        # Neither a known name nor a path: list what is defined to help the
        # user correct the call.
        raise LineageError(
            "Could not find a resource for '" + name_or_ref +
            "' with role '" + role +
            "' in your workspace. Please create a resource" +
            " using the 'dws add' command or correct the name. " +
            "Currently defined resources are: " + ", ".join(
                "%s (role %s)" % (n, workspace.get_resource_role(n))
                for n in resource_names) + ".")

    if isinstance(name_or_ref, ResourceRef):
        workspace.validate_resource_name(name_or_ref.name, name_or_ref.subpath)
        return name_or_ref

    # Nothing was specified: the role must identify exactly one resource.
    only_match = None
    for candidate in workspace.get_resource_names():
        if workspace.get_resource_role(candidate) != role:
            continue
        if only_match is not None:
            raise LineageError(
                "There is more than one resource for role " + role +
                " in your workspace. Please specify the resource you want"
                +
                " in model wrapping function or use a wrapped data set"
            )
        only_match = ResourceRef(candidate, subpath=None)

    if only_match is None:
        raise LineageError(
            "Could not find a " + role +
            " resource in your workspace. Please create a resource" +
            " using the dws add command.")
    return only_match
Пример #2
0
def build_resource_list(
    workspace: Workspace, only: Optional[List[str]], skip: Optional[List[str]]
) -> List[str]:
    """Compute the ordered resource names a push or pull should act on.

    Only resources reported by
    ``workspace.get_names_of_resources_with_local_state()`` are returned;
    any others that would have been selected are listed in a
    "Skipping ..." message via ``click.echo``.

    * ``only`` given: the result follows the order of ``only``.
    * ``skip`` given: the result follows workspace order, minus ``skip``.
    * neither given: every workspace resource with local state.

    Raises:
        ConfigurationError: if both ``only`` and ``skip`` are given, or if
            either contains a name that is not a workspace resource.
    """
    if (only is not None) and (skip is not None):
        raise ConfigurationError("Cannot specify both --only and --skip")

    known = frozenset(workspace.get_resource_names())
    with_local_state = frozenset(
        workspace.get_names_of_resources_with_local_state())

    def report_skipped(names):
        # Tell the user which selected resources are dropped for lack of
        # local state; silent when there are none.
        if len(names) > 0:
            click.echo(
                "Skipping the following resources, which do not have local state: %s"
                % ", ".join(sorted(names))
            )

    if only is not None:
        requested = frozenset(only)
        unknown = requested - known
        if unknown:
            raise ConfigurationError(
                "Invalid resource names were included with --only: %s" % ", ".join(sorted(unknown))
            )
        report_skipped(requested - with_local_state)
        return [rn for rn in only if rn in with_local_state]

    if skip is not None:
        excluded = frozenset(skip)
        unknown = excluded - known
        if unknown:
            raise ConfigurationError(
                "Invalid resource names were included with --skip: %s" % ", ".join(sorted(unknown))
            )
        stateless = (known - excluded) - with_local_state
        report_skipped(stateless)
        excluded = excluded | stateless
        return [rn for rn in workspace.get_resource_names() if rn not in excluded]

    stateless = known - with_local_state
    report_skipped(stateless)
    return [rn for rn in workspace.get_resource_names() if rn not in stateless]
Пример #3
0
def add_command(scheme: str, role: str, name: str, workspace: Workspace,
                *args):
    """Add a resource to ``workspace`` under a unique name and save it.

    Args:
        scheme: Resource scheme, passed through to
            ``workspace.suggest_resource_name`` and ``workspace.add_resource``.
        role: Role of the new resource.
        name: Requested resource name. ``None`` is accepted and means
            "pick one": in batch mode the workspace's suggestion is used,
            otherwise the user is prompted with the suggestion as default.
        workspace: Workspace receiving the resource.
        *args: Extra scheme-specific arguments, forwarded unchanged.

    Raises:
        ConfigurationError: in batch mode, if ``name`` is already in use.
    """
    current_names = set(workspace.get_resource_names())
    if workspace.batch:
        # No user to ask: either take the suggestion or fail on a clash.
        if name is None:
            name = workspace.suggest_resource_name(scheme, role, *args)
        elif name in current_names:
            raise ConfigurationError("Resource name '%s' already in use" %
                                     name)
    else:
        suggested_name = None
        # Keep prompting until we hold a name that is not already taken.
        while (name is None) or (name in current_names):
            if suggested_name is None:
                # Computed lazily and only once across retries.
                suggested_name = workspace.suggest_resource_name(
                    scheme, role, *args)
            name = click.prompt(
                "Please enter a short, unique name for this resource",
                default=suggested_name)
            if name in current_names:
                click.echo("Resource name '%s' already in use." % name,
                           err=True)

    workspace.add_resource(name, scheme, role, *args)
    workspace.save("add of %s" % name)
    click.echo("Successfully added resource '%s' to workspace." % name)
Пример #4
0
def lineage_graph_command(
    workspace: Workspace,
    output_file: str,
    resource_name: Optional[str],
    snapshot: Optional[str],
    format="html",
    width: int = 1024,
    height: int = 800,
) -> None:
    """Write a simplified lineage graph for one resource to ``output_file``.

    When ``resource_name`` is ``None`` the first workspace resource whose
    role is ``ResourceRoles.RESULTS`` is graphed. When ``snapshot`` is
    given, it is resolved with ``get_snapshot_by_tag_or_hash`` and its hash
    is passed on to the graph builder.

    Raises:
        ConfigurationError: if the workspace is not a
            ``SnapshotWorkspaceMixin``, does not support lineage, or has no
            results resource to fall back on.
    """
    if not isinstance(workspace, SnapshotWorkspaceMixin):
        raise ConfigurationError(
            "Workspace %s does not support snapshots and lineage" %
            workspace.name)
    if not workspace.supports_lineage():
        raise ConfigurationError("Workspace %s does not support lineage" %
                                 workspace.name)
    store = workspace.get_lineage_store()

    if snapshot is None:
        snapshot_hash = None  # type: Optional[str]
    else:
        snapshot_hash = workspace.get_snapshot_by_tag_or_hash(snapshot).hashval

    if resource_name is None:
        # Default to the first results resource, in workspace order.
        resource_name = next(
            (candidate for candidate in workspace.get_resource_names()
             if workspace.get_resource_role(candidate) == ResourceRoles.RESULTS),
            None,
        )
        if resource_name is None:
            raise ConfigurationError(
                "Did not find a results resource in workspace. If you want to graph the lineage of a non-results resource, use the --resource option."
            )
    else:
        workspace.validate_resource_name(resource_name)

    make_simplified_lineage_graph_for_resource(
        workspace.get_instance(),
        store,
        resource_name,
        output_file,
        snapshot_hash=snapshot_hash,
        format=format,
        width=width,
        height=height,
    )

    if snapshot is None:
        summary = "Wrote lineage for %s to %s" % (resource_name, output_file)
    else:
        summary = ("Wrote lineage for %s as of snapshot %s to %s" %
                   (resource_name, snapshot, output_file))
    click.echo(summary)
Пример #5
0
def print_resource_status(workspace: Workspace):
    """Print a table of the workspace's resources, grouped by role.

    Rows follow the order of ``RESOURCE_ROLE_CHOICES``; within a role they
    follow workspace order. The "Parameters" column lists every resource
    parameter except ``resource_type``, ``name`` and ``role``. Roles that
    have no resources are reported in a trailing message.
    """
    # Bucket resource names under their role, one bucket per known role.
    names_by_role = {}  # type:Dict[str,List[str]]
    for known_role in RESOURCE_ROLE_CHOICES:
        names_by_role[known_role] = []
    for rname in workspace.get_resource_names():
        names_by_role[workspace.get_resource_role(rname)].append(rname)

    hidden_params = ("resource_type", "name", "role")
    col_names = []
    col_roles = []
    col_types = []
    col_params = []
    roles_without_resources = []
    for role in RESOURCE_ROLE_CHOICES:
        members = names_by_role[role]
        if len(members) == 0:
            roles_without_resources.append(role)
            continue
        for rname in members:
            col_names.append(rname)
            col_roles.append(role)
            col_types.append(workspace.get_resource_type(rname))
            shown = [
                "%s=%s" % (pname, pval)
                for (pname, pval) in workspace._get_resource_params(rname).items()
                if pname not in hidden_params
            ]
            col_params.append(",\n".join(shown))

    table = {
        "Resource": col_names,
        "Role": col_roles,
        "Type": col_types,
        "Parameters": col_params
    }
    print_columns(
        table,
        null_value="",
        title="Resources for workspace: %s" % workspace.name,
        paginate=False,
    )
    if len(roles_without_resources) > 0:
        click.echo("No resources for the following roles: %s." %
                   ", ".join(roles_without_resources))
Пример #6
0
def config_command(
    workspace: Workspace,
    param_name: Optional[str],
    param_value: Optional[str],
    resource: Optional[str],
):
    """List, show, or set configuration parameters.

    * No ``param_name`` and no ``param_value``: print a table of all
      parameters, for the workspace or (if ``resource`` is given) for that
      resource.
    * ``param_name`` only: print that parameter's value, scope, default
      flag and description.
    * ``param_name`` and ``param_value``: parse and store the value, then
      save the workspace.

    Raises:
        ConfigurationError: if ``resource`` is not a resource of the
            workspace.
        ParamNotFoundError: if ``param_name`` is not a known parameter for
            the workspace / resource.
    """
    if param_name is None and param_value is None:
        # --- Listing mode: one row per parameter of every applicable handler.
        col_name = []
        col_scope = []
        col_value = []
        col_default = []
        col_help = []
        if resource is None:
            handlers = [
                GlobalWorkspaceHandler(workspace),
                LocalWorkspaceHandler(workspace),
            ]  # type: List[ParamConfigHandler]
        else:
            if resource not in workspace.get_resource_names():
                raise ConfigurationError(
                    "No resource in this workspace with name '%s'" % resource)
            resource_obj = workspace.get_resource(resource)
            handlers = [GlobalResourceHandler(resource_obj, workspace)]
            if isinstance(resource_obj, LocalStateResourceMixin):
                handlers.append(LocalResourceHandler(resource_obj, workspace))
        for current in handlers:
            for pname in current.defs.keys():
                col_name.append(pname)
                col_scope.append(current.get_scope())
                col_help.append(current.defs[pname].help)
                col_value.append(current.get_value(pname))
                if current.is_default(pname):
                    col_default.append("Y")
                else:
                    col_default.append("N")
        print_columns(
            {
                "Name": col_name,
                "Scope": col_scope,
                "Value": col_value,
                "Default?": col_default,
                "Description": col_help,
            },
            spec={"Description": ColSpec(width=40)},
            paginate=False,
        )
        click.echo()
        return

    assert param_name is not None

    def choose_handler():
        # Pick the handler that owns param_name, or raise if nobody does.
        if resource is None:
            if param_name in PARAM_DEFS:
                return GlobalWorkspaceHandler(workspace)
            if param_name in LOCAL_PARAM_DEFS:
                return LocalWorkspaceHandler(workspace)
            raise ParamNotFoundError("No workspace parameter named '%s'" %
                                     param_name)
        # resource-specific
        if resource not in workspace.get_resource_names():
            raise ConfigurationError(
                "No resource in this workspace with name '%s'" % resource)
        target = workspace.get_resource(resource)
        if isinstance(target, LocalStateResourceMixin) and (
                param_name in target.get_local_params()):
            return LocalResourceHandler(target, workspace)
        if param_name in target.get_params().keys():
            return GlobalResourceHandler(target, workspace)
        raise ParamNotFoundError(
            "Resource %s has no parameter named '%s'" %
            (resource, param_name))

    handler = choose_handler()

    if param_value is None:
        # --- Show mode: print just this parameter.
        title = "%s parameter '%s'" % (handler.get_what_for().capitalize(),
                                       param_name)
        click.echo(title)
        click.echo("=" * len(title))
        click.echo()
        default_flag = "Y" if handler.is_default(param_name) else "N"
        print_columns(
            {
                "Value": [handler.get_value(param_name)],
                "Scope": [handler.get_scope()],
                "Default?": [default_flag],
                "Description": [handler.defs[param_name].help],
            },
            spec={"Description": ColSpec(width=60)},
            paginate=False,
        )
        click.echo()
        return

    # --- Set mode: parse, store the JSON form, and persist.
    parsed_value = handler.defs[param_name].parse(param_value)
    json_value = handler.defs[param_name].to_json(parsed_value)
    handler.set_value(param_name, json_value)
    param_for = handler.get_what_for()
    workspace.save("Update of %s parameter %s" %
                   (param_for, param_name))
    click.echo("Successfully set %s %s parameter '%s' to %s." %
               (param_for, handler.get_scope(), param_name,
                repr(parsed_value)))
Пример #7
0
def restore_command(
    workspace: Workspace,
    tag_or_hash: str,
    only: Optional[List[str]] = None,
    leave: Optional[List[str]] = None,
    strict: bool = False,
) -> int:
    """Restore resources to a snapshot and return how many were restored.

    Args:
        workspace: Workspace to restore; must be a ``SnapshotWorkspaceMixin``.
        tag_or_hash: Tag or hash identifying the snapshot.
        only: If given, restore exactly these resources. This also forces
            strict mode, because the user named the resources explicitly.
        leave: If given, restore everything except these resources.
        strict: When true, situations that are otherwise reported and
            skipped (result resources in the restore set, resources removed
            from or added to the workspace relative to the snapshot) raise
            ``ConfigurationError`` instead.

    Returns:
        The number of resources restored; 0 if nothing qualifies.

    Raises:
        ConfigurationError: if the workspace does not support snapshots, a
            requested name is unknown, or a strict-mode condition is hit.
        ApiParamError: if both ``only`` and ``leave`` are given.
        InternalError: if a resource selected for restore is not a
            ``SnapshotResourceMixin``.
        UserAbort: if, outside batch mode, the user declines the prompt.
    """
    if not isinstance(workspace, SnapshotWorkspaceMixin):
        raise ConfigurationError("Workspace %s does not support snapshots" %
                                 workspace.name)
    mixin = cast(SnapshotWorkspaceMixin, workspace)
    # First, find the history entry
    md = mixin.get_snapshot_by_tag_or_hash(tag_or_hash)

    # process the lists of resources
    current_names = set(workspace.get_resource_names())
    # Only snapshot entries with a non-null restore hash can be restored.
    snapshot_names = {
        rn for rn in md.restore_hashes.keys()
        if md.restore_hashes[rn] is not None
    }
    all_names = current_names.union(snapshot_names)
    if (only is not None) and (leave is not None):
        raise ApiParamError(
            "Cannot specify both only and leave for restore command.")
    elif only is not None:
        # For only, we will be a little stricter, as the user is explicitly
        # specifying the resources.
        restore_set = set(only)
        strict = True
    elif leave is not None:
        restore_set = all_names.difference(leave)
    else:
        restore_set = all_names

    # Reject unknown names before anything asks the workspace about them,
    # so a typo in ``only`` yields this message rather than whatever a role
    # lookup on a nonexistent resource would produce.
    invalid = restore_set.difference(all_names)
    if len(invalid) > 0:
        raise ConfigurationError("Resource name(s) not found: %s" %
                                 ", ".join(sorted(invalid)))

    # We need to remove result resources from the restore set, as we
    # do not restore them to their prior state. Roles are only looked up for
    # resources currently in the workspace; names that exist solely in the
    # snapshot are handled by the "removed" check below.
    # NOTE(review): presumably get_resource_role cannot answer for names
    # that are not in the workspace - confirm against Workspace.
    result_resources = {
        rname
        for rname in restore_set.intersection(current_names)
        if workspace.get_resource_role(rname) == ResourceRoles.RESULTS
    }
    if len(result_resources) > 0:
        # Sorted so the message is deterministic, like the ones below.
        result_names = ", ".join(sorted(result_resources))
        if strict:
            raise ConfigurationError(
                "Restore set contains result resources, which cannot be restored. The following are result resources: %s"
                % result_names)
        else:
            click.echo(
                "Skipping the restore of the following result resources, which are left in their latest state: %s"
                % result_names)
            restore_set = restore_set.difference(result_resources)

    # In the snapshot but no longer in the workspace.
    removed_names = restore_set.difference(current_names)
    if len(removed_names) > 0:
        if strict:
            raise ConfigurationError(
                "Resources have been removed from workspace or have no restore hash and strict mode is enabled."
                + " Removed resources: %s" % ", ".join(sorted(removed_names)))
        else:
            click.echo(
                "Skipping restore of resources that have been removed from workspace or have no restore hash: %s"
                % ", ".join(sorted(removed_names)),
                err=True,
            )
            restore_set = restore_set.difference(removed_names)
    # In the workspace but without a restorable entry in the snapshot.
    added_names = restore_set.difference(snapshot_names)
    if len(added_names) > 0:
        if strict:
            raise ConfigurationError(
                "Resources have been added to workspace since restore, and strict mode enabled."
                + " Added resources: %s" % ", ".join(sorted(added_names)))
        else:
            click.echo(
                "Resources have been added to workspace since restore, will leave them as-is: %s"
                % ", ".join(sorted(added_names)),
                err=True,
            )
            restore_set = restore_set.difference(added_names)

    # get ordered list of names and resources as well as restore hashes
    restore_name_list = [
        rn for rn in workspace.get_resource_names() if rn in restore_set
    ]
    if len(restore_name_list) == 0:
        click.echo("No resources to restore.")
        return 0
    restore_resource_list = [
        workspace.get_resource(rn) for rn in restore_name_list
    ]
    for r in restore_resource_list:
        if not isinstance(r, SnapshotResourceMixin):
            raise InternalError(
                "Resource %s was in snapshot, but is not a SnapshotResourceMixin"
                % r.name)
    restore_hashes = {rn: md.restore_hashes[rn] for rn in restore_set}

    tagstr = " (%s)" % ",".join(md.tags) if len(md.tags) > 0 else ""
    click.echo("Restoring snapshot %s%s" % (md.hashval, tagstr))

    def fmt_rlist(rnames):
        # Comma-separated names, or the word "None" for an empty list.
        if len(rnames) > 0:
            return ", ".join(rnames)
        else:
            return "None"

    click.echo("  Resources to restore: %s" % fmt_rlist(restore_name_list))
    names_to_leave = sorted(current_names.difference(restore_set))
    click.echo("  Resources to leave: %s" % fmt_rlist(names_to_leave))
    if not workspace.batch:
        # Unless in batch mode, we always want to ask for confirmation.
        # An empty answer counts as "yes".
        resp = input("Should I perform this restore? [Y/n]")
        if resp.lower() != "y" and resp != "":
            raise UserAbort()

    # do the work!
    mixin.restore(md.hashval, restore_hashes,
                  cast(List[SnapshotResourceMixin], restore_resource_list))
    workspace.save("Restore to %s" % md.hashval)

    return len(restore_name_list)