示例#1
0
文件: __init__.py 项目: pmrowla/dvc
    def collect(
        self,
        targets: List[str] = None,
        revs: List[str] = None,
        recursive: bool = False,
        onerror: Optional[Callable] = None,
        props: Optional[Dict] = None,
    ) -> Generator[Dict, None, None]:
        """Lazily collect props and data for plots, one revision at a time.

        Each yielded item is a single-key mapping shaped like:
            {rev: {plots.csv: {
                props: {x: ..., "header": ..., ...},
                data: "unstructured data (as stored for given extension)",
            }}}

        A falsy revision name coming from the brancher is reported under
        the key "workspace".
        """
        from dvc.utils.collections import ensure_list

        targets = ensure_list(targets)
        for branch_rev in self.repo.brancher(revs=revs):
            # .brancher() may add a workspace entry nobody asked for; only
            # keep revisions that were explicitly requested (or all of them
            # when no filter was given).
            if revs is None or branch_rev in revs:
                revision = branch_rev or "workspace"
                collected = {
                    revision: self._collect_from_revision(
                        revision=revision,
                        targets=targets,
                        recursive=recursive,
                        onerror=onerror,
                        props=props,
                    )
                }
                yield collected
示例#2
0
    def show(
        self,
        targets: List[str] = None,
        revs=None,
        props=None,
        templates=None,
        recursive=False,
    ):
        """Collect plot data for the given targets/revisions and render it.

        Raises:
            MetricDoesNotExistError: a requested target has no collected
                entry (matched by its repo-relative path being contained
                in the entry key) that carries a "data" field.
            NoMetricsFoundError: nothing was collected at all.

        Returns whatever ``self.render`` returns for the collected data.
        """
        from dvc.utils.collections import ensure_list

        data = self.collect(targets, revs, recursive)

        # Every explicitly requested plot must have data in at least one
        # revision; otherwise that is an error.
        for target in ensure_list(targets):
            rpath = relpath(target, self.repo.root_dir)
            matching_entries = (
                entry
                for rev_data in data.values()
                for key, entry in rev_data.items()
                if rpath in key
            )
            if not any("data" in entry for entry in matching_entries):
                raise MetricDoesNotExistError([target])

        # A completely empty result gets its own, more specific error.
        if not data:
            raise NoMetricsFoundError("plots", "--plots/--plots-no-cache")

        chosen_templates = self.templates if templates is None else templates
        return self.render(data, revs, props, chosen_templates)
示例#3
0
文件: index.py 项目: rpatil524/dvc
    def used_objs(
        self,
        targets: "TargetType" = None,
        with_deps: bool = False,
        remote: str = None,
        force: bool = False,
        recursive: bool = False,
        jobs: int = None,
    ) -> "ObjectContainer":
        """Gather the objects used by the stages matching ``targets``.

        Each target (or a single ``None`` target when none are given) is
        resolved through ``self.stage_collector.collect_granular``; the
        objects reported by every resulting stage are merged per odb.

        Returns a ``defaultdict(set)`` mapping each odb to the union of
        the objects reported for it.
        """
        from collections import defaultdict

        from dvc.utils.collections import ensure_list

        # With no targets, collect once with ``None`` as the target.
        collect_targets: Sequence[Optional[str]] = (
            ensure_list(targets) if targets else (None,)
        )

        used: "ObjectContainer" = defaultdict(set)
        for target in collect_targets:
            granular = self.stage_collector.collect_granular(
                target, recursive=recursive, with_deps=with_deps
            )
            for stage, filter_info in granular:
                stage_objs = stage.get_used_objs(
                    remote=remote,
                    force=force,
                    jobs=jobs,
                    filter_info=filter_info,
                )
                for odb, objs in stage_objs.items():
                    used[odb].update(objs)
        return used
示例#4
0
文件: __init__.py 项目: phdtanvir/dvc
    def collect(
        self,
        targets: List[str] = None,
        revs: List[str] = None,
        recursive: bool = False,
    ) -> Dict[str, Dict]:
        """Gather props and raw data for plots across revisions.

        The result is shaped like:
            {rev: {plots.csv: {
                props: {x: ..., "header": ..., ...},
                data: "...data as a string...",
            }}}
        Data is stored exactly as read; parsing happens later because it
        depends on props. An entry has no "data" key when its file could
        not be found.
        """
        from dvc.fs.repo import RepoFileSystem
        from dvc.utils.collections import ensure_list

        targets = ensure_list(targets)
        collected: Dict[str, Dict] = {}
        for branch_rev in self.repo.brancher(revs=revs):
            # .brancher() adds unwanted workspace
            requested = revs is None or branch_rev in revs
            if not requested:
                continue
            rev_name = branch_rev or "workspace"

            fs = RepoFileSystem(self.repo)
            plots = _collect_plots(self.repo, targets, rev_name, recursive)
            for path_info, props in plots.items():
                rev_entries = collected.setdefault(rev_name, {})

                # A directory plot contributes every file underneath it;
                # a file plot contributes only itself.
                if fs.isdir(path_info):
                    candidates = fs.walk_files(path_info)
                else:
                    candidates = (path_info,)
                plot_files = [
                    (candidate, relpath(candidate, self.repo.root_dir))
                    for candidate in candidates
                ]

                for path, repo_path in plot_files:
                    entry = {"props": props}
                    rev_entries[repo_path] = entry

                    # Load data from git or dvc cache
                    try:
                        with fs.open(path) as fd:
                            entry["data"] = fd.read()
                    except FileNotFoundError:
                        # This might happen simply because cache is absent
                        pass

        return collected
示例#5
0
文件: __init__.py 项目: iterative/dvc
    def collect(
        self,
        targets: List[str] = None,
        revs: List[str] = None,
        recursive: bool = False,
        onerror: Optional[Callable] = None,
        props: Optional[Dict] = None,
    ) -> Dict[str, Dict]:
        """Gather props and data for plots across revisions.

        The result is shaped like:
            {rev: {plots.csv: {
                props: {x: ..., "header": ..., ...},
                data: "unstructured data (as stored for given extension)",
            }}}

        If ``errored_revisions`` reports any revisions for the collected
        data, an error line naming them is written through the UI; the
        data is returned either way.
        """
        from dvc.utils.collections import ensure_list

        targets = ensure_list(targets)
        # .brancher() adds unwanted workspace, hence the filter on ``revs``.
        data: Dict[str, Dict] = {
            (branch_rev or "workspace"): self._collect_from_revision(
                revision=branch_rev or "workspace",
                targets=targets,
                recursive=recursive,
                onerror=onerror,
                props=props,
            )
            for branch_rev in self.repo.brancher(revs=revs)
            if revs is None or branch_rev in revs
        }

        errored = errored_revisions(data)
        if not errored:
            return data

        from dvc.ui import ui

        failed = ", ".join(errored)
        ui.error_write(
            "DVC failed to load some plots for following revisions: "
            f"'{failed}'."
        )
        return data
示例#6
0
文件: add.py 项目: vijay-pinjala/dvc
def add(  # noqa: C901
    repo,
    targets: "TargetType",
    recursive=False,
    no_commit=False,
    fname=None,
    to_remote=False,
    **kwargs,
):
    """Create and process stages for ``targets`` and return those stages.

    The option combination is validated first. Then, for each target, the
    sub-targets are expanded with ``_find_all_targets``, stages are built
    with ``_create_stages``, checked with ``repo.check_modified_graph`` and
    handed to ``_process_stages``.

    Args:
        repo: repository object; must provide ``check_modified_graph`` and
            is passed through to the helper functions.
        targets: one target or several; normalised with ``ensure_list``.
        recursive: forwarded to ``_find_all_targets``. Cannot be combined
            with ``fname`` or with transfer mode.
        no_commit: forwarded to ``_process_stages``. Cannot be combined
            with transfer mode.
        fname: forwarded to ``_create_stages``.
        to_remote: enables transfer mode and is forwarded to
            ``_process_stages``.
        **kwargs: ``out``, ``external``, ``remote`` and ``jobs`` are read
            for validation; everything is forwarded to ``_create_stages``
            and ``_process_stages``.

    Returns:
        List of all stages created across the targets.

    Raises:
        RecursiveAddingWhileUsingFilename: ``recursive`` and ``fname`` are
            both set.
        InvalidArgumentError: an option is used in a mode it does not
            support.
        OverlappingOutputPathsError: re-raised from the graph check with a
            message telling the user how to resolve the overlap.
        OutputDuplicationError: re-raised from the graph check with the
            stages created here removed from the reported stage list.
    """
    from dvc.utils.collections import ensure_list

    if recursive and fname:
        raise RecursiveAddingWhileUsingFilename()

    targets = ensure_list(targets)

    # "Transfer mode" is either --to-remote or an explicit `out` kwarg
    # (reported to the user as `-o`) without --to-remote.
    to_cache = kwargs.get("out") and not to_remote
    invalid_opt = None
    if to_remote or to_cache:
        # Transfer mode needs exactly one target and rejects these options.
        message = "{option} can't be used with "
        message += "--to-remote" if to_remote else "-o"
        if len(targets) != 1:
            invalid_opt = "multiple targets"
        elif no_commit:
            invalid_opt = "--no-commit option"
        elif recursive:
            invalid_opt = "--recursive option"
        elif kwargs.get("external"):
            invalid_opt = "--external option"
    else:
        # Outside transfer mode, `remote` and `jobs` are rejected.
        message = "{option} can't be used without --to-remote"
        if kwargs.get("remote"):
            invalid_opt = "--remote"
        elif kwargs.get("jobs"):
            invalid_opt = "--jobs"

    if invalid_opt is not None:
        raise InvalidArgumentError(message.format(option=invalid_opt))

    link_failures = []
    stages_list = []
    num_targets = len(targets)
    with Tqdm(total=num_targets, desc="Add", unit="file", leave=True) as pbar:
        if num_targets == 1:
            # clear unneeded top-level progress bar for single target
            pbar.bar_format = "Adding..."
            pbar.refresh()
        for target in targets:
            sub_targets = _find_all_targets(repo, target, recursive)
            # The bar was sized with one slot per target; swap this target's
            # slot for one slot per sub-target.
            pbar.total += len(sub_targets) - 1

            if os.path.isdir(target) and len(sub_targets) > LARGE_DIR_SIZE:
                logger.warning(
                    "You are adding a large directory '{target}' recursively,"
                    " consider tracking it as a whole instead.\n"
                    "{purple}HINT:{nc} Remove the generated DVC file and then"
                    " run `{cyan}dvc add {target}{nc}`".format(
                        purple=colorama.Fore.MAGENTA,
                        cyan=colorama.Fore.CYAN,
                        nc=colorama.Style.RESET_ALL,
                        target=target,
                    ))

            stages = _create_stages(
                repo,
                sub_targets,
                fname,
                pbar=pbar,
                transfer=to_remote or to_cache,
                **kwargs,
            )

            try:
                repo.check_modified_graph(stages)
            except OverlappingOutputPathsError as exc:
                # Re-raise the same error type with an actionable message.
                msg = (
                    "Cannot add '{out}', because it is overlapping with other "
                    "DVC tracked output: '{parent}'.\n"
                    "To include '{out}' in '{parent}', run "
                    "'dvc commit {parent_stage}'").format(
                        out=exc.overlapping_out.path_info,
                        parent=exc.parent.path_info,
                        parent_stage=exc.parent.stage.addressing,
                    )
                raise OverlappingOutputPathsError(exc.parent,
                                                  exc.overlapping_out, msg)
            except OutputDuplicationError as exc:
                # Report only the conflicting stages that were not created
                # by this call.
                raise OutputDuplicationError(
                    exc.output, list(set(exc.stages) - set(stages)))

            # NOTE(review): _process_stages presumably returns the stages
            # whose cache links failed (they are reported below via
            # `.relpath`) - confirm against its definition.
            link_failures.extend(
                _process_stages(
                    repo,
                    sub_targets,
                    stages,
                    no_commit,
                    pbar,
                    to_remote,
                    to_cache,
                    **kwargs,
                ))
            stages_list += stages

        if num_targets == 1:  # restore bar format for stats
            pbar.bar_format = pbar.BAR_FMT_DEFAULT

    if link_failures:
        # Link failures are reported as a warning, not raised; the stages
        # are still returned.
        msg = (
            "Some targets could not be linked from cache to workspace.\n{}\n"
            "To re-link these targets, reconfigure cache types and then run:\n"
            "\n\tdvc checkout {}").format(
                CacheLinkError.SUPPORT_LINK,
                " ".join([str(stage.relpath) for stage in link_failures]),
            )
        logger.warning(msg)

    return stages_list
示例#7
0
文件: scm_context.py 项目: nik123/dvc
 def _make_git_add_cmd(paths: Union[str, Iterable[str]]) -> str:
     """Build the ``git add`` hint line for the given path(s).

     Each path is shell-quoted, and the leading tab is expanded to the
     next 4-column tab stop.
     """
     quoted = [shlex.quote(path) for path in ensure_list(paths)]
     command = "\tgit add " + " ".join(quoted)
     return command.expandtabs(4)
示例#8
0
文件: scm_context.py 项目: nik123/dvc
 def track_file(
     self, paths: Union[str, Iterable[str], None] = None
 ) -> None:
     """Record path(s) so the user can be reminded to track them, or so
     they can be autostaged later.

     ``paths`` is normalised with ``ensure_list`` and merged into
     ``self.files_to_track``.
     """
     tracked = self.files_to_track
     return tracked.update(ensure_list(paths))