Пример #1
0
def _collect_paths(
    repo: "Repo",
    targets: Iterable[str],
    recursive: bool = False,
    rev: str = None,
):
    """Resolve ``targets`` into path infos that exist in the repo tree.

    Every target is made absolute and wrapped in ``PathInfo``, then checked
    against a ``RepoTree`` built from ``repo``.

    Args:
        repo: repository handed to ``RepoTree``.
        targets: path strings to resolve.
        recursive: when true, a target that is a directory also contributes
            every entry yielded by ``tree.walk_files`` for it, and missing
            targets are skipped silently.
        rev: used only to choose and fill in the "not found" warning.

    Returns:
        A list of path infos. A target that exists is always appended
        itself, even when its files were already added by the recursive
        walk. Targets that do not exist are left out.
    """
    from dvc.tree.repo import RepoTree

    candidates = [PathInfo(os.path.abspath(target)) for target in targets]
    tree = RepoTree(repo)

    found = []
    for candidate in candidates:
        if recursive and tree.isdir(candidate):
            found.extend(tree.walk_files(candidate))

        if tree.exists(candidate):
            found.append(candidate)
            continue

        # From here on the target is missing; only the non-recursive mode
        # reports it.
        if recursive:
            continue

        if rev in ("workspace", ""):
            logger.warning(
                "'%s' was not found in current workspace.", candidate,
            )
        else:
            logger.warning(
                "'%s' was not found at: '%s'.", candidate, rev,
            )
    return found
Пример #2
0
def _collect_metrics(repo, targets, recursive):

    if targets:
        target_infos = [
            PathInfo(os.path.abspath(target)) for target in targets
        ]
        tree = RepoTree(repo)

        rec_files = []
        if recursive:
            for target_info in target_infos:
                if tree.isdir(target_info):
                    rec_files.extend(list(tree.walk_files(target_info)))

        result = [t for t in target_infos if tree.isfile(t)]
        result.extend(rec_files)

        return result

    metrics = set()
    for stage in repo.stages:
        for out in stage.outs:
            if not out.metric:
                continue
            metrics.add(out.path_info)
    return list(metrics)
Пример #3
0
def _collect_paths(
    repo: Repo,
    targets: Iterable[str],
    recursive: bool = False,
    rev: str = None,
):
    """Resolve ``targets`` into the set of path infos present in the repo tree.

    Every target is made absolute and wrapped in ``PathInfo``; duplicates
    collapse because both the inputs and the result are kept in sets.

    Args:
        repo: repository handed to ``RepoTree``.
        targets: path strings to resolve.
        recursive: when true, a target that is a directory also contributes
            every entry yielded by ``tree.walk_files`` for it, and missing
            targets are skipped without a warning.
        rev: only interpolated into the "not found" warning.

    Returns:
        A set of path infos. A target that exists is always included
        itself; targets that do not exist are left out.
    """
    requested = {PathInfo(os.path.abspath(target)) for target in targets}
    tree = RepoTree(repo)

    collected = set()
    for requested_path in requested:
        if recursive and tree.isdir(requested_path):
            collected.update(tree.walk_files(requested_path))

        if tree.exists(requested_path):
            collected.add(requested_path)
        elif not recursive:
            logger.warning(
                "'%s' was not found at: '%s'.",
                requested_path,
                rev,
            )
    return collected
Пример #4
0
    def collect(
        self,
        targets: List[str] = None,
        revs: List[str] = None,
        recursive: bool = False,
    ) -> Dict[str, Dict]:
        """Gather props and raw file contents for plots, grouped by revision.

        The result is shaped like:
            {rev: {plots.csv: {
                props: {x: ..., "header": ..., ...},
                data: "...data as a string...",
            }}}
        Contents are stored exactly as read; parsing is left for later
        because it depends on the props.

        Args:
            targets: plot paths; a single string is accepted and treated as
                a one-element list, ``None`` as no targets.
            revs: revisions passed to ``self.repo.brancher``; when given,
                revisions yielded by the brancher that are not in this list
                are skipped.
            recursive: forwarded to ``_collect_plots``.

        Returns:
            A dict keyed by revision (a falsy revision is stored as
            ``"workspace"``). A revision only appears if ``_collect_plots``
            returned at least one plot for it. An entry has no ``"data"``
            key when opening its file raised ``FileNotFoundError``.
        """
        from dvc.tree.repo import RepoTree

        if isinstance(targets, str):
            targets = [targets]
        elif not targets:
            targets = []

        collected = {}
        for rev in self.repo.brancher(revs=revs):
            # .brancher() adds unwanted workspace
            if revs is not None and rev not in revs:
                continue
            rev = rev or "workspace"

            # Built anew for each revision the brancher yields.
            tree = RepoTree(self.repo)
            plots = _collect_plots(self.repo, targets, rev, recursive)
            for path_info, props in plots.items():
                rev_entries = collected.setdefault(rev, {})

                # A directory plot expands to every file below it; all of
                # those files share the directory's props.
                if tree.isdir(path_info):
                    plot_files = [
                        (file_info, relpath(file_info, self.repo.root_dir))
                        for file_info in tree.walk_files(path_info)
                    ]
                else:
                    plot_files = [
                        (path_info, relpath(path_info, self.repo.root_dir))
                    ]

                for file_info, repo_path in plot_files:
                    entry = {"props": props}
                    rev_entries[repo_path] = entry

                    # Load data from git or dvc cache
                    try:
                        with tree.open(file_info) as fd:
                            entry["data"] = fd.read()
                    except FileNotFoundError:
                        # This might happen simply because cache is absent
                        pass

        return collected
Пример #5
0
 def collect_files(tree: BaseTree, repo_tree: RepoTree):
     """Yield paths under ``tree.path_info`` for entries that are not DVC-tracked.

     Walks ``repo_tree`` from its ``root_dir`` (passing ``dvcfiles=True`` to
     ``walk_files``) and skips every entry for which ``repo_tree.isdvc`` is
     true. Each remaining entry is made relative to ``repo_tree.root_dir``
     and joined onto ``tree.path_info``.
     """
     for entry in repo_tree.walk_files(repo_tree.root_dir, dvcfiles=True):
         if repo_tree.isdvc(entry):
             continue
         relative_entry = entry.relative_to(repo_tree.root_dir)
         yield tree.path_info / relative_entry