Пример #1
0
def run(self, fname=None, no_exec=False, **kwargs):
    """Build a stage from ``kwargs``, optionally execute it, and dump it.

    With a truthy ``name`` in ``kwargs`` the stage is a ``PipelineStage``
    targeting ``PIPELINE_FILE``; otherwise it is a plain ``Stage`` targeting
    ``fname`` or, failing that, the path from ``_get_file_path(kwargs)``.

    Returns the stage, or ``None`` when ``create_stage`` yields ``None``.

    Raises ``InvalidStageName`` for a name rejected by ``is_valid_name``,
    ``DuplicateStageName`` when the name is already present in an existing
    target file, and ``OutputDuplicationError`` (with this stage removed
    from the reported set) when the graph check fails.
    """
    from dvc.stage import PipelineStage, Stage, create_stage
    from dvc.dvcfile import Dvcfile, PIPELINE_FILE

    name = kwargs.get("name")
    if name:
        if not is_valid_name(name):
            raise InvalidStageName
        stage_type = PipelineStage
        target = PIPELINE_FILE
    else:
        # Drop a falsy "name" so it is not forwarded to create_stage.
        kwargs.pop("name", None)
        stage_type = Stage
        target = fname or _get_file_path(kwargs)

    stage = create_stage(stage_type, repo=self, path=target, **kwargs)
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    if dvcfile.exists():
        if name and name in dvcfile.stages:
            raise DuplicateStageName(name, dvcfile)
        if stage_type != PipelineStage:
            overwrite = kwargs.get("overwrite", True)
            dvcfile.remove_with_prompt(force=overwrite)

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the other stages; the new stage itself is excluded.
        conflicting = set(exc.stages) - {stage}
        raise OutputDuplicationError(exc.output, conflicting)

    if not no_exec:
        stage.run(no_commit=kwargs.get("no_commit", False))
    dvcfile.dump(stage, update_pipeline=True)
    return stage
Пример #2
0
    def modify(self, path, props=None, unset=None):
        """Update, or create, the plot properties of the output at ``path``.

        ``props`` is merged into the output's plot dict after any ``unset``
        request has been handed to ``self._unset``; the owning stage is then
        dumped with ``update_lock=False``.

        Raises ``NotAPlotError`` when ``unset`` is given (not ``None``) for
        an output whose ``plot`` is falsy.
        """
        from dvc.dvcfile import Dvcfile

        props = props or {}
        template_name = props.get("template")
        if template_name:
            # The result is discarded; presumably this is called so that an
            # unknown template fails before anything is modified.
            self.templates.get_template(template_name)

        # Exactly one output must match, otherwise the unpacking raises.
        [out] = self.repo.find_outs_by_path(path)
        if not out.plot and unset is not None:
            raise NotAPlotError(out)

        # Any non-dict plot value is replaced by a dict so that properties
        # can be stored on it; this turns a non-plot output into a plot.
        if not isinstance(out.plot, dict):
            out.plot = {}

        if unset:
            self._unset(out, unset)
        out.plot.update(props)

        # With no properties left, store True instead of an empty dict: an
        # empty dict is falsy and would read as "not a plot".
        if not out.plot:
            out.plot = True

        out.verify_metric()

        stage = out.stage
        Dvcfile(self.repo, stage.path).dump(stage, update_lock=False)
Пример #3
0
def run(self, fname=None, no_exec=False, **kwargs):
    """Create a stage from ``kwargs``, optionally run it, and dump its file.

    The stage file is ``fname`` when given. Otherwise it is the basename of
    the first declared output plus ``DVC_FILE_SUFFIX``, or ``DVC_FILE``
    when no outputs or metrics are declared.

    Returns the stage, or ``None`` when ``Stage.create`` yields ``None``.
    """
    from dvc.stage import Stage
    from dvc.dvcfile import Dvcfile, DVC_FILE_SUFFIX, DVC_FILE

    # Every kind of declared output, in the order used to pick the default
    # file name.
    declared = (
        kwargs.get("outs", [])
        + kwargs.get("outs_no_cache", [])
        + kwargs.get("metrics", [])
        + kwargs.get("metrics_no_cache", [])
        + kwargs.get("outs_persist", [])
        + kwargs.get("outs_persist_no_cache", [])
    )

    default_path = (
        os.path.basename(os.path.normpath(declared[0])) + DVC_FILE_SUFFIX
        if declared
        else DVC_FILE
    )

    stage = Stage.create(self, fname or default_path, **kwargs)
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    overwrite = kwargs.get("overwrite", True)
    dvcfile.overwrite_with_prompt(force=overwrite)

    self.check_modified_graph([stage])

    if not no_exec:
        no_commit = kwargs.get("no_commit", False)
        stage.run(no_commit=no_commit)

    dvcfile.dump(stage)
    return stage
Пример #4
0
def imp_url(self, url, out=None, fname=None, erepo=None, locked=True):
    """Create, run and dump a stage that imports ``url`` into ``out``.

    The stage has ``url`` as its only dependency and the resolved ``out``
    as its only output. After running, ``stage.locked`` is set to
    ``locked`` and the stage is dumped to ``fname`` or the resolved path.

    Returns the stage, or ``None`` when ``Stage.create`` yields ``None``.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage

    out = resolve_output(url, out)
    path, wdir, out = resolve_paths(self, out)

    # NOTE: a source that exists inside this repository is recorded
    # relative to the stage's working directory.
    inside_repo = os.path.exists(url) and path_isin(
        os.path.abspath(url), self.root_dir
    )
    if inside_repo:
        url = relpath(url, wdir)

    stage = Stage.create(
        self, fname or path, wdir=wdir, deps=[url], outs=[out], erepo=erepo
    )
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    dvcfile.overwrite_with_prompt(force=True)
    self.check_modified_graph([stage])

    stage.run()
    stage.locked = locked
    dvcfile.dump(stage)
    return stage
Пример #5
0
Файл: run.py Проект: rjsears/dvc
def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):
    """Create a stage from ``kwargs``, optionally execute it, and dump it.

    A named stage is created as a ``PipelineStage`` targeting
    ``PIPELINE_FILE``. With ``single_stage`` a plain ``Stage`` is created at
    ``fname`` or, failing that, the path from ``_get_file_path(kwargs)``.

    Args:
        fname: Target file path; only consulted when ``single_stage`` is
            true.
        no_exec: When true, ``stage.ignore_outs()`` is called instead of
            ``stage.run()`` and the dump is made with ``no_lock=True``.
        single_stage: Create an unnamed ``Stage`` instead of a named
            ``PipelineStage``.
        **kwargs: Stage options forwarded to ``create_stage``. Keys read
            here: ``cmd``, ``name``, ``params``, ``overwrite``,
            ``no_commit``, ``run_cache``.

    Returns:
        The created stage, or ``None`` when ``create_stage`` returns
        ``None``.

    Raises:
        InvalidArgumentError: ``cmd`` is missing, ``name`` is combined with
            ``single_stage``, or neither of the two is given.
        InvalidStageName: ``name`` is rejected by ``is_valid_name``.
        StageFileAlreadyExistsError: ``overwrite`` is falsy and a
            single-stage file already exists.
        DuplicateStageName: ``overwrite`` is falsy and the name is already
            in the pipeline file.
        OutputDuplicationError: The graph check fails; the new stage is
            removed from the reported set of stages.
    """
    from dvc.stage import PipelineStage, Stage, create_stage
    from dvc.dvcfile import Dvcfile, PIPELINE_FILE

    if not kwargs.get("cmd"):
        raise InvalidArgumentError("command is not specified")

    # Defaults describe a named pipeline stage; the single-stage branch
    # below overrides both.
    stage_cls = PipelineStage
    path = PIPELINE_FILE
    stage_name = kwargs.get("name")

    if stage_name and single_stage:
        raise InvalidArgumentError(
            "`-n|--name` is incompatible with `--single-stage`")

    if not stage_name and not single_stage:
        raise InvalidArgumentError("`-n|--name` is required")

    if single_stage:
        # Drop "name" so it is not forwarded to create_stage.
        kwargs.pop("name", None)
        stage_cls = Stage
        path = fname or _get_file_path(kwargs)
    else:
        if not is_valid_name(stage_name):
            raise InvalidStageName

    # "params" is removed from kwargs and passed on in parsed form.
    params = parse_params(kwargs.pop("params", []))
    stage = create_stage(stage_cls,
                         repo=self,
                         path=path,
                         params=params,
                         **kwargs)
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    if dvcfile.exists():
        # With overwrite (the default) the existing stage entry is removed;
        # without it, a clash is reported according to the stage kind.
        if kwargs.get("overwrite", True):
            dvcfile.remove_stage(stage)
        elif stage_cls != PipelineStage:
            raise StageFileAlreadyExistsError(dvcfile.relpath)
        elif stage_name and stage_name in dvcfile.stages:
            raise DuplicateStageName(stage_name, dvcfile)

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Re-raise listing only the other stages, not the new one itself.
        raise OutputDuplicationError(exc.output, set(exc.stages) - {stage})

    if no_exec:
        stage.ignore_outs()
    else:
        stage.run(
            no_commit=kwargs.get("no_commit", False),
            run_cache=kwargs.get("run_cache", True),
        )

    # no_lock mirrors no_exec: a stage that was not executed is dumped with
    # no_lock=True.
    dvcfile.dump(stage, update_pipeline=True, no_lock=no_exec)
    return stage
Пример #6
0
def imp_url(
    self,
    url,
    out=None,
    fname=None,
    erepo=None,
    frozen=True,
    no_exec=False,
    desc=None,
    jobs=None,
):
    """Create and dump a stage that imports ``url`` into ``out``.

    The stage has ``url`` as its only dependency and the resolved ``out``
    as its only output. Unless ``no_exec`` is set it is run with ``jobs``;
    with ``no_exec`` only ``stage.ignore_outs()`` is called. A truthy
    ``desc`` is stored on the output, and ``stage.frozen`` is set to
    ``frozen`` before dumping.

    Returns the stage, or ``None`` when ``stage.can_be_skipped`` is true.

    Raises ``OutputDuplicationError`` (with this stage removed from the
    reported set) when the graph check fails.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage, restore_meta

    out = resolve_output(url, out)
    path, wdir, out = resolve_paths(self, out)

    # NOTE: without an external repo, a source that exists inside this
    # repository is recorded relative to the stage's working directory.
    inside_own_repo = (
        erepo is None
        and os.path.exists(url)
        and path_isin(os.path.abspath(url), self.root_dir)
    )
    if inside_own_repo:
        url = relpath(url, wdir)

    stage = create_stage(
        Stage, self, fname or path, wdir=wdir, deps=[url], outs=[out],
        erepo=erepo,
    )
    restore_meta(stage)
    if stage.can_be_skipped:
        return None

    if desc:
        stage.outs[0].desc = desc

    dvcfile = Dvcfile(self, stage.path)
    dvcfile.remove()

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the other stages; the new stage itself is excluded.
        conflicting = set(exc.stages) - {stage}
        raise OutputDuplicationError(exc.output, conflicting)

    if not no_exec:
        stage.run(jobs=jobs)
    else:
        stage.ignore_outs()

    stage.frozen = frozen
    dvcfile.dump(stage)
    return stage
Пример #7
0
def add(self, tag, target=None, with_deps=False, recursive=False):
    """Record each output's current ``info`` under ``tag``.

    Stages come from ``self.collect``. Outputs with falsy ``info`` are
    skipped with a warning. A stage is dumped to its file only if at least
    one of its outputs was tagged.
    """
    for stage in self.collect(
        target, with_deps=with_deps, recursive=recursive
    ):
        tagged_any = False
        for out in stage.outs:
            if out.info:
                # Store a copy of the output's info under the tag.
                out.tags[tag] = copy(out.info)
                tagged_any = True
            else:
                logger.warning("missing checksum info for '{}'".format(out))

        if tagged_any:
            Dvcfile(self, stage.path).dump(stage)
Пример #8
0
def remove(self, tag, target=None, with_deps=False, recursive=False):
    """Delete ``tag`` from every output of the collected stages.

    Stages come from ``self.collect``. Outputs that do not carry ``tag``
    are skipped with a warning. A stage is dumped to its file only if the
    tag was removed from at least one of its outputs.

    Args:
        tag: Key to delete from each output's ``tags`` mapping.
        target: Forwarded to ``self.collect``.
        with_deps: Forwarded to ``self.collect``.
        recursive: Forwarded to ``self.collect``.
    """
    stages = self.collect(target, with_deps=with_deps, recursive=recursive)
    for stage in stages:
        changed = False
        for out in stage.outs:
            # Membership is tested on the mapping itself; going through
            # ``.keys()`` is redundant.
            if tag not in out.tags:
                logger.warning("tag '{}' not found for '{}'".format(tag, out))
                continue
            del out.tags[tag]
            changed = True
        if changed:
            dvcfile = Dvcfile(self, stage.path)
            dvcfile.dump(stage)
Пример #9
0
def test_meta_is_preserved(tmp_dir, dvc):
    """A ``meta`` section in a stage file must survive a load/dump cycle."""
    [stage] = tmp_dir.dvc_gen("foo", "foo content")

    # Write a custom "meta" section into the stage file on disk.
    original = load_stage_file(stage.path)
    original["meta"] = {"custom_key": 42}
    dump_stage_file(stage.path, original)

    # Round-trip the file through Dvcfile: load the stage, dump it again.
    dvcfile = Dvcfile(dvc, stage.path)
    dvcfile.dump(dvcfile.load())

    reloaded = load_stage_file(stage.path)
    assert reloaded["meta"] == original["meta"]
Пример #10
0
def modify(repo, path, delete=False):
    """Re-verify, and optionally clear, the metric of the output at ``path``.

    Exactly one output must match ``path``. With ``delete`` the output's
    ``metric`` is set to ``None``. The output is then verified and its
    stage dumped.

    Raises ``DvcException`` when the output's scheme is not ``"local"``.
    """
    matches = repo.find_outs_by_path(path)
    assert len(matches) == 1
    out = matches[0]

    if out.scheme != "local":
        raise DvcException(
            "output '{}' scheme '{}' is not supported for metrics".format(
                out.path, out.path_info.scheme
            )
        )

    if delete:
        out.metric = None

    out.verify_metric()

    Dvcfile(repo, out.stage.path).dump(out.stage)
Пример #11
0
def imp_url(self, url, out=None, fname=None, erepo=None, frozen=True):
    """Create, run and dump a stage that imports ``url`` into ``out``.

    The stage has ``url`` as its only dependency and the resolved ``out``
    as its only output. After running, ``stage.frozen`` is set to
    ``frozen`` and the stage is dumped to ``fname`` or the resolved path.

    Returns the stage, or ``None`` when ``create_stage`` yields ``None``.

    Raises ``OutputDuplicationError`` (with this stage removed from the
    reported set) when the graph check fails.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage

    out = resolve_output(url, out)
    path, wdir, out = resolve_paths(self, out)

    # NOTE: without an external repo, a source that exists inside this
    # repository is recorded relative to the stage's working directory.
    is_local_source = (
        erepo is None
        and os.path.exists(url)
        and path_isin(os.path.abspath(url), self.root_dir)
    )
    if is_local_source:
        url = relpath(url, wdir)

    stage = create_stage(
        Stage, self, fname or path, wdir=wdir, deps=[url], outs=[out],
        erepo=erepo,
    )
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    dvcfile.remove_with_prompt(force=True)

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the other stages; the new stage itself is excluded.
        others = set(exc.stages) - {stage}
        raise OutputDuplicationError(exc.output, others)

    stage.run()
    stage.frozen = frozen
    dvcfile.dump(stage)
    return stage
Пример #12
0
def test_remove_stage_on_lockfile_format_error(tmp_dir, dvc, run_copy):
    """Check ``remove_stage`` raises ``StageFileFormatError`` on bad files.

    An extra ``"gibberish"`` key is written first into the lockfile and then
    into the dvcfile; ``remove_stage`` is expected to fail in both cases.
    """
    tmp_dir.gen("foo", "foo")
    stage = run_copy("foo", "bar", name="copy-foo-bar")
    dvc_file = Dvcfile(dvc, stage.path)
    lock_file = dvc_file._lockfile

    # Add the unexpected key to both loaded structures; only the lockfile
    # data is written to disk at this point.
    # NOTE(review): ``_load()[0]`` is presumably the parsed dvcfile
    # contents; confirm against Dvcfile._load.
    data = dvc_file._load()[0]
    lock_data = lock_file.load()
    lock_data["gibberish"] = True
    data["gibberish"] = True
    dump_yaml(lock_file.relpath, lock_data)
    with pytest.raises(StageFileFormatError):
        dvc_file.remove_stage(stage)

    # Remove the broken lockfile and dump the stage again before corrupting
    # the dvcfile, so the second failure is triggered by the dvcfile alone
    # (presumably the dump writes a fresh lockfile; confirm).
    lock_file.remove()
    dvc_file.dump(stage, update_pipeline=False)

    dump_yaml(dvc_file.relpath, data)
    with pytest.raises(StageFileFormatError):
        dvc_file.remove_stage(stage)
Пример #13
0
    def test(self):
        """Loading and re-dumping a stage file must keep its stored md5."""
        added = self.dvc.add(self.FOO)
        self.assertEqual(len(added), 1)
        original = added[0]
        self.assertTrue(original is not None)

        # NOTE: plant a recognizable checksum in the file so the final
        # assertion shows whether a load/dump cycle changed it.
        fake_md5 = "11111111111111111111111111111111"
        contents = load_stage_file(original.relpath)
        contents[original.PARAM_MD5] = fake_md5
        dump_stage_file(original.relpath, contents)

        dvcfile = Dvcfile(self.dvc, original.relpath)
        reloaded = dvcfile.load()
        self.assertTrue(reloaded is not None)
        dvcfile.dump(reloaded)

        on_disk = load_stage_file(reloaded.relpath)
        self.assertEqual(on_disk[reloaded.PARAM_MD5], fake_md5)
Пример #14
0
def modify(repo, path, typ=None, xpath=None, delete=False):
    """Change the metric settings of the single output found at ``path``.

    Args:
        repo: Repository object providing ``find_outs_by_path``.
        path: Path of the output to modify; exactly one output must match.
        typ: Optional metric type. It is lower-cased and stripped, and must
            then be one of ``raw``, ``json``, ``csv``, ``tsv``, ``hcsv`` or
            ``htsv``.
        xpath: Optional value stored under ``out.PARAM_METRIC_XPATH``.
        delete: When true, ``out.metric`` is reset to ``None``. This happens
            after ``typ`` and ``xpath`` are applied, so it overrides them.

    Raises:
        DvcException: If the output's scheme is not ``"local"`` or ``typ``
            is not a supported type.
    """
    # Single source of truth for both the validation and the error message.
    supported_types = ["raw", "json", "csv", "tsv", "hcsv", "htsv"]
    outs = repo.find_outs_by_path(path)
    assert len(outs) == 1
    out = outs[0]

    if out.scheme != "local":
        msg = "output '{}' scheme '{}' is not supported for metrics"
        raise DvcException(msg.format(out.path, out.path_info.scheme))

    if typ is not None:
        typ = typ.lower().strip()
        if typ not in supported_types:
            msg = (
                "metric type '{typ}' is not supported, "
                "must be one of [{types}]"
            )
            raise DvcException(
                msg.format(typ=typ, types=", ".join(supported_types))
            )
        # A non-dict metric value is replaced by a dict so the type can be
        # stored on it.
        if not isinstance(out.metric, dict):
            out.metric = {}
        out.metric[out.PARAM_METRIC_TYPE] = typ

    if xpath is not None:
        if not isinstance(out.metric, dict):
            out.metric = {}
        out.metric[out.PARAM_METRIC_XPATH] = xpath

    if delete:
        out.metric = None

    out.verify_metric()

    dvcfile = Dvcfile(repo, out.stage.path)
    dvcfile.dump(out.stage)
Пример #15
0
def test_dump_stage(tmp_dir, dvc):
    """Check which files ``Dvcfile.dump`` writes for each flag combination.

    The same stage is dumped three times with different ``no_lock`` and
    ``update_pipeline`` arguments, and the existence of the pipeline file
    and the lock file is asserted after each call.
    """
    stage = PipelineStage(
        dvc, cmd="command", name="stage_name", path="dvc.yaml"
    )
    dvcfile = Dvcfile(dvc, "dvc.yaml")

    # no_lock=True without update_pipeline: neither file is expected.
    dvcfile.dump(stage, no_lock=True)
    assert not (tmp_dir / PIPELINE_FILE).exists()
    assert not (tmp_dir / PIPELINE_LOCK).exists()

    # no_lock=False without update_pipeline: still no pipeline file, but
    # the lockfile now loads to something truthy.
    dvcfile.dump(stage, no_lock=False)
    assert not (tmp_dir / PIPELINE_FILE).exists()
    assert dvcfile._lockfile.load()

    # update_pipeline=True: both files exist and the dvcfile lists exactly
    # this stage.
    dvcfile.dump(stage, update_pipeline=True, no_lock=False)
    assert (tmp_dir / PIPELINE_FILE).exists()
    assert (tmp_dir / PIPELINE_LOCK).exists()
    assert list(dvcfile.stages.values()) == [stage]
Пример #16
0
def imp_url(
    self,
    url,
    out=None,
    fname=None,
    erepo=None,
    frozen=True,
    no_exec=False,
    remote=None,
    to_remote=False,
    desc=None,
    jobs=None,
):
    """Create and dump a stage that imports ``url`` into ``out``.

    The stage has ``url`` as its only dependency and the resolved ``out``
    as its only output. Depending on the flags the stage is either not
    executed (``no_exec``), transferred to a remote (``to_remote``), or run
    normally.

    Args:
        url: Source to import; used as the stage's single dependency.
        out: Output path, resolved through ``resolve_output`` and
            ``resolve_paths``.
        fname: Stage file path; falls back to the path from
            ``resolve_paths``.
        erepo: Forwarded to ``create_stage``; when not ``None`` the
            own-repository ``url`` rewrite is skipped.
        frozen: Value assigned to ``stage.frozen`` before dumping.
        no_exec: Only call ``stage.ignore_outs()`` instead of running.
        remote: Name passed to ``self.cloud.get_remote_odb``; only allowed
            together with ``to_remote``.
        to_remote: Transfer ``url`` to the remote object database instead
            of running the stage.
        desc: When truthy, stored as ``desc`` on the stage's first output.
        jobs: Forwarded to ``transfer`` or ``stage.run``.

    Returns:
        The created stage.

    Raises:
        InvalidArgumentError: ``to_remote`` is combined with ``no_exec``,
            or ``remote`` is given without ``to_remote``.
        OutputDuplicationError: The graph check fails; the new stage is
            removed from the reported set of stages.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage, restore_meta

    # NOTE: ``out`` is rebound by resolve_output first, so ``not out`` in
    # the always_local argument tests the resolved value.
    out = resolve_output(url, out)
    path, wdir, out = resolve_paths(self,
                                    out,
                                    always_local=to_remote and not out)

    # Flag validation runs after path resolution, so errors raised by the
    # resolve_* helpers take precedence over these.
    if to_remote and no_exec:
        raise InvalidArgumentError(
            "--no-exec can't be combined with --to-remote")

    if not to_remote and remote:
        raise InvalidArgumentError(
            "--remote can't be used without --to-remote")

    # NOTE: when user is importing something from within their own repository
    if (erepo is None and os.path.exists(url)
            and path_isin(os.path.abspath(url), self.root_dir)):
        url = relpath(url, wdir)

    stage = create_stage(
        Stage,
        self,
        fname or path,
        wdir=wdir,
        deps=[url],
        outs=[out],
        erepo=erepo,
    )
    restore_meta(stage)

    if desc:
        stage.outs[0].desc = desc

    dvcfile = Dvcfile(self, stage.path)
    dvcfile.remove()

    try:
        # Check the graph on the index returned by add(), which includes
        # the new stage.
        new_index = self.index.add(stage)
        new_index.check_graph()
    except OutputDuplicationError as exc:
        # Re-raise listing only the other stages, not the new one itself.
        raise OutputDuplicationError(exc.output, set(exc.stages) - {stage})

    if no_exec:
        stage.ignore_outs()
    elif to_remote:
        # Instead of stage.run(): transfer the source to the remote odb,
        # then save deps and recompute the stage md5 explicitly.
        remote_odb = self.cloud.get_remote_odb(remote, "import-url")
        stage.outs[0].transfer(url, odb=remote_odb, jobs=jobs)
        stage.save_deps()
        stage.md5 = stage.compute_md5()
    else:
        stage.run(jobs=jobs)

    stage.frozen = frozen

    dvcfile.dump(stage)

    return stage
Пример #17
0
def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):
    """Create a stage from ``kwargs``, optionally execute it, and dump it.

    A named stage is created with ``PipelineStage`` targeting
    ``PIPELINE_FILE``. With ``single_stage`` a plain ``Stage`` is created at
    ``fname`` or, failing that, the path from ``_get_file_path(kwargs)``.

    Args:
        fname: Target file path; rejected together with a stage name and
            only consulted when ``single_stage`` is true.
        no_exec: When true, ``stage.ignore_outs()`` is called instead of
            ``stage.run()`` and the dump is made with ``update_lock=False``.
        single_stage: Create an unnamed ``Stage`` instead of a named
            pipeline stage.
        **kwargs: Stage options forwarded to ``create_stage``. Keys read
            here: ``cmd``, ``name``, ``params``, ``force``, ``no_commit``,
            ``run_cache``.

    Returns:
        The created stage, or ``None`` when ``create_stage`` returns
        ``None``.

    Raises:
        InvalidArgumentError: ``cmd`` is missing, ``name`` is combined with
            ``single_stage`` or ``fname``, or neither ``name`` nor
            ``single_stage`` is given.
        InvalidStageName: ``name`` is rejected by ``is_valid_name``.
        OutputDuplicationError: The graph check fails; the new stage is
            removed from the reported set of stages.
    """
    from dvc.dvcfile import PIPELINE_FILE, Dvcfile
    from dvc.stage import Stage, create_stage

    if not kwargs.get("cmd"):
        raise InvalidArgumentError("command is not specified")

    # NOTE(review): PipelineStage is not among the local imports above;
    # presumably it is imported at module level. Confirm.
    stage_cls = PipelineStage
    path = PIPELINE_FILE
    stage_name = kwargs.get("name")

    if stage_name and single_stage:
        raise InvalidArgumentError(
            "`-n|--name` is incompatible with `--single-stage`"
        )

    if stage_name and fname:
        raise InvalidArgumentError(
            "`--file` is currently incompatible with `-n|--name` "
            "and requires `--single-stage`"
        )

    if not stage_name and not single_stage:
        raise InvalidArgumentError("`-n|--name` is required")

    if single_stage:
        # Drop "name" so it is not forwarded to create_stage.
        kwargs.pop("name", None)
        stage_cls = Stage
        path = fname or _get_file_path(kwargs)
    else:
        if not is_valid_name(stage_name):
            raise InvalidStageName

    # "params" is removed from kwargs and passed on in parsed form.
    params = parse_params(kwargs.pop("params", []))
    stage = create_stage(
        stage_cls, repo=self, path=path, params=params, **kwargs
    )
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    try:
        if kwargs.get("force", True):
            # With force (the default), drop any existing entry for this
            # stage; a ValueError from remove() is ignored (presumably
            # raised when the stage is absent; confirm).
            with suppress(ValueError):
                self.stages.remove(stage)
        else:
            _check_stage_exists(dvcfile, stage)
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Re-raise listing only the other stages, not the new one itself.
        raise OutputDuplicationError(exc.output, set(exc.stages) - {stage})

    if no_exec:
        stage.ignore_outs()
    else:
        stage.run(
            no_commit=kwargs.get("no_commit", False),
            run_cache=kwargs.get("run_cache", True),
        )

    # The lock is only updated when the stage was actually executed.
    dvcfile.dump(stage, update_lock=not no_exec)
    return stage