def loads(project=None, cmd=None, deps=None, outs=None, outs_no_cache=None,
          metrics_no_cache=None, fname=None, cwd=os.curdir, locked=False,
          add=False, overwrite=True, ignore_build_cache=False,
          remove_outs=False):
    """Create a ``Stage`` from its command, dependencies and outputs.

    ``outs`` are loaded with the cache enabled; ``outs_no_cache`` and
    ``metrics_no_cache`` are loaded with it disabled (the latter flagged as
    metrics). The list arguments default to ``None``, which is treated as
    an empty list.

    Returns the new stage, or ``None`` when the stage file already exists,
    the build cache is not ignored and ``stage.is_cached()`` is true.

    Raises ``StageFileBadNameError`` if ``fname`` contains a directory
    component, and ``StageFileAlreadyExistsError`` if the stage file exists,
    ``overwrite`` is false and the prompt returns a falsy answer.
    """
    # Use a fresh list per call rather than a default shared by all calls.
    deps = [] if deps is None else deps
    outs = [] if outs is None else outs
    outs_no_cache = [] if outs_no_cache is None else outs_no_cache
    metrics_no_cache = [] if metrics_no_cache is None else metrics_no_cache

    stage = Stage(project=project, cwd=cwd, cmd=cmd, locked=locked)

    stage.outs = output.loads_from(stage, outs, use_cache=True)
    stage.outs += output.loads_from(stage, outs_no_cache, use_cache=False)
    stage.outs += output.loads_from(stage, metrics_no_cache,
                                    use_cache=False, metric=True)
    stage.deps = dependency.loads_from(stage, deps)

    if fname is not None and os.path.basename(fname) != fname:
        msg = "Stage file name '{}' should not contain subdirectories. " \
              "Use '-c|--cwd' to change location of the stage file."
        raise StageFileBadNameError(msg.format(fname))

    fname, cwd = Stage._stage_fname_cwd(fname, cwd, stage.outs, add=add)

    Stage._check_inside_project(project, cwd)

    cwd = os.path.abspath(cwd)
    path = os.path.join(cwd, fname)

    stage.cwd = cwd
    stage.path = path

    # NOTE: remove outs before we check build cache
    if remove_outs:
        stage.remove_outs(ignore_remove=False)
        project.logger.warn("Build cache is ignored when using "
                            "--remove-outs.")
        ignore_build_cache = True
    else:
        stage.unprotect_outs()

    if os.path.exists(path):
        if not ignore_build_cache and stage.is_cached():
            logger.info('Stage is cached, skipping.')
            return None

        msg = "'{}' already exists. Do you wish to run the command and " \
              "overwrite it?".format(stage.relpath)
        if not overwrite and not project.prompt.prompt(msg, False):
            raise StageFileAlreadyExistsError(stage.relpath)

    return stage
def move(self, from_path, to_path):
    """Move a data-source output to ``to_path`` and re-dump its stage.

    Every stage output whose path equals the one loaded for ``from_path``
    is moved. If the stage file's base name (without the stage-file suffix)
    equals the base name of ``from_path``, the old stage file is deleted
    and the stage is dumped under ``to_path``'s base name in the same
    directory.

    Raises ``DvcException`` if the owning stage is not a data source or if
    no stage has a matching output.
    """
    import dvc.output as Output

    from_out = Output.loads_from(Stage(self, cwd=os.curdir), [from_path])[0]
    suffix = Stage.STAGE_FILE_SUFFIX

    found = False
    for stage in self.stages():
        for out in stage.outs:
            if out.path != from_out.path:
                continue

            if not stage.is_data_source:
                # NOTE(review): confirm Stage exposes `rel_path` here
                # (as opposed to `relpath`).
                raise DvcException(
                    "Dvcfile '{}' is not a data source.".format(
                        stage.rel_path))

            found = True
            to_out = Output.loads_from(out.stage, [to_path],
                                       out.cache, out.metric)[0]
            out.move(to_out)

            # Remove the suffix as a whole. str.rstrip() would strip any
            # trailing run of the suffix's *characters* and eat into names
            # that end in one of them.
            stage_base = os.path.basename(stage.path)
            if suffix and stage_base.endswith(suffix):
                stage_base = stage_base[:-len(suffix)]

            stage_dir = os.path.dirname(stage.path)
            from_base = os.path.basename(from_path)
            to_base = os.path.basename(to_path)
            if stage_base == from_base:
                os.unlink(stage.path)
                stage.path = os.path.join(stage_dir, to_base + suffix)

            stage.dump()

    if not found:
        raise DvcException(
            "Unable to find dvcfile with output '{}'".format(from_path))
def test_outs_and_outs_flags_are_sorted(dvc, typ, extra):
    """Check the order of serialized outputs and of their flag keys."""
    stage = create_stage(PipelineStage, dvc, deps=["input"], **kwargs)

    # Outputs are appended out of order, each with a different flag mix.
    additions = (
        ("barr", {"use_cache": False}),
        ("foobar", {"use_cache": False, "persist": True}),
        ("foo", {"persist": True}),
        ("bar", {}),
    )
    for path, flags in additions:
        stage.outs += output.loads_from(stage, [path], **flags, **extra)

    serialized_outs = to_pipeline_file(stage)["something"][typ]

    expected = [
        "bar",
        {"barr": {"cache": False}},
        {"foo": {"persist": True}},
        {"foobar": {"cache": False, "persist": True}},
    ]
    assert serialized_outs == expected

    foobar_flags = serialized_outs[3]["foobar"]
    assert list(foobar_flags.keys()) == ["cache", "persist"]
def loads(project=None, cmd=None, deps=None, outs=None, outs_no_cache=None,
          metrics_no_cache=None, fname=None, cwd=os.curdir, locked=False,
          add=False):
    """Create a ``Stage`` from its command, dependencies and outputs.

    ``outs`` are loaded with the cache enabled; ``outs_no_cache`` and
    ``metrics_no_cache`` are loaded with it disabled (the latter flagged as
    metrics). The list arguments default to ``None``, which is treated as
    an empty list. The stage file name and working directory are resolved
    by ``Stage._stage_fname_cwd`` and stored on the stage as absolute
    ``cwd`` and ``path``.

    Returns the new stage.
    """
    # Use a fresh list per call rather than a default shared by all calls.
    deps = [] if deps is None else deps
    outs = [] if outs is None else outs
    outs_no_cache = [] if outs_no_cache is None else outs_no_cache
    metrics_no_cache = [] if metrics_no_cache is None else metrics_no_cache

    stage = Stage(project=project, cwd=cwd, cmd=cmd, locked=locked)

    stage.outs = output.loads_from(stage, outs, use_cache=True)
    stage.outs += output.loads_from(stage, outs_no_cache, use_cache=False)
    stage.outs += output.loads_from(stage, metrics_no_cache,
                                    use_cache=False, metric=True)
    stage.deps = dependency.loads_from(stage, deps)

    fname, cwd = Stage._stage_fname_cwd(fname, cwd, stage.outs, add=add)

    cwd = os.path.abspath(cwd)
    path = os.path.join(cwd, fname)

    stage.cwd = cwd
    stage.path = path

    return stage
def move(self, from_path, to_path):
    """
    Rename an output file and update the stage that owns it.

    When the stage file is named after the output, the stage file is
    renamed as well.

    E.g.
          Having: (hello, hello.dvc)

          $ dvc move hello greetings

          Result: (greetings, greetings.dvc)

    Only outputs of data-source stages (those created by `add` or
    `import`) can be moved.
    """
    import dvc.output as Output
    from dvc.stage import Stage

    from_out = Output.loads_from(Stage(self, cwd=os.curdir), [from_path])[0]
    to_path = self._expand_target_path(from_path, to_path)

    # Lazily scan every (stage, output) pair for the source path.
    candidates = (
        (stage, out)
        for stage in self.stages()
        for out in stage.outs
        if from_out.path == out.path
    )
    try:
        stage, out = next(candidates)
    except StopIteration:
        message = "unable to find stage file with output '{path}'"
        raise DvcException(message.format(path=from_path))

    if not stage.is_data_source:
        raise MoveNotDataSourceError(stage.relpath)

    stage_name, _ = os.path.splitext(os.path.basename(stage.path))
    from_name = os.path.basename(from_out.path)
    target_dir, target_name = os.path.split(to_path)

    if stage_name == from_name:
        # The stage file is named after the output: move it alongside.
        os.unlink(stage.path)
        stage.path = os.path.join(
            target_dir, target_name + Stage.STAGE_FILE_SUFFIX
        )
        stage.cwd = os.path.join(self.root_dir, target_dir)

    to_out = Output.loads_from(
        stage, [target_name], out.cache, out.metric
    )[0]

    with self.state:
        out.move(to_out)

    stage.dump()
    self._remind_to_git_add()
def move(self, from_path, to_path):
    """
    Rename an output file and update the stage that owns it.

    When the DVC-file is named after the output, the DVC-file is renamed
    as well.

    E.g.
          Having: (hello, hello.dvc)

          $ dvc move hello greetings

          Result: (greetings, greetings.dvc)

    Only outputs of data-source stages (those created by `add` or
    `import`) can be moved.
    """
    import dvc.output as Output
    from dvc.stage import Stage

    from ..dvcfile import DVC_FILE_SUFFIX, Dvcfile

    from_out = Output.loads_from(Stage(self), [from_path])[0]
    assert from_out.scheme == "local"

    to_path = _expand_target_path(from_path, to_path)

    matches = self.find_outs_by_path(from_out.fspath)
    assert len(matches) == 1
    out = matches[0]
    stage = out.stage

    if not stage.is_data_source:
        raise MoveNotDataSourceError(stage.addressing)

    stage_name, _ = os.path.splitext(os.path.basename(stage.path))
    from_name = os.path.basename(from_out.fspath)

    if stage_name == from_name:
        # The DVC-file is named after the output: move it alongside and
        # re-root the stage's working directory at the target directory.
        target_dir, target_name = os.path.split(to_path)
        os.unlink(stage.path)
        stage.path = os.path.join(target_dir, target_name + DVC_FILE_SUFFIX)
        stage.wdir = os.path.abspath(os.path.join(os.curdir, target_dir))

    # The output path is stored relative to the stage's working directory.
    to_path = os.path.relpath(to_path, stage.wdir)
    to_out = Output.loads_from(
        stage, [to_path], out.use_cache, out.metric
    )[0]

    out.move(to_out)
    stage.save()

    Dvcfile(self, stage.path).dump(stage)
def move(self, from_path, to_path):
    """
    Rename an output file and update the stage that owns it.

    When the stage file is named after the output, the stage file is
    renamed as well.

    E.g.
          Having: (hello, hello.dvc)

          $ dvc move hello greetings

          Result: (greetings, greetings.dvc)

    Only outputs of data-source stages (those created by `add` or
    `import`) can be moved.
    """
    import dvc.output as Output
    from dvc.stage import Stage

    from_out = Output.loads_from(Stage(self, cwd=os.curdir), [from_path])[0]
    to_path = _expand_target_path(from_path, to_path)

    matches = self.find_outs_by_path(from_out.path)
    assert len(matches) == 1
    out = matches[0]
    stage = out.stage

    if not stage.is_data_source:
        raise MoveNotDataSourceError(stage.relpath)

    stage_name, _ = os.path.splitext(os.path.basename(stage.path))
    from_name = os.path.basename(from_out.path)
    target_dir, target_name = os.path.split(to_path)

    if stage_name == from_name:
        # The stage file is named after the output: move it alongside.
        os.unlink(stage.path)
        stage.path = os.path.join(
            target_dir, target_name + Stage.STAGE_FILE_SUFFIX
        )
        stage.cwd = os.path.abspath(os.path.join(os.curdir, target_dir))

    to_out = Output.loads_from(
        stage, [target_name], out.cache, out.metric
    )[0]

    with self.state:
        out.move(to_out)

    stage.dump()
    self.remind_to_git_add()
def fill_stage_outputs(stage, **kwargs):
    """Populate ``stage.outs`` from the output-related keyword arguments.

    The live output (if any) is added first, then one batch per key in a
    fixed order. Each batch's flags are derived from the key's name:
    ``no_cache`` disables caching, while ``persist``, ``metrics``,
    ``plots`` and ``checkpoints`` turn on the matching flag.

    The stage must not already have outputs.
    """
    from dvc.output import loads_from

    assert not stage.outs

    ordered_keys = (
        "outs_persist",
        "outs_persist_no_cache",
        "metrics_no_cache",
        "metrics",
        "plots_no_cache",
        "plots",
        "outs_no_cache",
        "outs",
        "checkpoints",
    )

    stage.outs = []
    stage.outs += _load_live_output(stage, **kwargs)

    for key in ordered_keys:
        paths = kwargs.get(key, [])
        flags = {
            "use_cache": "no_cache" not in key,
            "persist": "persist" in key,
            "metric": "metrics" in key,
            "plot": "plots" in key,
            "checkpoint": "checkpoints" in key,
        }
        stage.outs += loads_from(stage, paths, **flags)
def fill_stage_outputs(stage, **kwargs):
    """Populate ``stage.outs`` from the output-related keyword arguments.

    The live output (``live`` / ``live_summary``) is added first, then one
    batch per key in a fixed order. Each batch's flags are derived from the
    key's name: ``no_cache`` disables caching, while ``persist``,
    ``metrics``, ``plots`` and ``checkpoints`` turn on the matching flag.

    The stage must not already have outputs.
    """
    assert not stage.outs

    ordered_keys = (
        "outs_persist",
        "outs_persist_no_cache",
        "metrics_no_cache",
        "metrics",
        "plots_no_cache",
        "plots",
        "outs_no_cache",
        "outs",
        "checkpoints",
    )

    live_path = kwargs.get("live", None)
    live_summary = kwargs.get("live_summary", False)

    stage.outs = []
    stage.outs += _load_live_outputs(stage, live_path, live_summary)

    for key in ordered_keys:
        paths = kwargs.get(key, [])
        flags = {
            "use_cache": "no_cache" not in key,
            "persist": "persist" in key,
            "metric": "metrics" in key,
            "plot": "plots" in key,
            "checkpoint": "checkpoints" in key,
        }
        stage.outs += output.loads_from(stage, paths, **flags)
def _load_live_output(
    stage,
    live=None,
    live_no_cache=None,
    live_summary=False,
    live_html=False,
    **kwargs,
):
    """Return the stage's live output as a list (empty if none requested).

    At most one of ``live`` / ``live_no_cache`` may be truthy; whichever is
    set supplies the path, and ``live_no_cache`` disables caching. Extra
    keyword arguments are accepted and ignored.
    """
    from dvc.output import BaseOutput, loads_from

    if not (live or live_no_cache):
        return []

    # Exactly one of the two spellings may be given.
    assert bool(live) != bool(live_no_cache)

    live_path = live or live_no_cache
    live_options = {
        BaseOutput.PARAM_LIVE_SUMMARY: live_summary,
        BaseOutput.PARAM_LIVE_HTML: live_html,
    }
    loaded = loads_from(
        stage,
        [live_path],
        use_cache=not bool(live_no_cache),
        live=live_options,
    )
    return list(loaded)
def fill_stage_dependencies(stage, deps=None, erepo=None, params=None):
    """Populate ``stage.deps`` with regular dependencies, then params.

    Falsy ``deps`` / ``params`` are treated as empty lists. The stage must
    not already have dependencies.
    """
    from dvc.dependency import loads_from, loads_params

    assert not stage.deps

    dep_paths = deps or []
    param_specs = params or []

    stage.deps = []
    stage.deps += loads_from(stage, dep_paths, erepo=erepo)
    stage.deps += loads_params(stage, param_specs)
def loads(project=None, cmd=None, deps=None, outs=None, outs_no_cache=None,
          fname=None, cwd=os.curdir):
    """Create a ``Stage`` from its command, dependencies and outputs.

    ``outs`` are loaded with the cache enabled and ``outs_no_cache`` with
    it disabled. The list arguments default to ``None``, which is treated
    as an empty list. The stage path is ``fname`` joined onto the absolute
    ``cwd``.

    Returns the new stage.
    """
    # Use a fresh list per call rather than a default shared by all calls.
    deps = [] if deps is None else deps
    outs = [] if outs is None else outs
    outs_no_cache = [] if outs_no_cache is None else outs_no_cache

    cwd = os.path.abspath(cwd)
    # NOTE(review): with the default fname=None this join raises TypeError,
    # so callers are presumably expected to always pass fname.
    path = os.path.join(cwd, fname)

    stage = Stage(project=project, path=path, cmd=cmd, cwd=cwd)

    stage.outs = output.loads_from(stage, outs, use_cache=True)
    stage.outs += output.loads_from(stage, outs_no_cache, use_cache=False)
    stage.deps = dependency.loads_from(stage, deps)

    return stage
def loads(project=None, cmd=None, deps=None, outs=None, outs_no_cache=None,
          metrics_no_cache=None, fname=None, cwd=os.curdir, locked=False,
          add=False, overwrite=True):
    """Create a ``Stage`` from its command, dependencies and outputs.

    ``outs`` are loaded with the cache enabled; ``outs_no_cache`` and
    ``metrics_no_cache`` are loaded with it disabled (the latter flagged as
    metrics). The list arguments default to ``None``, which is treated as
    an empty list.

    Returns the new stage.

    Raises ``StageFileBadNameError`` if ``fname`` contains a directory
    component, and ``DvcException`` if the stage file already exists,
    ``overwrite`` is false and the prompt returns a falsy answer.
    """
    # Use a fresh list per call rather than a default shared by all calls.
    deps = [] if deps is None else deps
    outs = [] if outs is None else outs
    outs_no_cache = [] if outs_no_cache is None else outs_no_cache
    metrics_no_cache = [] if metrics_no_cache is None else metrics_no_cache

    stage = Stage(project=project, cwd=cwd, cmd=cmd, locked=locked)

    stage.outs = output.loads_from(stage, outs, use_cache=True)
    stage.outs += output.loads_from(stage, outs_no_cache, use_cache=False)
    stage.outs += output.loads_from(stage, metrics_no_cache,
                                    use_cache=False, metric=True)
    stage.deps = dependency.loads_from(stage, deps)

    if fname is not None and os.path.basename(fname) != fname:
        msg = "Stage file name '{}' should not contain subdirectories. " \
              "Use '-c|--cwd' to change location of the stage file."
        raise StageFileBadNameError(msg.format(fname))

    fname, cwd = Stage._stage_fname_cwd(fname, cwd, stage.outs, add=add)

    Stage._check_inside_project(project, cwd)

    cwd = os.path.abspath(cwd)
    path = os.path.join(cwd, fname)

    if os.path.exists(path):
        relpath = os.path.relpath(path)
        msg = "'{}' already exists. " \
              "Do you wish to run the command and overwrite it?"
        if not overwrite \
           and not project.prompt.prompt(msg.format(relpath), False):
            raise DvcException("'{}' already exists".format(relpath))

    stage.cwd = cwd
    stage.path = path

    return stage
def move(self, from_path, to_path):
    """Move a data-source output to ``to_path`` and re-dump its stage.

    Runs inside ``self.state``. Every stage output whose path equals the
    one loaded for ``from_path`` is moved. If the stage file's base name
    (without the stage-file suffix) equals the base name of ``from_path``,
    the old stage file is deleted and the stage is dumped under
    ``to_path``'s base name in the same directory. The git-add reminder is
    then issued.

    Raises ``MoveNotDataSourceError`` if the owning stage is not a data
    source, and ``DvcException`` if no stage has a matching output.
    """
    import dvc.output as Output
    from dvc.stage import Stage

    from_out = Output.loads_from(Stage(self, cwd=os.curdir), [from_path])[0]
    suffix = Stage.STAGE_FILE_SUFFIX

    found = False
    self._files_to_git_add = []
    with self.state:
        for stage in self.stages():
            for out in stage.outs:
                if out.path != from_out.path:
                    continue

                if not stage.is_data_source:
                    raise MoveNotDataSourceError(stage.relpath)

                found = True
                to_out = Output.loads_from(out.stage, [to_path],
                                           out.cache, out.metric)[0]
                out.move(to_out)

                # Remove the suffix as a whole. str.rstrip() would strip
                # any trailing run of the suffix's *characters* and eat
                # into names that end in one of them.
                stage_base = os.path.basename(stage.path)
                if suffix and stage_base.endswith(suffix):
                    stage_base = stage_base[:-len(suffix)]

                stage_dir = os.path.dirname(stage.path)
                from_base = os.path.basename(from_path)
                to_base = os.path.basename(to_path)
                if stage_base == from_base:
                    os.unlink(stage.path)
                    stage.path = os.path.join(stage_dir, to_base + suffix)

                stage.dump()

    self._remind_to_git_add()

    if not found:
        msg = "Unable to find dvcfile with output '{}'"
        raise DvcException(msg.format(from_path))
def get_url(url, out=None):
    """Download ``url`` to ``out`` without caching the result.

    The target is resolved with ``resolve_output``. When ``url`` is an
    existing local path, both paths are made absolute.
    """
    out = resolve_output(url, out)

    # NOTE(review): block nesting was reconstructed from flattened source;
    # confirm `out` is only made absolute when `url` is a local path.
    if os.path.exists(url):
        url, out = os.path.abspath(url), os.path.abspath(out)

    [dep] = dependency.loads_from(None, [url])
    [out] = output.loads_from(None, [out], use_cache=False)
    dep.download(out)
def get_url(url, out=None):
    """Download ``url`` to ``out`` without caching the result.

    A falsy ``out`` falls back to the base name of the URL's path
    component. When ``url`` is an existing local path, both paths are made
    absolute.
    """
    if not out:
        out = os.path.basename(urlparse(url).path)

    # NOTE(review): block nesting was reconstructed from flattened source;
    # confirm `out` is only made absolute when `url` is a local path.
    if os.path.exists(url):
        url, out = os.path.abspath(url), os.path.abspath(out)

    [dep] = dependency.loads_from(None, [url])
    [out] = output.loads_from(None, [out], use_cache=False)
    dep.download(out)
def _fill_stage_outputs(self, **kwargs):
    """Populate ``self.outs`` with one batch per ``OutputParams`` member.

    Each member's value is the keyword to read paths from. Its name also
    decides the flags: ``no_cache`` disables caching, while ``persist``
    and ``metrics`` turn on the matching flag. The stage must not already
    have outputs.
    """
    assert not self.outs

    self.outs = []
    for param in OutputParams:
        key = param.value
        paths = kwargs.get(key, [])
        cached = "no_cache" not in key
        persistent = "persist" in key
        is_metric = "metrics" in key
        self.outs += output.loads_from(
            self,
            paths,
            use_cache=cached,
            persist=persistent,
            metric=is_metric,
        )
def _fill_stage_outputs(stage, **kwargs):
    """Set ``stage.outs`` from the output-related keyword arguments.

    Plain cached ``outs`` come first, then the other kinds in a fixed
    order, each loaded with the flags listed next to its key.
    """
    stage.outs = output.loads_from(
        stage, kwargs.get("outs", []), use_cache=True
    )

    # (keyword name, flags passed to loads_from), in append order.
    remaining = (
        ("metrics", {"use_cache": True, "metric": True}),
        ("outs_persist", {"use_cache": True, "persist": True}),
        ("outs_no_cache", {"use_cache": False}),
        ("metrics_no_cache", {"use_cache": False, "metric": True}),
        ("outs_persist_no_cache", {"use_cache": False, "persist": True}),
    )
    for key, flags in remaining:
        stage.outs += output.loads_from(stage, kwargs.get(key, []), **flags)
def _fill_stage_outputs(
    stage,
    outs,
    outs_no_cache,
    metrics,
    metrics_no_cache,
    outs_persist,
    outs_persist_no_cache,
):
    """Set ``stage.outs`` from the six kinds of output path lists.

    Plain cached ``outs`` come first, then the other kinds in a fixed
    order, each loaded with the flags listed next to it.
    """
    stage.outs = output.loads_from(stage, outs, use_cache=True)

    # (paths, flags passed to loads_from), in append order.
    remaining = (
        (metrics, {"use_cache": True, "metric": True}),
        (outs_persist, {"use_cache": True, "persist": True}),
        (outs_no_cache, {"use_cache": False}),
        (metrics_no_cache, {"use_cache": False, "metric": True}),
        (outs_persist_no_cache, {"use_cache": False, "persist": True}),
    )
    for paths, flags in remaining:
        stage.outs += output.loads_from(stage, paths, **flags)
def get_url(url, out=None):
    """Download ``url`` to ``out`` without caching the result.

    The target is resolved with ``resolve_output`` and made absolute; an
    existing local ``url`` is made absolute too.

    Raises ``dep.DoesNotExistError`` when the loaded dependency reports
    that it does not exist.
    """
    out = resolve_output(url, out)

    # NOTE(review): block nesting was reconstructed from flattened source;
    # confirm `out` is made absolute regardless of whether `url` is local.
    if os.path.exists(url):
        url = os.path.abspath(url)
    out = os.path.abspath(out)

    [dep] = dependency.loads_from(None, [url])
    [out] = output.loads_from(None, [out], use_cache=False)

    if dep.exists:
        dep.download(out)
    else:
        raise dep.DoesNotExistError(dep)
def loads(project=None, cmd=None, deps=None, outs=None, outs_no_cache=None,
          metrics_no_cache=None, fname=None, cwd=os.curdir, locked=False,
          add=False, overwrite=True):
    """Create a ``Stage`` from its command, dependencies and outputs.

    ``outs`` are loaded with the cache enabled; ``outs_no_cache`` and
    ``metrics_no_cache`` are loaded with it disabled (the latter flagged as
    metrics). The list arguments default to ``None``, which is treated as
    an empty list.

    Returns the new stage.

    Raises ``DvcException`` if the stage file already exists, ``overwrite``
    is false and the prompt returns a falsy answer.
    """
    # Use a fresh list per call rather than a default shared by all calls.
    deps = [] if deps is None else deps
    outs = [] if outs is None else outs
    outs_no_cache = [] if outs_no_cache is None else outs_no_cache
    metrics_no_cache = [] if metrics_no_cache is None else metrics_no_cache

    stage = Stage(project=project, cwd=cwd, cmd=cmd, locked=locked)

    stage.outs = output.loads_from(stage, outs, use_cache=True)
    stage.outs += output.loads_from(stage, outs_no_cache, use_cache=False)
    stage.outs += output.loads_from(stage, metrics_no_cache,
                                    use_cache=False, metric=True)
    stage.deps = dependency.loads_from(stage, deps)

    fname, cwd = Stage._stage_fname_cwd(fname, cwd, stage.outs, add=add)

    cwd = os.path.abspath(cwd)
    path = os.path.join(cwd, fname)

    if os.path.exists(path):
        relpath = os.path.relpath(path)
        msg = "'{}' already exists. " \
              "Do you wish to run the command and overwrite it?"
        if not overwrite \
           and not project.prompt.prompt(msg.format(relpath), False):
            raise DvcException("'{}' already exists".format(relpath))

    stage.cwd = cwd
    stage.path = path

    return stage
def _load_live_outputs(stage, live_l=None, live_summary=False):
    """Return the stage's live output as a list (empty if ``live_l`` is falsy).

    The output is loaded uncached and carries the ``live_summary`` setting.
    """
    from dvc.output import BaseOutput

    if not live_l:
        return []

    live_options = {BaseOutput.PARAM_LIVE_SUMMARY: live_summary}
    loaded = output.loads_from(
        stage, [live_l], use_cache=False, live=live_options
    )
    return list(loaded)
def get_url(url, out=None, jobs=None):
    """Download ``url`` to ``out`` without caching the result.

    The target is resolved with ``resolve_output`` and made absolute; an
    existing local ``url`` is made absolute too. ``jobs`` is forwarded to
    the dependency's ``download``.

    Raises ``dep.DoesNotExistError`` when the loaded dependency reports
    that it does not exist.
    """
    import dvc.dependency as dependency
    import dvc.output as output
    from dvc.utils import resolve_output

    out = resolve_output(url, out)

    # NOTE(review): block nesting was reconstructed from flattened source;
    # confirm `out` is made absolute regardless of whether `url` is local.
    if os.path.exists(url):
        url = os.path.abspath(url)
    out = os.path.abspath(out)

    [dep] = dependency.loads_from(None, [url])
    [out] = output.loads_from(None, [out], use_cache=False)

    if dep.exists:
        dep.download(out, jobs=jobs)
    else:
        raise dep.DoesNotExistError(dep)
def _load_live_output(
    stage, live=None, live_summary=False, live_report=False, **kwargs
):
    """Return the stage's live output as a list (empty if ``live`` is falsy).

    The output is loaded uncached and carries the summary and report
    settings. Extra keyword arguments are accepted and ignored.
    """
    from dvc.output import BaseOutput, loads_from

    if not live:
        return []

    live_options = {
        BaseOutput.PARAM_LIVE_SUMMARY: live_summary,
        BaseOutput.PARAM_LIVE_REPORT: live_report,
    }
    loaded = loads_from(stage, [live], use_cache=False, live=live_options)
    return list(loaded)
def _fill_stage_outputs(self, **kwargs):
    """Populate ``self.outs`` from the output-related keyword arguments.

    One batch is loaded per key, in a fixed order. The key's name decides
    the flags: ``no_cache`` disables caching, while ``persist`` and
    ``metrics`` turn on the matching flag. The stage must not already have
    outputs.
    """
    assert not self.outs

    ordered_keys = (
        "outs",
        "metrics",
        "outs_persist",
        "outs_no_cache",
        "metrics_no_cache",
        "outs_persist_no_cache",
    )

    self.outs = []
    for key in ordered_keys:
        paths = kwargs.get(key, [])
        cached = "no_cache" not in key
        persistent = "persist" in key
        is_metric = "metrics" in key
        self.outs += output.loads_from(
            self,
            paths,
            use_cache=cached,
            persist=persistent,
            metric=is_metric,
        )
def fill_stage_outputs(stage, **kwargs):
    """Populate ``stage.outs`` from the output-related keyword arguments.

    One batch is loaded per key, in a fixed order. The key's name decides
    the flags: ``no_cache`` disables caching, while ``persist``,
    ``metrics`` and ``plots`` turn on the matching flag. The stage must
    not already have outputs.
    """
    assert not stage.outs

    ordered_keys = (
        "outs_persist",
        "outs_persist_no_cache",
        "metrics_no_cache",
        "metrics",
        "plots_no_cache",
        "plots",
        "outs_no_cache",
        "outs",
    )

    stage.outs = []
    for key in ordered_keys:
        paths = kwargs.get(key, [])
        flags = {
            "use_cache": "no_cache" not in key,
            "persist": "persist" in key,
            "metric": "metrics" in key,
            "plot": "plots" in key,
        }
        stage.outs += output.loads_from(stage, paths, **flags)
def create(
    repo=None,
    cmd=None,
    deps=None,
    outs=None,
    outs_no_cache=None,
    metrics=None,
    metrics_no_cache=None,
    fname=None,
    cwd=os.curdir,
    locked=False,
    add=False,
    overwrite=True,
    ignore_build_cache=False,
    remove_outs=False,
):
    """Create a ``Stage`` from its command, dependencies and outputs.

    ``None`` list arguments are treated as empty. Outputs are loaded in the
    order outs, metrics, outs_no_cache, metrics_no_cache. An existing stage
    file is removed before the stage is returned.

    Returns the new stage, or ``None`` when the stage file already exists,
    the build cache is not ignored and ``stage.is_cached`` is true.

    Raises ``StageFileBadNameError`` if ``fname`` contains a directory
    component, and ``StageFileAlreadyExistsError`` if the stage file
    exists, ``overwrite`` is false and the confirmation is declined.
    """
    outs = [] if outs is None else outs
    deps = [] if deps is None else deps
    outs_no_cache = [] if outs_no_cache is None else outs_no_cache
    metrics = [] if metrics is None else metrics
    metrics_no_cache = [] if metrics_no_cache is None else metrics_no_cache

    stage = Stage(repo=repo, cwd=cwd, cmd=cmd, locked=locked)

    stage.outs = output.loads_from(stage, outs, use_cache=True)
    # (paths, flags passed to loads_from), in append order.
    remaining = (
        (metrics, {"use_cache": True, "metric": True}),
        (outs_no_cache, {"use_cache": False}),
        (metrics_no_cache, {"use_cache": False, "metric": True}),
    )
    for paths, flags in remaining:
        stage.outs += output.loads_from(stage, paths, **flags)

    stage.deps = dependency.loads_from(stage, deps)

    stage._check_circular_dependency()
    stage._check_duplicated_arguments()

    has_subdir = fname is not None and os.path.basename(fname) != fname
    if has_subdir:
        template = (
            "stage file name '{fname}' should not contain subdirectories."
            " Use '-c|--cwd' to change location of the stage file."
        )
        raise StageFileBadNameError(template.format(fname=fname))

    fname, cwd = Stage._stage_fname_cwd(fname, cwd, stage.outs, add=add)

    Stage._check_inside_repo(repo, cwd)

    cwd = os.path.abspath(cwd)
    path = os.path.join(cwd, fname)
    stage.cwd, stage.path = cwd, path

    # NOTE: remove outs before we check build cache
    if not remove_outs:
        stage.unprotect_outs()
    else:
        stage.remove_outs(ignore_remove=False)
        logger.warning("Build cache is ignored when using --remove-outs.")
        ignore_build_cache = True

    if not os.path.exists(path):
        return stage

    if not ignore_build_cache and stage.is_cached:
        logger.info("Stage is cached, skipping.")
        return None

    msg = (
        "'{}' already exists. Do you wish to run the command and "
        "overwrite it?"
    ).format(stage.relpath)
    if not overwrite and not prompt.confirm(msg):
        raise StageFileAlreadyExistsError(stage.relpath)

    os.unlink(path)
    return stage
def create(
    repo=None,
    cmd=None,
    deps=None,
    outs=None,
    outs_no_cache=None,
    metrics=None,
    metrics_no_cache=None,
    fname=None,
    cwd=None,
    wdir=None,
    locked=False,
    add=False,
    overwrite=True,
    ignore_build_cache=False,
    remove_outs=False,
):
    """Create a ``Stage`` from its command, dependencies and outputs.

    ``None`` list arguments are treated as empty. ``wdir`` is the working
    directory; the older ``cwd`` option is used in its place when ``wdir``
    is not given. With ``cwd`` the stage file is placed inside the working
    directory, otherwise ``fname`` is resolved with ``os.path.abspath``.
    An existing stage file is removed before the stage is returned.

    Returns the new stage, or ``None`` when the stage file already exists,
    the build cache is not ignored and ``stage.is_cached`` is true.

    Raises ``StageFileBadNameError`` if ``cwd`` is used together with an
    ``fname`` containing a directory component, and
    ``StageFileAlreadyExistsError`` if the stage file exists, ``overwrite``
    is false and the confirmation is declined.
    """
    outs = [] if outs is None else outs
    deps = [] if deps is None else deps
    outs_no_cache = [] if outs_no_cache is None else outs_no_cache
    metrics = [] if metrics is None else metrics
    metrics_no_cache = [] if metrics_no_cache is None else metrics_no_cache

    # Backward compatibility for `cwd` option
    if wdir is None:
        if cwd is None:
            wdir = os.curdir
        else:
            if fname is not None and os.path.basename(fname) != fname:
                template = (
                    "stage file name '{fname}' may not contain subdirectories"
                    " if '-c|--cwd' (deprecated) is specified. Use '-w|--wdir'"
                    " along with '-f' to specify stage file path and working"
                    " directory."
                )
                raise StageFileBadNameError(template.format(fname=fname))
            wdir = cwd

    stage = Stage(repo=repo, wdir=wdir, cmd=cmd, locked=locked)

    stage.outs = output.loads_from(stage, outs, use_cache=True)
    # (paths, flags passed to loads_from), in append order.
    remaining = (
        (metrics, {"use_cache": True, "metric": True}),
        (outs_no_cache, {"use_cache": False}),
        (metrics_no_cache, {"use_cache": False, "metric": True}),
    )
    for paths, flags in remaining:
        stage.outs += output.loads_from(stage, paths, **flags)

    stage.deps = dependency.loads_from(stage, deps)

    stage._check_circular_dependency()
    stage._check_duplicated_arguments()

    fname = Stage._stage_fname(fname, stage.outs, add=add)
    wdir = os.path.abspath(wdir)

    # With the legacy `cwd` option the stage file lives in the working
    # directory; otherwise `fname` is resolved on its own.
    path = os.path.join(wdir, fname) if cwd is not None else os.path.abspath(fname)

    Stage._check_stage_path(repo, wdir)
    Stage._check_stage_path(repo, os.path.dirname(path))

    stage.wdir, stage.path = wdir, path

    # NOTE: remove outs before we check build cache
    if not remove_outs:
        stage.unprotect_outs()
    else:
        stage.remove_outs(ignore_remove=False)
        logger.warning("Build cache is ignored when using --remove-outs.")
        ignore_build_cache = True

    if not os.path.exists(path):
        return stage

    if not ignore_build_cache and stage.is_cached:
        logger.info("Stage is cached, skipping.")
        return None

    msg = (
        "'{}' already exists. Do you wish to run the command and "
        "overwrite it?"
    ).format(stage.relpath)
    if not overwrite and not prompt.confirm(msg):
        raise StageFileAlreadyExistsError(stage.relpath)

    os.unlink(path)
    return stage