from voluptuous import Any, Required, Optional
from collections import defaultdict
from copy import deepcopy

beetmover_description_schema = schema.extend({
    # depname is used in taskrefs to identify the taskID of the unsigned things
    Required("depname", default="build"): text_type,
    # unique label to describe this beetmover task, defaults to {dep.label}-beetmover
    Optional("label"): text_type,
    Required("partner-bucket-scope"): optionally_keyed_by("release-level", text_type),
    Required("partner-public-path"): Any(None, text_type),
    Required("partner-private-path"): Any(None, text_type),
    Optional("extra"): object,
    Required("shipping-phase"): task_description_schema["shipping-phase"],
    Optional("shipping-product"): task_description_schema["shipping-product"],
    Optional("priority"): task_description_schema["priority"],
})

transforms = TransformSequence()
transforms.add_validate(beetmover_description_schema)
transforms.add(apply_partner_priority)
This code is only intended for internal use and is subject to change in ways
that may break any direct use of it.
"""

# Pylint doesn't like the private function type naming for the callable
# objects below. Given the consistent use of them, the current names seem
# preferable to blindly following pylint.
#
# pylint: disable=invalid-name,redefined-variable-type,undefined-variable

if sys.version_info[0] >= 3:
    _unicode_or_printable_ascii = str
else:
    _unicode_or_printable_ascii = Any(unicode, Match(r'^[\x20-\x7E]*$'))
# pylint: enable=redefined-variable-type

_any_string = Any(_unicode_or_printable_ascii, str)

_md5 = All(_any_string, Match('^[0-9A-Fa-f]{32}$'))

_country_code = All(_any_string, Match('^[A-Z]{2}$'))

_telephone_country_code = Any(
    All(_any_string, Match('^[0-9]{1,4}$')),
    All(int, Range(min=1, max=9999)))

_subdivision_iso_code = All(_any_string, Match('^[0-9A-Z]{1,4}$'))
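
# A minimal usage sketch, not part of the original module: these private
# validators are ordinary voluptuous callables, so wrapping one in Schema()
# (done here purely for illustration) lets you exercise it directly.
# Assumes `from voluptuous import Schema, MultipleInvalid`.
from voluptuous import Schema, MultipleInvalid

_md5_demo = Schema(_md5)
_md5_demo('d41d8cd98f00b204e9800998ecf8427e')  # 32 hex chars: accepted
try:
    _md5_demo('not-an-md5')
except MultipleInvalid:
    pass  # rejected by the Match pattern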
from voluptuous import Any, Required, Optional
from copy import deepcopy
import logging

logger = logging.getLogger(__name__)

beetmover_description_schema = schema.extend(
    {
        # unique label to describe this beetmover task, defaults to {dep.label}-beetmover
        Optional("label"): text_type,
        Required("partner-bucket-scope"): optionally_keyed_by(
            "release-level", text_type
        ),
        Required("partner-public-path"): Any(None, text_type),
        Required("partner-private-path"): Any(None, text_type),
        Optional("extra"): object,
        Required("shipping-phase"): task_description_schema["shipping-phase"],
        Optional("shipping-product"): task_description_schema["shipping-product"],
        Optional("priority"): task_description_schema["priority"],
    }
)

transforms = TransformSequence()
transforms.add_validate(beetmover_description_schema)


@transforms.add
def resolve_keys(config, jobs):
    for job in jobs:
class FeatureTemplate(Feature):
    SCHEMA = Schema({
        Required('name'): Any(
            All(schemas.key, Length(max=256)),
            All(schemas.bracket_key, Length(max=256)),
        ),
        Required('metric'): Any(
            All(schemas.key, Length(max=256)),
            All(schemas.bracket_key, Length(max=256)),
        ),
        Required('field'): Any(
            All(schemas.dotted_key, Length(max=256)),
            All(schemas.bracket_key, Length(max=256)),
        ),
        'bucket': Any(None, schemas.key, schemas.bracket_key),
        'measurement': Any(None, schemas.dotted_key, schemas.bracket_key),
        'match_all': Any(
            None,
            Schema([{
                Required(schemas.key): Any(
                    int,
                    bool,
                    float,
                    All(str, Length(max=256)),
                )
            }]),
            Schema([{
                Required(schemas.bracket_key): Any(
                    int,
                    bool,
                    float,
                    All(str, Length(max=256)),
                )
            }]),
        ),
        'default': Any(None, int, float, 'previous', schemas.bracket_key),
        Optional('io', default='io'): Any('io', 'o', 'i', schemas.bracket_key),
        'script': Any(None, str, schemas.bracket_key),
        Optional('anomaly_type', default='low_high'):
            Any('low', 'high', 'low_high', schemas.bracket_key),
        'transform': Any(None, "diff", schemas.bracket_key),
        'scores': Any(
            None,
            "min_max",
            "normalize",
            "standardize",
            schemas.bracket_key,
        ),
    })

    @classmethod
    def validate(cls, args):
        del args['self']
        return schemas.validate(cls.SCHEMA, args)
class ModelTemplate(Model):
    """
    Loud ML Jinja model template
    """

    TYPE = 'template_cls'

    SCHEMA = Schema(
        {
            Required('name'): Any(
                All(schemas.key, Length(max=256)),
                All(schemas.bracket_key, Length(max=256)),
            ),
            Required('type'): Any(
                All(schemas.key, Length(max=256)),
                All(schemas.bracket_key, Length(max=256)),
            ),
            Optional('features'): Any(
                None,
                All([FeatureTemplate.SCHEMA], Length(min=1)),
            ),
            Optional('bucket_interval'): Any(
                schemas.TimeDelta(min=0, min_included=False),
                All(schemas.bracket_key),
            ),
            'threshold': Any(
                schemas.score,
                All(schemas.bracket_key),
            ),
            'max_threshold': Any(
                schemas.score,
                All(schemas.bracket_key),
            ),
            'min_threshold': Any(
                schemas.score,
                All(schemas.bracket_key),
            ),
            'max_evals': Any(
                All(int, Range(min=1)),
                All(schemas.bracket_key),
            ),
        },
        extra=ALLOW_EXTRA)

    def __init__(self, settings, name):
        settings = copy.deepcopy(settings)
        settings = self.validate(settings)
        self._settings = settings
        self.name = name
        self._state = None

        self.features = [
            FeatureTemplate(**feature) for feature in settings['features']
        ]

        self.bucket_interval = misc.parse_timedelta(
            settings.get('bucket_interval', 0)).total_seconds()

    @classmethod
    def validate(cls, settings):
        """Validate the settings against the schema"""
        return schemas.validate(cls.SCHEMA, settings)

    @property
    def is_trained(self):
        return False

    @property
    def data(self):
        return {
            'settings': self.settings,
        }

    @property
    def state(self):
        return None

    @property
    def preview(self):
        return {
            'settings': self.settings,
        }
MANIFEST_DIR = os.path.join(BASE_DIR, "signing-manifests")

SUPPORTED_SIGNING_FORMATS = (
    "autograph_gpg",
    "autograph_authenticode",
    "autograph_authenticode_stub",
    "autograph_hash_only_mar384",
)

base_schema = Schema(
    {
        Required("url"): text_type,
        Required("bug"): int,
        Required("private-artifact"): bool,
        Required("signing-formats"): [Any(*SUPPORTED_SIGNING_FORMATS)],
        Required("sha256"): text_type,
        Required("filesize"): int,
        Required("requestor"): basestring,
        Required("reason"): basestring,
        Optional("gpg-signature"): basestring,
        Required("artifact-name"): basestring,
        Required("manifest_name"): basestring,
    }
)


def check_manifest(manifest):
    # XXX add any manifest checks we want.
    # XXX sha256 is a valid sha256?
    # XXX url is a reachable url?
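
# Standalone illustration, detached from the truncated function above: a
# manifest entry with invented values that would satisfy base_schema
# (assuming text_type/basestring both resolve to str on this runtime).
example_manifest = {
    "url": "https://example.com/downloads/app-1.0.tar.gz",
    "bug": 1234567,
    "private-artifact": False,
    "signing-formats": ["autograph_gpg"],
    "sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
    "filesize": 1024,
    "requestor": "example@example.com",
    "reason": "illustration",
    "artifact-name": "app-1.0.tar.gz",
    "manifest_name": "app-1.0",
}
base_schema(example_manifest)  # raises MultipleInvalid on a bad entry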
VALID_ASSESSMENT_TYPES = [
    'peer-assessment',
    'self-assessment',
    'student-training',
    'staff-assessment',
]

VALID_UPLOAD_FILE_TYPES = ['image', 'pdf-and-image', 'custom']

# Schema definition for an update from the Studio JavaScript editor.
EDITOR_UPDATE_SCHEMA = Schema({
    Required('prompts'): [Schema({
        Required('description'): utf8_validator,
    })],
    Required('prompts_type', default='text'):
        Any(All(utf8_validator, In(PROMPTS_TYPES)), None),
    Required('title'): utf8_validator,
    Required('feedback_prompt'): utf8_validator,
    Required('feedback_default_text'): utf8_validator,
    Required('submission_start'): Any(datetime_validator, None),
    Required('submission_due'): Any(datetime_validator, None),
    Required('text_response', default='required'):
        Any(All(utf8_validator, In(NECESSITY_OPTIONS)), None),
    Required('text_response_editor', default='text'):
        Any(All(utf8_validator, In(AVAILABLE_EDITOR_OPTIONS)), None),
    Required('file_upload_response', default=None):
class ParamsDependency(LocalDependency):
    PARAM_PARAMS = "params"
    PARAM_SCHEMA = {PARAM_PARAMS: Any(dict, list, None)}
    DEFAULT_PARAMS_FILE = "params.yaml"

    def __init__(self, stage, path, params):
        info = {}
        self.params = []
        if params:
            if isinstance(params, list):
                self.params = params
            else:
                assert isinstance(params, dict)
                self.params = list(params.keys())
                info = {self.PARAM_PARAMS: params}

        super().__init__(
            stage,
            path or os.path.join(stage.repo.root_dir, self.DEFAULT_PARAMS_FILE),
            info=info,
        )

    def dumpd(self):
        ret = super().dumpd()
        if not self.hash_info:
            ret[self.PARAM_PARAMS] = self.params
        return ret

    def fill_values(self, values=None):
        """Load params values dynamically."""
        if not values:
            return
        info = {}
        for param in self.params:
            if param in values:
                info[param] = values[param]
        self.hash_info = HashInfo(self.PARAM_PARAMS, info)

    def workspace_status(self):
        status = super().workspace_status()
        if status.get(str(self)) == "deleted":
            return status

        status = defaultdict(dict)
        info = self.hash_info.value if self.hash_info else {}
        actual = self.read_params()
        for param in self.params:
            if param not in actual.keys():
                st = "deleted"
            elif param not in info:
                st = "new"
            elif actual[param] != info[param]:
                st = "modified"
            else:
                assert actual[param] == info[param]
                continue

            status[str(self)][param] = st

        return status

    def status(self):
        return self.workspace_status()

    def read_params(self):
        if not self.exists:
            return {}

        suffix = self.path_info.suffix.lower()
        loader = LOADERS[suffix]
        try:
            config = loader(self.path_info, fs=self.repo.fs)
        except ParseError as exc:
            raise BadParamFileError(
                f"Unable to read parameters from '{self}'") from exc

        ret = {}
        for param in self.params:
            try:
                ret[param] = dpath.util.get(config, param, separator=".")
            except KeyError:
                pass
        return ret

    def get_hash(self):
        info = self.read_params()

        missing_params = set(self.params) - set(info.keys())
        if missing_params:
            raise MissingParamsError(
                "Parameters '{}' are missing from '{}'.".format(
                    ", ".join(missing_params),
                    self,
                ))

        return HashInfo(self.PARAM_PARAMS, info)
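
# A minimal sketch of the dotted-path lookup read_params relies on; the
# params-file content below is invented for illustration.
import dpath.util

config = {"train": {"lr": 0.01, "epochs": 10}}
assert dpath.util.get(config, "train.lr", separator=".") == 0.01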
Optional("job-from"): str, # dependencies of this task, keyed by name; these are passed through # verbatim and subject to the interpretation of the Task's get_dependencies # method. Optional("dependencies"): { All( str, NotIn( ["self", "decision"], "Can't use 'self` or 'decision' as depdency names.", ), ): object, }, # Soft dependencies of this task, as a list of tasks labels Optional("soft-dependencies"): [str], Optional("requires"): Any("all-completed", "all-resolved"), # expiration and deadline times, relative to task creation, with units # (e.g., "14 days"). Defaults are set based on the project. Optional("expires-after"): str, Optional("deadline-after"): str, # custom routes for this task; the default treeherder routes will be added # automatically Optional("routes"): [str], # custom scopes for this task; any scopes required for the worker will be # added automatically. The following parameters will be substituted in each # scope: # {level} -- the scm level of this push # {project} -- the project of this push Optional("scopes"): [str], # Tags Optional("tags"): {str: str},
from taskgraph.util.scriptworker import (get_balrog_server_scope,
                                         get_balrog_channel_scopes)
from taskgraph.transforms.task import task_description_schema
from voluptuous import Any, Required, Optional

# Voluptuous uses marker objects as dictionary *keys*, but they are not
# comparable, so we cast all of the keys back to regular strings
task_description_schema = {
    str(k): v for k, v in task_description_schema.schema.iteritems()
}

transforms = TransformSequence()

# shortcut for a string where task references are allowed
taskref_or_string = Any(basestring, {Required('task-reference'): basestring})

balrog_description_schema = Schema({
    # the dependent task (object) for this balrog job, used to inform balrogworker.
    Required('dependent-task'): object,

    # unique label to describe this balrog task, defaults to balrog-{dep.label}
    Optional('label'): basestring,

    # treeherder is allowed here to override any defaults we use for beetmover. See
    # taskcluster/taskgraph/transforms/task.py for the schema details, and the
    # below transforms for defaults of various values.
    Optional('treeherder'): task_description_schema['treeherder'],
]

VALID_UPLOAD_FILE_TYPES = [
    'image',
    'pdf-and-image',
    'custom'
]

# Schema definition for an update from the Studio JavaScript editor.
EDITOR_UPDATE_SCHEMA = Schema({
    Required('prompts'): [
        Schema({
            Required('description'): utf8_validator,
        })
    ],
    Required('prompts_type', default='text'):
        Any(All(utf8_validator, In(PROMPTS_TYPES)), None),
    Required('title'): utf8_validator,
    Required('feedback_prompt'): utf8_validator,
    Required('feedback_default_text'): utf8_validator,
    Required('submission_start'): Any(datetime_validator, None),
    Required('submission_due'): Any(datetime_validator, None),
    Required('text_response', default='required'):
        Any(All(utf8_validator, In(NECESSITY_OPTIONS)), None),
    Required('file_upload_response', default=None):
        Any(All(utf8_validator, In(NECESSITY_OPTIONS)), None),
    'allow_file_upload': bool,  # Backwards compatibility.
    Required('file_upload_type', default=None):
        Any(All(utf8_validator, In(VALID_UPLOAD_FILE_TYPES)), None),
    'white_listed_file_types': utf8_validator,
    Required('allow_multiple_files'): bool,
    Required('allow_latex'): bool,
    Required('leaderboard_show'): int,
    Optional('teams_enabled'): bool,
    Optional('selected_teamset_id'): utf8_validator,
    # run on all projects for which cron tasks are set up. This works just like the
    # `run_on_projects` attribute, where strings like "release" and "integration" are
    # expanded to cover multiple repositories. (taskcluster/docs/attributes.rst)
    'run-on-projects': [basestring],

    # Array of times at which this task should run. These *must* be a
    # multiple of 15 minutes, the minimum scheduling interval. This field
    # can be keyed by project so that each project has a different schedule
    # for the same job.
    'when': optionally_keyed_by(
        'project',
        [{
            'hour': int,
            'minute': All(int, even_15_minutes),
            # You probably don't want both day and weekday.
            'day': int,  # Day of the month, as used by datetime.
            'weekday': Any('Monday', 'Tuesday', 'Wednesday', 'Thursday',
                           'Friday', 'Saturday', 'Sunday')
        }]),
}],
})


def validate(cron_yml):
    validate_schema(cron_yml_schema, cron_yml, "Invalid .cron.yml:")
from voluptuous import Schema, Any, All, Length, Required, IsDir, IsFile, Match
# MultipleInvalid

#################################################################################
#
# This class is meant to validate the data coming on the requests against the
# predefined schemas for each type of document in the database, and each type
# of request may have also its particularities.
#
#################################################################################

abstract_library_schema = Schema({
    'library_source': Any(str, None),
    'library_selection': Any(str, None),
    'library_strategy': Any(str, None),
    'instrument_model': Any(str, None),
    'coverage': Any(str, None)
})

library_schema = Schema({
    'internal_id': int,
    'name': str,
    'library_type': str,
    'public_name': str,
    'sample_internal_id': int,
    'is_complete': bool,
    'has_minimal': bool,
class MockProvisioner3(object):
    name = 'mp3'
    schema = Any({'a': str}, {'b': Boolean()})
from .gecko_v2_whitelist import JOB_NAME_WHITELIST, JOB_NAME_WHITELIST_ERROR

RUN_TASK = os.path.join(GECKO, 'taskcluster', 'docker', 'recipes', 'run-task')


@memoize
def _run_task_suffix():
    """String to append to cache names under control of run-task."""
    return hash_path(RUN_TASK)[0:20]


# shortcut for a string where task references are allowed
taskref_or_string = Any(
    basestring,
    {Required('task-reference'): basestring})

# A task description is a general description of a TaskCluster task
task_description_schema = Schema({
    # the label for this task
    Required('label'): basestring,

    # description of the task (for metadata)
    Required('description'): basestring,

    # attributes for this task
    Optional('attributes'): {basestring: object},

    # dependencies of this task, keyed by name; these are passed through
    # verbatim and subject to the interpretation of the Task's get_dependencies
class Stage(object):
    STAGE_FILE = "Dvcfile"
    STAGE_FILE_SUFFIX = ".dvc"

    PARAM_MD5 = "md5"
    PARAM_CMD = "cmd"
    PARAM_WDIR = "wdir"
    PARAM_DEPS = "deps"
    PARAM_OUTS = "outs"
    PARAM_LOCKED = "locked"
    PARAM_META = "meta"
    PARAM_ALWAYS_CHANGED = "always_changed"

    SCHEMA = {
        PARAM_MD5: Any(str, None),
        PARAM_CMD: Any(str, None),
        PARAM_WDIR: Any(str, None),
        PARAM_DEPS: Any([dependency.SCHEMA], None),
        PARAM_OUTS: Any([output.SCHEMA], None),
        PARAM_LOCKED: bool,
        PARAM_META: object,
        PARAM_ALWAYS_CHANGED: bool,
    }
    COMPILED_SCHEMA = Schema(SCHEMA)

    TAG_REGEX = r"^(?P<path>.*)@(?P<tag>[^\\/@:]*)$"

    def __init__(
        self,
        repo,
        path=None,
        cmd=None,
        wdir=os.curdir,
        deps=None,
        outs=None,
        md5=None,
        locked=False,
        tag=None,
        always_changed=False,
        stage_text=None,
    ):
        if deps is None:
            deps = []
        if outs is None:
            outs = []

        self.repo = repo
        self.path = path
        self.cmd = cmd
        self.wdir = wdir
        self.outs = outs
        self.deps = deps
        self.md5 = md5
        self.locked = locked
        self.tag = tag
        self.always_changed = always_changed
        self._stage_text = stage_text

    def __repr__(self):
        return "Stage: '{path}'".format(
            path=self.relpath if self.path else "No path")

    @property
    def relpath(self):
        return relpath(self.path)

    @property
    def is_data_source(self):
        """Whether the DVC-file was created with `dvc add` or `dvc import`"""
        return self.cmd is None

    @staticmethod
    def is_valid_filename(path):
        return (
            # path.endswith doesn't work for encoded unicode filenames on
            # Python 2 and since Stage.STAGE_FILE_SUFFIX is ascii then it is
            # not needed to decode the path from py2's str
            path[-len(Stage.STAGE_FILE_SUFFIX):] == Stage.STAGE_FILE_SUFFIX
            or os.path.basename(path) == Stage.STAGE_FILE)

    @staticmethod
    def is_stage_file(path):
        return os.path.isfile(path) and Stage.is_valid_filename(path)

    def changed_md5(self):
        return self.md5 != self._compute_md5()

    @property
    def is_callback(self):
        """
        A callback stage is always considered as changed, so it runs on
        every `dvc repro` call.
        """
        return not self.is_data_source and len(self.deps) == 0

    @property
    def is_import(self):
        """Whether the DVC-file was created with `dvc import`."""
        return not self.cmd and len(self.deps) == 1 and len(self.outs) == 1

    @property
    def is_repo_import(self):
        if not self.is_import:
            return False

        return isinstance(self.deps[0], dependency.DependencyREPO)

    def _changed_deps(self):
        if self.locked:
            return False

        if self.is_callback:
            logger.warning(
                "DVC-file '{fname}' is a \"callback\" stage "
                "(has a command and no dependencies) and thus always "
                "considered as changed.".format(fname=self.relpath))
            return True

        if self.always_changed:
            return True

        for dep in self.deps:
            status = dep.status()
            if status:
                logger.warning(
                    "Dependency '{dep}' of '{stage}' changed because it is "
                    "'{status}'.".format(dep=dep, stage=self.relpath,
                                         status=status[str(dep)]))
                return True

        return False

    def _changed_outs(self):
        for out in self.outs:
            status = out.status()
            if status:
                logger.warning(
                    "Output '{out}' of '{stage}' changed because it is "
                    "'{status}'".format(out=out, stage=self.relpath,
                                        status=status[str(out)]))
                return True

        return False

    def _changed_md5(self):
        if self.changed_md5():
            logger.warning("DVC-file '{}' changed.".format(self.relpath))
            return True
        return False

    def changed(self):
        # Short-circuit order: stage md5 is fast, deps are expected to change
        ret = (self._changed_md5()
               or self._changed_deps()
               or self._changed_outs())

        if ret:
            logger.warning("Stage '{}' changed.".format(self.relpath))
        else:
            logger.debug("Stage '{}' didn't change.".format(self.relpath))

        return ret

    def remove_outs(self, ignore_remove=False, force=False):
        """Used mainly for `dvc remove --outs` and :func:`Stage.reproduce`."""
        for out in self.outs:
            if out.persist and not force:
                out.unprotect()
            else:
                logger.debug("Removing output '{out}' of '{stage}'.".format(
                    out=out, stage=self.relpath))
                out.remove(ignore_remove=ignore_remove)

    def unprotect_outs(self):
        for out in self.outs:
            out.unprotect()

    def remove(self, force=False, remove_outs=True):
        if remove_outs:
            self.remove_outs(ignore_remove=True, force=force)
        else:
            self.unprotect_outs()
        os.unlink(self.path)

    def reproduce(self, interactive=False, **kwargs):
        if not kwargs.get("force", False) and not self.changed():
            return None

        msg = ("Going to reproduce '{stage}'. "
               "Are you sure you want to continue?".format(stage=self.relpath))

        if interactive and not prompt.confirm(msg):
            raise DvcException("reproduction aborted by the user")

        self.run(**kwargs)

        logger.debug("'{stage}' was reproduced".format(stage=self.relpath))

        return self

    def update(self):
        if not self.is_repo_import and not self.is_import:
            raise StageUpdateError(self.relpath)

        self.deps[0].update()
        locked = self.locked
        self.locked = False
        try:
            self.reproduce()
        finally:
            self.locked = locked

    @staticmethod
    def validate(d, fname=None):
        from dvc.utils.compat import convert_to_unicode

        try:
            Stage.COMPILED_SCHEMA(convert_to_unicode(d))
        except MultipleInvalid as exc:
            raise StageFileFormatError(fname, exc)

    @classmethod
    def _stage_fname(cls, outs, add):
        if not outs:
            return cls.STAGE_FILE

        out = outs[0]
        fname = out.path_info.name + cls.STAGE_FILE_SUFFIX

        if (add and out.is_in_repo
                and not contains_symlink_up_to(out.fspath, out.repo.root_dir)):
            fname = out.path_info.with_name(fname).fspath

        return fname

    @staticmethod
    def _check_stage_path(repo, path):
        assert repo is not None

        real_path = os.path.realpath(path)
        if not os.path.exists(real_path):
            raise StagePathNotFoundError(path)

        if not os.path.isdir(real_path):
            raise StagePathNotDirectoryError(path)

        proj_dir = os.path.realpath(repo.root_dir)
        if real_path != proj_dir and not path_isin(real_path, proj_dir):
            raise StagePathOutsideError(path)

    @property
    def is_cached(self):
        """
        Checks if this stage has been already ran and stored
        """
        from dvc.remote.local import RemoteLOCAL
        from dvc.remote.s3 import RemoteS3

        old = Stage.load(self.repo, self.path)
        if old._changed_outs():
            return False

        # NOTE: need to save checksums for deps in order to compare them
        # with what is written in the old stage.
        for dep in self.deps:
            dep.save()

        old_d = old.dumpd()
        new_d = self.dumpd()

        # NOTE: need to remove checksums from old dict in order to compare
        # it to the new one, since the new one doesn't have checksums yet.
        old_d.pop(self.PARAM_MD5, None)
        new_d.pop(self.PARAM_MD5, None)
        outs = old_d.get(self.PARAM_OUTS, [])
        for out in outs:
            out.pop(RemoteLOCAL.PARAM_CHECKSUM, None)
            out.pop(RemoteS3.PARAM_CHECKSUM, None)

        if old_d != new_d:
            return False

        # NOTE: committing to prevent potential data duplication. For example
        #
        #    $ dvc config cache.type hardlink
        #    $ echo foo > foo
        #    $ dvc add foo
        #    $ rm -f foo
        #    $ echo foo > foo
        #    $ dvc add foo
        #
        # should replace foo with a link to cache
        #
        old.commit()

        return True

    @staticmethod
    def create(repo, **kwargs):
        wdir = kwargs.get("wdir", None)
        cwd = kwargs.get("cwd", None)
        fname = kwargs.get("fname", None)
        add = kwargs.get("add", False)

        # Backward compatibility for `cwd` option
        if wdir is None and cwd is not None:
            if fname is not None and os.path.basename(fname) != fname:
                raise StageFileBadNameError(
                    "DVC-file name '{fname}' may not contain subdirectories"
                    " if `-c|--cwd` (deprecated) is specified. Use `-w|--wdir`"
                    " along with `-f` to specify DVC-file path with working"
                    " directory.".format(fname=fname))
            wdir = cwd
        elif wdir is None:
            wdir = os.curdir

        stage = Stage(
            repo=repo,
            wdir=wdir,
            cmd=kwargs.get("cmd", None),
            locked=kwargs.get("locked", False),
            always_changed=kwargs.get("always_changed", False),
        )

        Stage._fill_stage_outputs(stage, **kwargs)
        stage.deps = dependency.loads_from(stage, kwargs.get("deps", []),
                                           erepo=kwargs.get("erepo", None))

        stage._check_circular_dependency()
        stage._check_duplicated_arguments()

        if not fname:
            fname = Stage._stage_fname(stage.outs, add)
        stage._check_dvc_filename(fname)

        # Autodetecting wdir for add, we need to create outs first to do that,
        # so we start with wdir = . and remap out paths later.
        if add and kwargs.get("wdir") is None and cwd is None:
            wdir = os.path.dirname(fname)

            for out in chain(stage.outs, stage.deps):
                if out.is_in_repo:
                    out.def_path = relpath(out.path_info, wdir)

        wdir = os.path.abspath(wdir)

        if cwd is not None:
            path = os.path.join(wdir, fname)
        else:
            path = os.path.abspath(fname)

        Stage._check_stage_path(repo, wdir)
        Stage._check_stage_path(repo, os.path.dirname(path))

        stage.wdir = wdir
        stage.path = path

        ignore_build_cache = kwargs.get("ignore_build_cache", False)

        # NOTE: remove outs before we check build cache
        if kwargs.get("remove_outs", False):
            logger.warning("--remove-outs is deprecated."
                           " It is now the default behavior,"
                           " so there's no need to use this option anymore.")
            stage.remove_outs(ignore_remove=False)
            logger.warning("Build cache is ignored when using --remove-outs.")
            ignore_build_cache = True

        if os.path.exists(path) and any(out.persist for out in stage.outs):
            logger.warning("Build cache is ignored when persisting outputs.")
            ignore_build_cache = True

        if os.path.exists(path):
            if (not ignore_build_cache and stage.is_cached
                    and not stage.is_callback and not stage.always_changed):
                logger.info("Stage is cached, skipping.")
                return None

            msg = ("'{}' already exists. Do you wish to run the command and "
                   "overwrite it?".format(stage.relpath))

            if not kwargs.get("overwrite", True) and not prompt.confirm(msg):
                raise StageFileAlreadyExistsError(stage.relpath)

            os.unlink(path)

        return stage

    @staticmethod
    def _fill_stage_outputs(stage, **kwargs):
        stage.outs = output.loads_from(stage, kwargs.get("outs", []),
                                       use_cache=True)
        stage.outs += output.loads_from(stage, kwargs.get("metrics", []),
                                        use_cache=True, metric=True)
        stage.outs += output.loads_from(stage, kwargs.get("outs_persist", []),
                                        use_cache=True, persist=True)
        stage.outs += output.loads_from(stage, kwargs.get("outs_no_cache", []),
                                        use_cache=False)
        stage.outs += output.loads_from(
            stage,
            kwargs.get("metrics_no_cache", []),
            use_cache=False,
            metric=True,
        )
        stage.outs += output.loads_from(
            stage,
            kwargs.get("outs_persist_no_cache", []),
            use_cache=False,
            persist=True,
        )

    @staticmethod
    def _check_dvc_filename(fname):
        if not Stage.is_valid_filename(fname):
            raise StageFileBadNameError(
                "bad DVC-file name '{}'. DVC-files should be named "
                "'Dvcfile' or have a '.dvc' suffix (e.g. '{}.dvc').".format(
                    relpath(fname), os.path.basename(fname)))

    @staticmethod
    def _check_file_exists(repo, fname):
        if not repo.tree.exists(fname):
            raise StageFileDoesNotExistError(fname)

    @staticmethod
    def _check_isfile(repo, fname):
        if not repo.tree.isfile(fname):
            raise StageFileIsNotDvcFileError(fname)

    @classmethod
    def _get_path_tag(cls, s):
        regex = re.compile(cls.TAG_REGEX)
        match = regex.match(s)
        if not match:
            return s, None
        return match.group("path"), match.group("tag")

    @staticmethod
    def load(repo, fname):
        fname, tag = Stage._get_path_tag(fname)

        # it raises the proper exceptions by priority:
        # 1. when the file doesn't exists
        # 2. filename is not a DVC-file
        # 3. path doesn't represent a regular file
        Stage._check_file_exists(repo, fname)
        Stage._check_dvc_filename(fname)
        Stage._check_isfile(repo, fname)

        with repo.tree.open(fname) as fd:
            stage_text = fd.read()
        d = parse_stage(stage_text, fname)

        Stage.validate(d, fname=relpath(fname))
        path = os.path.abspath(fname)

        stage = Stage(
            repo=repo,
            path=path,
            wdir=os.path.abspath(
                os.path.join(os.path.dirname(path),
                             d.get(Stage.PARAM_WDIR, "."))),
            cmd=d.get(Stage.PARAM_CMD),
            md5=d.get(Stage.PARAM_MD5),
            locked=d.get(Stage.PARAM_LOCKED, False),
            tag=tag,
            always_changed=d.get(Stage.PARAM_ALWAYS_CHANGED, False),
            # We store stage text to apply updates to the same structure
            stage_text=stage_text,
        )

        stage.deps = dependency.loadd_from(stage, d.get(Stage.PARAM_DEPS, []))
        stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS, []))

        return stage

    def dumpd(self):
        rel_wdir = relpath(self.wdir, os.path.dirname(self.path))

        wdir = pathlib.PurePath(rel_wdir).as_posix()
        wdir = wdir if wdir != "." else None

        return {
            key: value
            for key, value in {
                Stage.PARAM_MD5: self.md5,
                Stage.PARAM_CMD: self.cmd,
                Stage.PARAM_WDIR: wdir,
                Stage.PARAM_LOCKED: self.locked,
                Stage.PARAM_DEPS: [d.dumpd() for d in self.deps],
                Stage.PARAM_OUTS: [o.dumpd() for o in self.outs],
                Stage.PARAM_ALWAYS_CHANGED: self.always_changed,
            }.items() if value
        }

    def dump(self):
        fname = self.path

        self._check_dvc_filename(fname)

        logger.debug(
            "Saving information to '{file}'.".format(file=relpath(fname)))
        state = self.dumpd()

        # When we load a stage we parse yaml with a fast parser, which strips
        # off all the comments and formatting. To retain those on update we do
        # a trick here:
        # - reparse the same yaml text with a slow but smart ruamel yaml parser
        # - apply changes to a returned structure
        # - serialize it
        if self._stage_text is not None:
            saved_state = parse_stage_for_update(self._stage_text, fname)
            # Stage doesn't work with meta in any way, so .dumpd() doesn't
            # have it. We simply copy it over.
            if "meta" in saved_state:
                state["meta"] = saved_state["meta"]
            apply_diff(state, saved_state)
            state = saved_state

        dump_stage_file(fname, state)

        self.repo.scm.track_file(relpath(fname))

    def _compute_md5(self):
        from dvc.output.base import OutputBase

        d = self.dumpd()

        # Remove md5 and meta, these should not affect stage md5
        d.pop(self.PARAM_MD5, None)
        d.pop(self.PARAM_META, None)

        # Ignore the wdir default value. In this case DVC-file w/o
        # wdir has the same md5 as a file with the default value specified.
        # It's important for backward compatibility with pipelines that
        # didn't have WDIR in their DVC-files.
        if d.get(self.PARAM_WDIR) == ".":
            del d[self.PARAM_WDIR]

        # NOTE: excluding parameters that don't affect the state of the
        # pipeline. Not excluding `OutputLOCAL.PARAM_CACHE`, because if
        # it has changed, we might not have that output in our cache.
        m = dict_md5(
            d,
            exclude=[
                self.PARAM_LOCKED,
                OutputBase.PARAM_METRIC,
                OutputBase.PARAM_TAGS,
                OutputBase.PARAM_PERSIST,
            ],
        )

        logger.debug("Computed stage '{}' md5: '{}'".format(self.relpath, m))

        return m

    def save(self):
        for dep in self.deps:
            dep.save()

        for out in self.outs:
            out.save()

        self.md5 = self._compute_md5()

    @staticmethod
    def _changed_entries(entries):
        return [
            str(entry) for entry in entries
            if entry.checksum and entry.changed_checksum()
        ]

    def check_can_commit(self, force):
        changed_deps = self._changed_entries(self.deps)
        changed_outs = self._changed_entries(self.outs)

        if changed_deps or changed_outs or self.changed_md5():
            msg = ("dependencies {}".format(changed_deps)
                   if changed_deps else "")
            msg += " and " if (changed_deps and changed_outs) else ""
            msg += "outputs {}".format(changed_outs) if changed_outs else ""
            msg += "md5" if not (changed_deps or changed_outs) else ""
            msg += " of '{}' changed. ".format(self.relpath)
            msg += "Are you sure you want to commit it?"

            if not force and not prompt.confirm(msg):
                raise StageCommitError(
                    "unable to commit changed '{}'. Use `-f|--force` to "
                    "force.".format(self.relpath))
        self.save()

    def commit(self):
        for out in self.outs:
            out.commit()

    def _check_missing_deps(self):
        missing = [dep for dep in self.deps if not dep.exists]

        if any(missing):
            raise MissingDep(missing)

    @staticmethod
    def _warn_if_fish(executable):  # pragma: no cover
        if (executable is None
                or os.path.basename(os.path.realpath(executable)) != "fish"):
            return

        logger.warning(
            "DVC detected that you are using fish as your default "
            "shell. Be aware that it might cause problems by overwriting "
            "your current environment variables with values defined "
            "in '.fishrc', which might affect your command. See "
            "https://github.com/iterative/dvc/issues/1307. ")

    def _check_circular_dependency(self):
        from dvc.exceptions import CircularDependencyError

        circular_dependencies = set(d.path_info for d in self.deps) & set(
            o.path_info for o in self.outs)

        if circular_dependencies:
            raise CircularDependencyError(str(circular_dependencies.pop()))

    def _check_duplicated_arguments(self):
        from dvc.exceptions import ArgumentDuplicationError
        from collections import Counter

        path_counts = Counter(edge.path_info for edge in self.deps + self.outs)

        for path, occurrence in path_counts.items():
            if occurrence > 1:
                raise ArgumentDuplicationError(str(path))

    def _run(self):
        self._check_missing_deps()

        kwargs = {"cwd": self.wdir, "env": fix_env(None), "close_fds": True}

        if os.name == "nt":
            kwargs["shell"] = True
            cmd = self.cmd
        else:
            # NOTE: when you specify `shell=True`, `Popen` [1] will default to
            # `/bin/sh` on *nix and will add ["/bin/sh", "-c"] to your command.
            # But we actually want to run the same shell that we are running
            # from right now, which is usually determined by the `SHELL` env
            # var. So instead, we compose our command on our own, making sure
            # to include special flags to prevent shell from reading any
            # configs and modifying env, which may change the behavior or the
            # command we are running. See [2] for more info.
            #
            # [1] https://github.com/python/cpython/blob/3.7/Lib/subprocess.py
            #     #L1426
            # [2] https://github.com/iterative/dvc/issues/2506
            #     #issuecomment-535396799
            kwargs["shell"] = False
            executable = os.getenv("SHELL") or "/bin/sh"
            self._warn_if_fish(executable)

            opts = {"zsh": ["--no-rcs"], "bash": ["--noprofile", "--norc"]}
            name = os.path.basename(executable).lower()
            cmd = [executable] + opts.get(name, []) + ["-c", self.cmd]

        main_thread = isinstance(threading.current_thread(),
                                 threading._MainThread)
        old_handler = None
        p = None

        try:
            p = subprocess.Popen(cmd, **kwargs)
            if main_thread:
                old_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)

            p.communicate()
        finally:
            if old_handler:
                signal.signal(signal.SIGINT, old_handler)

        if (p is None) or (p.returncode != 0):
            raise StageCmdFailedError(self)

    def run(self, dry=False, no_commit=False, force=False):
        if (self.cmd or self.is_import) and not self.locked and not dry:
            self.remove_outs(ignore_remove=False, force=False)

        if self.locked:
            logger.info("Verifying outputs in locked stage '{stage}'".format(
                stage=self.relpath))
            if not dry:
                self.check_missing_outputs()

        elif self.is_import:
            logger.info("Importing '{dep}' -> '{out}'".format(
                dep=self.deps[0], out=self.outs[0]))
            if not dry:
                if not force and self._already_cached():
                    self.outs[0].checkout()
                else:
                    self.deps[0].download(self.outs[0])

        elif self.is_data_source:
            msg = "Verifying data sources in '{}'".format(self.relpath)
            logger.info(msg)
            if not dry:
                self.check_missing_outputs()

        else:
            logger.info("Running command:\n\t{}".format(self.cmd))
            if not dry:
                if (not force and not self.is_callback
                        and not self.always_changed
                        and self._already_cached()):
                    self.checkout()
                else:
                    self._run()

        if not dry:
            self.save()
            if not no_commit:
                self.commit()

    def check_missing_outputs(self):
        paths = [str(out) for out in self.outs if not out.exists]
        if paths:
            raise MissingDataSource(paths)

    def checkout(self, force=False, progress_callback=None):
        failed_checkouts = []
        for out in self.outs:
            failed = out.checkout(force=force, tag=self.tag,
                                  progress_callback=progress_callback)
            if failed:
                failed_checkouts.append(failed)
        return failed_checkouts

    @staticmethod
    def _status(entries):
        ret = {}

        for entry in entries:
            ret.update(entry.status())

        return ret

    def status(self):
        ret = []

        if not self.locked:
            deps_status = self._status(self.deps)
            if deps_status:
                ret.append({"changed deps": deps_status})

        outs_status = self._status(self.outs)
        if outs_status:
            ret.append({"changed outs": outs_status})

        if self.changed_md5():
            ret.append("changed checksum")

        if self.is_callback or self.always_changed:
            ret.append("always changed")

        if ret:
            return {self.relpath: ret}

        return {}

    def _already_cached(self):
        return (not self.changed_md5()
                and all(not dep.changed() for dep in self.deps)
                and all(not out.changed_cache()
                        if out.use_cache else not out.changed()
                        for out in self.outs))

    def get_all_files_number(self):
        return sum(out.get_files_number() for out in self.outs)
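
# Sketch of the schema in isolation (the stage dict below is invented):
# COMPILED_SCHEMA accepts the loosely-typed dicts parsed from DVC-files.
# All keys are plain strings, hence optional; values are type-checked.
Stage.COMPILED_SCHEMA({"md5": None, "cmd": "python train.py", "locked": False})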
def test_fix_157():
    # A non-empty list whose items must be one of the three allowed strings;
    # Length(min=1) belongs inside All so it actually applies to the list.
    s = Schema(All([Any('one', 'two', 'three')], Length(min=1)))
    assert_equal(['one'], s(['one']))
    assert_raises(MultipleInvalid, s, ['four'])
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from voluptuous import ALLOW_EXTRA, Any, Required, Schema

option_schema = Schema(
    {
        Required('description'): Any(basestring, [basestring]),
        'required': bool,
        'choices': list,
        'aliases': list,
        'version_added': Any(basestring, float)
    },
    extra=ALLOW_EXTRA)

doc_schema = Schema(
    {
        Required('module'): basestring,
        'short_description': basestring,
        'description': Any(basestring, [basestring]),
        'version_added': Any(basestring, float),
        'author': Any(None, basestring, [basestring]),
        'notes': Any(None, [basestring]),
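
# Illustrative option-documentation block (field values invented): it passes
# because `description` accepts a string or list of strings and unknown keys
# are tolerated via extra=ALLOW_EXTRA.
option_schema({
    'description': ['Name of the resource.'],
    'required': True,
    'version_added': 2.4,
})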
from __future__ import absolute_import, print_function, unicode_literals

from six import text_type
from taskgraph.transforms.base import TransformSequence
from taskgraph.transforms.task import task_description_schema
from taskgraph.util.schema import Schema
from taskgraph.util.taskcluster import get_artifact_path
from voluptuous import (
    Any,
    Optional,
    Required,
)

index_or_string = Any(
    text_type,
    {Required("index-search"): text_type},
)

diff_description_schema = Schema({
    # Name of the diff task.
    Required("name"): text_type,

    # Treeherder symbol.
    Required("symbol"): text_type,

    # relative path (from config.path) to the file the task was defined in.
    Optional("job-from"): text_type,

    # Original and new builds to compare.
    Required("original"): index_or_string,
complexities of worker implementations, scopes, and treeherder annotations.
"""

from __future__ import absolute_import, print_function, unicode_literals

import json
import time

from taskgraph.util.treeherder import split_symbol
from taskgraph.transforms.base import (validate_schema, TransformSequence)
from voluptuous import Schema, Any, Required, Optional, Extra

from .gecko_v2_whitelist import JOB_NAME_WHITELIST, JOB_NAME_WHITELIST_ERROR

# shortcut for a string where task references are allowed
taskref_or_string = Any(basestring, {Required('task-reference'): basestring})

# A task description is a general description of a TaskCluster task
task_description_schema = Schema({
    # the label for this task
    Required('label'): basestring,

    # description of the task (for metadata)
    Required('description'): basestring,

    # attributes for this task
    Optional('attributes'): {basestring: object},
from funcy import walk_values
from voluptuous import (
    All,
    Any,
    Coerce,
    Invalid,
    Lower,
    Optional,
    Range,
    Schema,
)

Bool = All(
    Lower,
    Any("true", "false"),
    lambda v: v == "true",
    msg="expected true or false",
)


def supported_cache_type(types):
    """Checks if link type config option consists only of valid values.

    Args:
        types (list/string): type(s) of links that dvc should try out.
    """
    if types is None:
        return None
    if isinstance(types, str):
        types = [typ.strip() for typ in types.split(",")]
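
# A minimal sketch of how Bool composes (not from the original module):
# Lower normalizes case, Any restricts the value to "true"/"false", and the
# final lambda coerces the string to a real bool. Validators are callable,
# so Bool can be exercised directly.
assert Bool("True") is True
assert Bool("FALSE") is False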
"""Logging config.""" from voluptuous import All, Any, Optional, Schema LOG_LEVELS = Any("DEBUG", "INFO", "WARNING", "ERROR", "FATAL") def upper_case(data: str) -> str: """Return data as upper case.""" return data.upper() SCHEMA = Schema( {Optional("level", default="INFO"): All(str, upper_case, LOG_LEVELS)}) class LoggingConfig: """Logging config.""" schema = SCHEMA def __init__(self, logging): self._level = logging["level"] @property def level(self): """Return log level.""" return self._level
class Model:
    """
    Loud ML model
    """

    TYPE = 'model_cls'

    SCHEMA = Schema(
        {
            Required('name'): All(schemas.key, Length(max=256)),
            Required('type'): All(schemas.key, Length(max=256)),
            Optional('features'): Any(
                None,
                All([Feature.SCHEMA], Length(min=1)),
                Schema({
                    Optional('i'): All([Feature.SCHEMA], Length(min=1)),
                    Optional('o'): All([Feature.SCHEMA], Length(min=1)),
                    Optional('io'): All([Feature.SCHEMA], Length(min=1)),
                }),
            ),
            Optional('bucket_interval'): schemas.TimeDelta(
                min=0, min_included=False,
            ),
            'routing': Any(None, schemas.key),
            'threshold': schemas.score,
            'max_threshold': schemas.score,
            'min_threshold': schemas.score,
            'max_evals': All(int, Range(min=1)),
        },
        extra=ALLOW_EXTRA)

    def __init__(self, settings, state=None):
        """
        name -- model settings
        """
        settings['type'] = self.TYPE
        settings = copy.deepcopy(settings)
        settings = self.validate(settings)
        self._settings = settings
        self.name = settings.get('name')
        self.routing = settings.get('routing')
        self._state = state

        self.features = [
            Feature(**feature) for feature in settings['features']
        ]

        self.bucket_interval = misc.parse_timedelta(
            settings.get('bucket_interval', 0)).total_seconds()

        self.max_threshold = self.settings.get('max_threshold')
        if self.max_threshold is None:
            # Backward compatibility
            self.max_threshold = self.settings.get('threshold', 0)
            self.settings['max_threshold'] = self.max_threshold

        self.min_threshold = self.settings.get('min_threshold')
        if self.min_threshold is None:
            # Backward compatibility
            self.min_threshold = self.settings.get('threshold', 0)
            self.settings['min_threshold'] = self.min_threshold

    @classmethod
    def validate(cls, settings):
        """Validate the settings against the schema"""
        res = schemas.validate(cls.SCHEMA, settings)

        features = flatten_features(settings.get('features'))
        res['features'] = features

        has_input = False
        has_output = False
        for feature in res['features']:
            io = feature.get('io', 'io')
            if 'i' in io:
                has_input = True
            if 'o' in io:
                has_output = True
            if has_input and has_output:
                break

        if not has_input:
            raise errors.Invalid('model has no input feature')
        if not has_output:
            raise errors.Invalid('model has no output feature')

        return res

    @property
    def type(self):
        return self.settings['type']

    @property
    def default_bucket(self):
        return self._settings.get('default_bucket')

    def get_tags(self):
        tags = {
            'model': self.name,
        }
        for feature in self.features:
            if feature.match_all:
                for condition in feature.match_all:
                    tag = condition['tag']
                    val = condition['value']
                    tags[tag] = val
        return tags

    @property
    def settings(self):
        return self._settings

    @property
    def nb_features(self):
        return len(self.features)

    @property
    def is_trained(self):
        return self._state is not None

    @property
    def data(self):
        return {
            'settings': self.settings,
            'state': self.state,
        }

    @property
    def seasonality(self):
        return self._settings['seasonality']

    @property
    def state(self):
        return self._state

    @property
    def preview(self):
        state = {
            'trained': self.is_trained,
        }
        if self.is_trained:
            state['loss'] = self.state.get('loss')

        return {
            'settings': self.settings,
            'state': state,
        }

    def generate_fake_prediction(self):
        """
        Generate a prediction with fake values. Just for testing purposes.
        """
        # Subclasses are expected to override this; raising is the intended
        # behavior (NotImplemented is a sentinel value and is not callable).
        raise NotImplementedError()
def main():
    """Validate BOTMETA"""
    path = '.github/BOTMETA.yml'

    try:
        with open(path, 'r') as f_path:
            botmeta = yaml.safe_load(f_path)
    except yaml.error.MarkedYAMLError as ex:
        print('%s:%d:%d: YAML load failed: %s' %
              (path, ex.context_mark.line + 1, ex.context_mark.column + 1,
               re.sub(r'\s+', ' ', str(ex))))
        sys.exit()
    except Exception as ex:  # pylint: disable=broad-except
        print('%s:%d:%d: YAML load failed: %s' %
              (path, 0, 0, re.sub(r'\s+', ' ', str(ex))))
        sys.exit()

    list_string_types = list(string_types)

    files_schema = Any(
        Schema(*string_types),
        Schema({
            'ignored': Any(list_string_types, *string_types),
            'keywords': Any(list_string_types, *string_types),
            'labels': Any(list_string_types, *string_types),
            'maintainers': Any(list_string_types, *string_types),
            'migrated_to': All(
                Any(*string_types),
                Match(r'^\w+\.\w+$'),
            ),
            'notified': Any(list_string_types, *string_types),
            'supershipit': Any(list_string_types, *string_types),
            'support': Any("core", "network", "community"),
        }))

    list_dict_file_schema = [{str_type: files_schema}
                             for str_type in string_types]

    schema = Schema({
        Required('automerge'): bool,
        Required('files'): Any(None, *list_dict_file_schema),
        Required('macros'): dict,  # Any(*list_macros_schema),
    })

    # Ensure schema is valid
    try:
        schema(botmeta)
    except MultipleInvalid as ex:
        for error in ex.errors:
            # No way to get line numbers
            print('%s:%d:%d: %s' %
                  (path, 0, 0, humanize_error(botmeta, error)))

    # Ensure botmeta is always support:core
    botmeta_support = botmeta.get('files', {}).get(
        '.github/BOTMETA.yml', {}).get('support', '')
    if botmeta_support != 'core':
        print('%s:%d:%d: .github/BOTMETA.yml MUST be support: core' %
              (path, 0, 0))

    # Find all path (non-team) macros so we can substitute them
    macros = botmeta.get('macros', {})
    path_macros = []
    for macro in macros:
        if macro.startswith('team_'):
            continue
        path_macros.append(macro)

    # Ensure all `files` correspond to a file
    for file, file_meta in botmeta['files'].items():
        migrated = (isinstance(file_meta, dict)
                    and file_meta.get('migrated_to') is not None)
        for macro in path_macros:
            file = file.replace('$' + macro,
                                botmeta.get('macros', {}).get(macro, ''))
        if not os.path.exists(file) and not migrated:
            # Not a file or directory, though maybe the prefix to one?
            # https://github.com/ansible/ansibullbot/pull/1023
            if not glob.glob('%s*' % file):
                print("%s:%d:%d: Can't find '%s.*' in this branch" %
                      (path, 0, 0, file))
class Feature:
    """
    Model feature
    """

    SCHEMA = Schema(
        {
            Required('name'): All(schemas.key, Length(max=256)),
            Required('metric'): All(schemas.key, Length(max=256)),
            Required('field'): All(schemas.dotted_key, Length(max=256)),
            'bucket': Any(None, schemas.key),
            'measurement': Any(None, schemas.dotted_key),
            'match_all': Any(
                None,
                Schema([
                    {
                        Required(schemas.key): Any(
                            int,
                            bool,
                            float,
                            All(str, Length(max=256)),
                        )
                    },
                ])),
            'default': Any(None, int, float, 'previous'),
            Optional('io', default='io'): Any('io', 'o', 'i'),
            'script': Any(None, str),
            Optional('anomaly_type', default='low_high'):
                Any('low', 'high', 'low_high'),
            'transform': Any(None, "diff"),
            'scores': Any(None, "min_max", "normalize", "standardize"),
        },
        extra=ALLOW_EXTRA)

    def __init__(
        self,
        name=None,
        metric=None,
        field=None,
        bucket=None,
        measurement=None,
        match_all=None,
        default=None,
        script=None,
        anomaly_type='low_high',
        transform=None,
        scores=None,
        io='io',
    ):
        self.validate(locals())

        self.name = name
        self.metric = metric
        self.bucket = bucket
        self.measurement = measurement
        self.field = field
        self.default = np.nan if default is None else default
        self.script = script
        self.match_all = match_all
        self.anomaly_type = anomaly_type

        self.is_input = 'i' in io
        self.is_output = 'o' in io
        self.transform = transform
        self.scores = "min_max" if scores is None else scores
        self.agg_id = self.build_agg_id()

    def build_agg_id(self):
        prefix = self.measurement
        if not self.match_all:
            return prefix or 'all'

        return "{}_{}".format(prefix, misc.hash_dict(self.match_all))

    @classmethod
    def validate(cls, args):
        del args['self']
        return schemas.validate(cls.SCHEMA, args)
        # Prefix to add to scopes controlling scriptworkers
        Required('scope-prefix'): basestring,
        # Mapping of scriptworker types to scopes they accept
        Required('worker-types'): {
            basestring: [basestring]
        }
    },
    Required('task-priority'): optionally_keyed_by(
        'project',
        Any(
            'highest',
            'very-high',
            'high',
            'medium',
            'low',
            'very-low',
            'lowest',
        )),
    Required('partner-urls'): {
        Required('release-partner-repack'):
            optionally_keyed_by('release-product', 'release-level',
                                'release-type', Any(basestring, None)),
        Required('release-eme-free-repack'):
            optionally_keyed_by('release-product', 'release-level',
                                'release-type', Any(basestring, None)),
    },
    Required('workers'): {
        Required('aliases'): {
            text_type: {
from taskgraph.parameters import extend_parameters_schema
from voluptuous import (
    Any,
    Optional,
    Required,
)

PROJECT_SPECIFIC_PREFIXES = {
    "refs/heads/dev-": "dev",
    "refs/heads/production-": "production",
}

PUSH_TAGS = ("dev", "production")

scriptworker_schema = {
    Optional('docker_tag'): Any(basestring, None),
    Optional('push_docker_image'): Any(True, "force", False, None),
    Optional('script_name'): Any(basestring, None),
    Optional('script_revision'): Any(basestring, None),
    Optional('shipping_phase'): Any("build", "promote", None),
}

extend_parameters_schema(scriptworker_schema)


def get_decision_parameters(graph_config, parameters):
    """Add repo-specific decision parameters.

    If we're on a production- or dev- branch, detect and set the
    `script_name`.
    """
    # --custom-build-variant-cfg value
    Optional('custom-build-variant-cfg'): basestring,

    # Extra configuration options to pass to mozharness.
    Optional('extra-config'): dict,

    # Extra metadata to use toward the workspace caching.
    # Only supported on docker-worker
    Optional('extra-workspace-cache-key'): basestring,

    # If not false, tooltool downloads will be enabled via relengAPIProxy
    # for either just public files, or all files. Not supported on Windows
    Required('tooltool-downloads'): Any(
        False,
        'public',
        'internal',
    ),

    # The set of secret names to which the task has access; these are prefixed
    # with `project/releng/gecko/{treeherder.kind}/level-{level}/`. Setting
    # this will enable any worker features required and set the task's scopes
    # appropriately. `true` here means ['*'], all secrets. Not supported on
    # Windows
    Required('secrets'): Any(bool, [basestring]),

    # If true, taskcluster proxy will be enabled; note that it may also be enabled
    # automatically e.g., for secrets support. Not supported on Windows.
    Required('taskcluster-proxy'): bool,

    # If true, the build scripts will start Xvfb. Not supported on Windows.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from voluptuous import PREVENT_EXTRA, Any, Required, Schema, Self

from ansible.module_utils.six import string_types

list_string_types = list(string_types)

suboption_schema = Schema(
    {
        Required('description'): Any(list_string_types, *string_types),
        'required': bool,
        'choices': list,
        'aliases': Any(list_string_types),
        'version_added': Any(float, *string_types),
        'default': Any(None, float, int, bool, list, dict, *string_types),
        # Note: Types are strings, not literal bools, such as True or False
        'type': Any(None, "bool"),
        # Recursive suboptions
        'suboptions':
from voluptuous import Any, Required

transforms = TransformSequence()

langpack_sign_push_description_schema = schema.extend({
    Required('label'): text_type,
    Required('description'): text_type,
    Required('worker-type'): optionally_keyed_by('release-level', text_type),
    Required('worker'): {
        Required('implementation'): 'push-addons',
        Required('channel'): optionally_keyed_by(
            'project', 'platform', Any('listed', 'unlisted')),
        Required('upstream-artifacts'): None,  # Processed here below
    },
    Required('run-on-projects'): [],
    Required('scopes'): optionally_keyed_by('release-level', [text_type]),
    Required('shipping-phase'): task_description_schema['shipping-phase'],
    Required('shipping-product'): task_description_schema['shipping-product'],
})


@transforms.add
def set_label(config, jobs):