import json import logging from types import MappingProxyType from schema import Schema, And, Or, Use CONFIG_SCHEMA = Schema({ "agent": { "name": And(str, len), "user_title": And(str, len) }, "plugin_modules": [And(str, len)], "module_config": { # namespace of module -> dict of config values }, "logging": { "global_log_level": And(str, Use(str.upper), lambda s: s in ("DEBUG", "INFO", "ERROR", "WARN"), Use(lambda e: getattr(logging, e))), "log_file_location": And(str, len), "granular_log_levels": [{ "namespace": And(str, len), "level": And(str, Use(str.upper), lambda s: s in ("DEBUG", "INFO", "ERROR", "WARN"), Use(lambda e: getattr(logging, e))) }], "truncate_log_file": bool,
def setChoice(key, *args):
    """Build a validator that accepts only one of the listed choices.

    :param key: name of the config field, used in the error message
    :param args: the allowed values
    :return: a ``schema.And`` validator that fails with a range error
             naming ``key`` and the allowed values
    """
    message = SCHEMA_RANGE_ERROR % (key, str(args))
    return And(args.__contains__, error=message)
def setPathCheck(key):
    """Build a validator requiring the value to be an existing filesystem path.

    :param key: name of the config field, used in the error message
    :return: a ``schema.And`` validator wrapping ``os.path.exists``
    """
    message = SCHEMA_PATH_ERROR % key
    return And(os.path.exists, error=message)
def query_network():
    """Handle API request '/network'.

    API Request Parameters
    ----------------------
        ids : list of int
        nodes_limit : int
        edges_limit : int
        include_user_mentions : bool

    API Response Keys
    -----------------
        status : string
        num_of_entries : int
        edges : dict
            canonical_url : string
            date_published : string formatted datetime
            domain : string
            from_user_id : string
            from_user_screen_name : string
            id : int
            is_mention : bool
            site_type : {'claim', 'fact_checking'}
            title : string
            to_user_id : string
            to_user_screen_name : string
            tweet_created_at : string formatted datetime
            tweet_id: string
            tweet_type: {'origin', 'retweet', 'quote', 'reply'}
    """
    # PyLucene: every thread that touches the index must attach to the JVM.
    lucene.getVMEnv().attachCurrentThread()
    # Request-parameter validation/coercion.  NOTE(review): the use of
    # `unicode` indicates this module targets Python 2.
    q_network_schema = Schema({
        # 'ids' arrives as a JSON-encoded list in the query string.
        'ids': Use(flask.json.loads),
        Optional('nodes_limit', default=1000): And(Use(int), lambda i: i > 0),
        Optional('edges_limit', default=12500): And(Use(int), lambda i: i > 0),
        # Boolean flags arrive as the strings 'true'/'false' (case-insensitive)
        # and are coerced to real bools.
        Optional('include_user_mentions', default=True): And(
            unicode, Use(lambda s: s.lower()),
            lambda s: s in ('true', 'false'),
            Use(lambda s: True if s == 'true' else False)),
    })
    q_kwargs = copy_req_args(request.args)
    try:
        q_kwargs = q_network_schema.validate(q_kwargs)
        df = db_query_network(engine, **q_kwargs)
        if len(df) == 0:
            raise APINoResultError('No edge could be built!')
        response = dict(
            status='OK',
            num_of_entries=len(df),
            # Round-trip through JSON so the DataFrame serializes with the
            # module-wide TO_JSON_KWARGS conventions.
            edges=flask.json.loads(df.to_json(**TO_JSON_KWARGS)))
    except SchemaError as e:
        # Invalid request parameters.
        response = dict(status='ERROR', error=str(e))
    except APINoResultError as e:
        response = dict(status='No result error', error=str(e))
    except Exception as e:
        # Unexpected failure: log the traceback, return a generic message.
        logger.exception(e)
        response = dict(status='ERROR', error='Server error, query failed')
    return flask.jsonify(response)
'''check number range''' return And( And(keyType, error=SCHEMA_TYPE_ERROR % (key, keyType.__name__)), And(lambda n: start <= n <= end, error=SCHEMA_RANGE_ERROR % (key, '(%s,%s)' % (start, end))), ) def setPathCheck(key): '''check if path exist''' return And(os.path.exists, error=SCHEMA_PATH_ERROR % key) common_schema = { 'authorName': setType('authorName', str), 'experimentName': setType('experimentName', str), Optional('description'): setType('description', str), 'trialConcurrency': setNumberRange('trialConcurrency', int, 1, 99999), Optional('maxExecDuration'): And(Regex(r'^[1-9][0-9]*[s|m|h|d]$',error='ERROR: maxExecDuration format is [digit]{s,m,h,d}')), Optional('maxTrialNum'): setNumberRange('maxTrialNum', int, 1, 99999), 'trainingServicePlatform': setChoice('trainingServicePlatform', 'remote', 'local', 'pai', 'kubeflow', 'frameworkcontroller'), Optional('searchSpacePath'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'searchSpacePath'), Optional('multiPhase'): setType('multiPhase', bool), Optional('multiThread'): setType('multiThread', bool), Optional('nniManagerIp'): setType('nniManagerIp', str), Optional('logDir'): And(os.path.isdir, error=SCHEMA_PATH_ERROR % 'logDir'), Optional('debug'): setType('debug', bool), Optional('versionCheck'): setType('versionCheck', bool), Optional('logLevel'): setChoice('logLevel', 'trace', 'debug', 'info', 'warning', 'error', 'fatal'), Optional('logCollection'): setChoice('logCollection', 'http', 'none'), 'useAnnotation': setType('useAnnotation', bool), Optional('tuner'): dict, Optional('advisor'): dict, Optional('assessor'): dict,
def main(argv, session):
    """CLI entry point: read/modify item metadata for one or more identifiers.

    Supports existence checks (--exists), metadata edits (--modify/--append/
    --append-list/--remove), listing file formats (--formats), dumping item
    metadata as JSON (default), and bulk edits from a CSV (--spreadsheet).
    Exits the process with 0 on success and 1 on any failure.
    """
    args = docopt(__doc__, argv=argv)

    # Validate args.
    s = Schema({
        six.text_type: bool,
        '<identifier>': list,
        '--modify': list,
        '--append': list,
        '--append-list': list,
        '--remove': list,
        '--spreadsheet': Or(None, And(lambda f: os.path.exists(f),
                            error='<file> should be a readable file or directory.')),
        '--target': Or(None, str),
        '--priority': Or(None, Use(int, error='<priority> should be an integer.')),
    })
    try:
        args = s.validate(args)
    except SchemaError as exc:
        print('{0}\n{1}'.format(str(exc), printable_usage(__doc__)), file=sys.stderr)
        sys.exit(1)

    formats = set()
    responses = []

    for i, identifier in enumerate(args['<identifier>']):
        item = session.get_item(identifier)

        # Check existence of item.
        if args['--exists']:
            if item.exists:
                responses.append(True)
                print('{0} exists'.format(identifier))
            else:
                responses.append(False)
                print('{0} does not exist'.format(identifier), file=sys.stderr)
            # After the last identifier, exit 0 only if every item existed.
            if (i + 1) == len(args['<identifier>']):
                if all(r is True for r in responses):
                    sys.exit(0)
                else:
                    sys.exit(1)

        # Modify metadata.
        elif args['--modify'] or args['--append'] or args['--append-list'] \
                or args['--remove']:
            if args['--modify']:
                metadata_args = args['--modify']
            elif args['--append']:
                metadata_args = args['--append']
            elif args['--append-list']:
                metadata_args = args['--append-list']
            # NOTE(review): this is an `if`, not `elif` — when --remove is
            # combined with another flag, --remove wins.  Confirm intended.
            if args['--remove']:
                metadata_args = args['--remove']
            try:
                metadata = get_args_dict(metadata_args)
                # 'key/subkey:value' syntax triggers the many-write form.
                if any('/' in k for k in metadata):
                    metadata = get_args_dict_many_write(metadata)
            except ValueError:
                print("error: The value of --modify, --remove, --append or --append-list "
                      "is invalid. It must be formatted as: --modify=key:value",
                      file=sys.stderr)
                sys.exit(1)

            if args['--remove']:
                responses.append(remove_metadata(item, metadata, args))
            else:
                responses.append(modify_metadata(item, metadata, args))
            if (i + 1) == len(args['<identifier>']):
                if all(r.status_code == 200 for r in responses):
                    sys.exit(0)
                else:
                    for r in responses:
                        if r.status_code == 200:
                            continue
                        # We still want to exit 0 if the non-200 is a
                        # "no changes to xml" error.
                        elif 'no changes' in r.content.decode('utf-8'):
                            continue
                        else:
                            sys.exit(1)

        # Get metadata.
        elif args['--formats']:
            for f in item.get_files():
                formats.add(f.format)
            if (i + 1) == len(args['<identifier>']):
                print('\n'.join(formats))

        # Dump JSON to stdout.
        else:
            metadata = json.dumps(item.item_metadata)
            print(metadata)

    # Edit metadata for items in bulk, using a spreadsheet as input.
    if args['--spreadsheet']:
        if not args['--priority']:
            # Default bulk edits to a low task priority.
            args['--priority'] = -5
        with io.open(args['--spreadsheet'], 'rU', newline='', encoding='utf-8') as csvfp:
            spreadsheet = csv.DictReader(csvfp)
            responses = []
            for row in spreadsheet:
                if not row['identifier']:
                    continue
                item = session.get_item(row['identifier'])
                # The 'file' column is spreadsheet bookkeeping, not metadata.
                if row.get('file'):
                    del row['file']
                # Lower-case the column names and drop empty cells.
                metadata = dict((k.lower(), v) for (k, v) in row.items() if v)
                responses.append(modify_metadata(item, metadata, args))
            if all(r.status_code == 200 for r in responses):
                sys.exit(0)
            else:
                for r in responses:
                    if r.status_code == 200:
                        continue
                    # We still want to exit 0 if the non-200 is a
                    # "no changes to xml" error.
                    elif 'no changes' in r.content.decode('utf-8'):
                        continue
                    else:
                        sys.exit(1)
def query_latest_articles():
    """Handle API request '/latest-articles'.

    API Request Parameters
    ----------------------
        past_hours : int
            Set the hours from now to past to be defined as latest hours.
        domains : object
            If None, return all articles in the latest hours;
            If str, should be one of {'fact_checking', 'claim', 'fake'}:
                if 'fact_checking', return fact checking articles,
                if 'claim', return claim articles,
                if 'fake', return selected fake articles, which is a subset
                   of claim, which is selected by us.
            If array of domain, return articles belonging to these domains.
        domains_file : str
            When `domains` is 'fake', the actual used domains are loaded from
            file `domains_file`. If this file doesn't exist, then `claim`
            type domains would be used.

    API Response Keys
    -----------------
        status : string
        num_of_entries : int
        articles : dict
            keys are:
                canonical_url : string
                date_published : string formatted datetime
                domain : string
                id : int
                site_type : {'claim', 'fact_checking'}
                title : string
    """
    # PyLucene: every thread that touches the index must attach to the JVM.
    lucene.getVMEnv().attachCurrentThread()
    # Validate input of request
    q_articles_schema = Schema({
        'past_hours': And(Use(int), lambda x: x > 0,
                          error='Invalid value of `past_hours`'),
        # 'domains' is either one of the known keywords or a JSON list.
        Optional('domains', default=None): Or(
            lambda s: s in ('fact_checking', 'claim', 'fake'),
            Use(flask.json.loads,
                error='Not valid values nor JSON string of `domains`'))
    })
    q_kwargs = copy_req_args(request.args)
    try:
        q_kwargs = q_articles_schema.validate(q_kwargs)
        domains_file = CONF['api'].get('selected_fake_domains_path')
        df = db_query_latest_articles(
            engine, domains_file=domains_file, **q_kwargs)
        if len(df) == 0:
            raise APINoResultError('No articles found!')
        response = dict(
            status='OK',
            num_of_entries=len(df),
            articles=flask.json.loads(df.to_json(**TO_JSON_KWARGS)))
    except SchemaError as e:
        response = dict(status='Parameter error', error=str(e))
    except APIParseError as e:
        # NOTE(review): 'Invalide' looks like a typo, but clients may already
        # match on this exact status string — do not change silently.
        response = dict(status='Invalide query', error=str(e))
    except APINoResultError as e:
        response = dict(status='No result error', error=str(e))
    except Exception as e:
        logger.exception(e)
        response = dict(status='ERROR', error='Server error, query failed!')
    return flask.jsonify(response)
class SchemaParser:
    """
    Class which handles all schema logic.
    """
    # Generic identifier pattern: word characters only.
    string_pattern = Regex(
        r'^[a-zA-Z0-9_]+$',
        error="Error in string: '{}', Can only have a-z, A-Z, 0-9, and _")

    # Attack names additionally allow a few symbols.
    # NOTE(review): inside the class, `+-.` is a character *range* ('+' .. '.')
    # which also matches ',' — confirm whether that is intended.
    attack_pattern = Regex(
        r'^[a-zA-Z0-9_<>+-.]+$',
        error="Error in attack name: '{}', "
              "Can only have a-z, A-Z, 0-9, and symbols: _ < > + - . (no whitespaces)"
    )

    # A trigger is one of: a time window, a single-threshold condition,
    # or a between-two-values condition.
    trigger = Schema(
        Or(
            {
                'type': And(str, Use(str.lower), 'time'),
                'start': And(
                    int,
                    Schema(lambda i: i >= 0, error="'start' must be positive."),
                ),
                'end': And(
                    int,
                    Schema(lambda i: i >= 0, error="'end' must be positive."),
                ),
            },
            {
                'type': And(str, Use(str.lower), Or('below', 'above')),
                'sensor': And(str, string_pattern),
                'value': And(Or(float, And(int, Use(float)))),
            },
            {
                'type': And(str, Use(str.lower), 'between'),
                'sensor': And(str, string_pattern),
                'lower_value': And(Or(float, And(int, Use(float)))),
                'upper_value': And(Or(float, And(int, Use(float)))),
            },
        ))

    # An attack on a single actuator, fired by a trigger.
    device_attacks = Schema({
        'name': And(str, attack_pattern),
        'trigger': trigger,
        'actuator': And(str, string_pattern),
        'command': And(str, Use(str.lower), Or('open', 'closed'))
    })

    # Network attacks: a naive man-in-the-middle (single value/offset) or a
    # full MITM with per-tag overrides.
    network_attacks = Schema(
        Or(
            {
                'type': And(str, Use(str.lower), 'naive_mitm'),
                'name': And(
                    str, string_pattern,
                    Schema(
                        lambda name: 1 <= len(name) <= 10,
                        error="Length of name must be between 1 and 10, "
                              "'{}' has invalid length")),
                'trigger': trigger,
                Or('value', 'offset', only_one=True,
                   error="'tags' should have either a 'value' or 'offset' attribute."):
                    Or(float, And(int, Use(float))),
                'target': And(str, string_pattern)
            },
            {
                'type': And(str, Use(str.lower), 'mitm'),
                'name': And(
                    str, string_pattern,
                    Schema(
                        lambda name: 1 <= len(name) <= 10,
                        error="Length of name must be between 1 and 10, "
                              "'{}' has invalid length")),
                'trigger': trigger,
                'target': And(str, string_pattern),
                'tags': [{
                    'tag': And(str, string_pattern),
                    Or('value', 'offset', only_one=True,
                       error="'tags' should have either a 'value' or 'offset' attribute."):
                        Or(float, And(int, Use(float))),
                }]
            }))

    @staticmethod
    def path_schema(data: dict, config_path: Path) -> dict:
        """
        For all the values that need to be a path, this function converts them to absolute
        paths, checks if they exist, and checks the suffix if applicable.

        :param data: data from the config file
        :type data: dict
        :param config_path: path to the config file
        :type config_path: Path
        :return: the config data, but with existing absolute path objects
        :rtype: dict
        """
        # BUG FIX: the original validators were written as
        # `Schema(lambda l: Path.is_file, ...)`, which returns the unbound
        # method object (always truthy) instead of calling it — so the
        # existence checks could never fail.  They now invoke the check on
        # the resolved path.
        return Schema(
            And({
                'inp_file':
                And(
                    Use(Path),
                    Use(lambda p: config_path.absolute().parent / p),
                    Schema(lambda p: p.is_file(),
                           error="'inp_file' could not be found."),
                    Schema(lambda f: f.suffix == '.inp',
                           error="Suffix of 'inp_file' should be .inp.")),
                Optional('output_path',
                         default=config_path.absolute().parent / 'output'):
                And(
                    Use(str, error="'output_path' should be a string."),
                    Use(Path),
                    Use(lambda p: config_path.absolute().parent / p),
                ),
                Optional('initial_tank_data'):
                And(
                    Use(Path),
                    Use(lambda p: config_path.absolute().parent / p),
                    Schema(lambda p: p.is_file(),
                           error="'initial_tank_data' could not be found."),
                    Schema(lambda f: f.suffix == '.csv',
                           error="Suffix of initial_tank_data should be .csv")),
                Optional('demand_patterns'):
                And(
                    Use(Path),
                    Use(lambda p: config_path.absolute().parent / p),
                    Schema(lambda p: p.exists(),
                           error="'demand_patterns' path does not exist."),
                    # Either a directory of patterns or a single .csv file.
                    Or(
                        Path.is_dir,
                        Schema(lambda f: f.suffix == '.csv',
                               error="Suffix of demand_patterns should be .csv"))),
                Optional('network_loss_data'):
                And(
                    Use(Path),
                    Use(lambda p: config_path.absolute().parent / p),
                    Schema(lambda p: p.is_file(),
                           error="'network_loss_data' could not be found."),
                    Schema(lambda f: f.suffix == '.csv',
                           error="Suffix of network_loss_data should be .csv")),
                Optional('network_delay_data'):
                And(
                    Use(Path),
                    Use(lambda p: config_path.absolute().parent / p),
                    Schema(lambda p: p.is_file(),
                           error="'network_delay_data' could not be found."),
                    Schema(lambda f: f.suffix == '.csv',
                           error="Suffix of network_delay_data should be .csv")),
                # Pass every other key through untouched.
                str: object
            })).validate(data)

    @staticmethod
    def validate_schema(data: dict) -> dict:
        """
        Apply a schema to the data. This schema makes sure that every required
        parameter is given. It also fills in default values for missing
        parameters. It will test for types of parameters as well. Besides
        that, it converts some strings to lower case, like those of
        :code:'log_level'.

        :param data: data from the config file
        :type data: dict
        :return: A verified version of the data of the config file
        :rtype: dict
        """
        plc_schema = Schema([{
            'name': And(
                str,
                SchemaParser.string_pattern,
                Schema(
                    lambda name: 1 <= len(name) <= 10,
                    error="Length of name must be between 1 and 10, "
                          "'{}' has invalid length")),
            Optional('sensors'): [And(str, SchemaParser.string_pattern)],
            Optional('actuators'): [And(str, SchemaParser.string_pattern)]
        }])

        config_schema = Schema({
            'plcs': plc_schema,
            # Path fields are expected to have been resolved by path_schema.
            'inp_file': Path,
            Optional('network_topology_type', default='simple'):
                And(str, Use(str.lower), Or('complex', 'simple')),
            'output_path': Path,
            Optional('iterations'): And(
                int,
                Schema(lambda i: i > 0, error="'iterations' must be positive.")),
            Optional('mininet_cli', default=False): bool,
            Optional('log_level', default='info'): And(
                str, Use(str.lower),
                Or('debug', 'info', 'warning', 'error', 'critical',
                   error="'log_level' should be "
                         "one of the following: "
                         "'debug', 'info', 'warning', "
                         "'error' or 'critical'.")),
            Optional('simulator', default='pdd'): And(
                str, Use(str.lower), Or('pdd', 'dd'),
                error="'simulator' should be one of the following: 'pdd' or 'dd'."),
            Optional('attacks'): {
                Optional('device_attacks'): [SchemaParser.device_attacks],
                Optional('network_attacks'): [SchemaParser.network_attacks],
            },
            Optional('batch_simulations'): And(
                int,
                Schema(lambda i: i > 0,
                       error="'batch_simulations' must be positive.")),
            Optional('saving_interval'): And(
                int,
                Schema(lambda i: i > 0,
                       error="'saving_interval' must be positive.")),
            Optional('initial_tank_data'): Path,
            Optional('demand_patterns'): Path,
            Optional('network_loss_data'): Path,
            Optional('network_delay_data'): Path,
        })

        return config_schema.validate(data)
MultiDataNormMetricsCalculator, DistMetricsCalculator, APoZRankMetricsCalculator, MeanRankMetricsCalculator) from .tools import (SparsityAllocator, NormalSparsityAllocator, GlobalSparsityAllocator, Conv2dDependencyAwareAllocator) _logger = logging.getLogger(__name__) __all__ = [ 'LevelPruner', 'L1NormPruner', 'L2NormPruner', 'FPGMPruner', 'SlimPruner', 'ActivationPruner', 'ActivationAPoZRankPruner', 'ActivationMeanRankPruner', 'TaylorFOWeightPruner', 'ADMMPruner' ] NORMAL_SCHEMA = { Or('sparsity', 'sparsity_per_layer'): And(float, lambda n: 0 <= n < 1), SchemaOptional('op_types'): [str], SchemaOptional('op_names'): [str], SchemaOptional('op_partial_names'): [str] } GLOBAL_SCHEMA = { 'total_sparsity': And(float, lambda n: 0 <= n < 1), SchemaOptional('max_sparsity_per_layer'): And(float, lambda n: 0 < n <= 1), SchemaOptional('op_types'): [str], SchemaOptional('op_names'): [str], SchemaOptional('op_partial_names'): [str] } EXCLUDE_SCHEMA = { 'exclude': bool,
def path_schema(data: dict, config_path: Path) -> dict:
    """
    For all the values that need to be a path, this function converts them to absolute
    paths, checks if they exist, and checks the suffix if applicable.

    :param data: data from the config file
    :type data: dict
    :param config_path: path to the config file
    :type config_path: Path
    :return: the config data, but with existing absolute path objects
    :rtype: dict
    """
    # BUG FIX: the original validators were written as
    # `Schema(lambda l: Path.is_file, ...)`, which returns the unbound method
    # object (always truthy) instead of calling it — the existence checks
    # could never fail.  They now invoke the check on the resolved path.
    return Schema(
        And({
            'inp_file':
            And(
                Use(Path),
                Use(lambda p: config_path.absolute().parent / p),
                Schema(lambda p: p.is_file(),
                       error="'inp_file' could not be found."),
                Schema(lambda f: f.suffix == '.inp',
                       error="Suffix of 'inp_file' should be .inp.")),
            Optional('output_path',
                     default=config_path.absolute().parent / 'output'):
            And(
                Use(str, error="'output_path' should be a string."),
                Use(Path),
                Use(lambda p: config_path.absolute().parent / p),
            ),
            Optional('initial_tank_data'):
            And(
                Use(Path),
                Use(lambda p: config_path.absolute().parent / p),
                Schema(lambda p: p.is_file(),
                       error="'initial_tank_data' could not be found."),
                Schema(lambda f: f.suffix == '.csv',
                       error="Suffix of initial_tank_data should be .csv")),
            Optional('demand_patterns'):
            And(
                Use(Path),
                Use(lambda p: config_path.absolute().parent / p),
                Schema(lambda p: p.exists(),
                       error="'demand_patterns' path does not exist."),
                # Either a directory of patterns or a single .csv file.
                Or(
                    Path.is_dir,
                    Schema(lambda f: f.suffix == '.csv',
                           error="Suffix of demand_patterns should be .csv"))),
            Optional('network_loss_data'):
            And(
                Use(Path),
                Use(lambda p: config_path.absolute().parent / p),
                Schema(lambda p: p.is_file(),
                       error="'network_loss_data' could not be found."),
                Schema(lambda f: f.suffix == '.csv',
                       error="Suffix of network_loss_data should be .csv")),
            Optional('network_delay_data'):
            And(
                Use(Path),
                Use(lambda p: config_path.absolute().parent / p),
                Schema(lambda p: p.is_file(),
                       error="'network_delay_data' could not be found."),
                Schema(lambda f: f.suffix == '.csv',
                       error="Suffix of network_delay_data should be .csv")),
            # Pass every other key through untouched.
            str: object
        })).validate(data)
def validate_schema(data: dict) -> dict:
    """
    Apply a schema to the data. This schema makes sure that every required
    parameter is given. It also fills in default values for missing
    parameters. It will test for types of parameters as well. Besides that,
    it converts some strings to lower case, like those of :code:'log_level'.

    :param data: data from the config file
    :type data: dict
    :return: A verified version of the data of the config file
    :rtype: dict
    """
    # Each PLC entry: short word-character name plus optional sensor and
    # actuator lists.
    plc_schema = Schema([{
        'name': And(
            str,
            SchemaParser.string_pattern,
            Schema(
                lambda name: 1 <= len(name) <= 10,
                error=
                "Length of name must be between 1 and 10, '{}' has invalid length"
            )),
        Optional('sensors'): [And(str, SchemaParser.string_pattern)],
        Optional('actuators'): [And(str, SchemaParser.string_pattern)]
    }])

    config_schema = Schema({
        'plcs': plc_schema,
        # Path-valued fields are only type-checked here; resolution and
        # existence checks happen elsewhere (see path_schema).
        'inp_file': Path,
        Optional('network_topology_type', default='simple'):
            And(str, Use(str.lower), Or('complex', 'simple')),
        'output_path': Path,
        Optional('iterations'): And(
            int,
            Schema(lambda i: i > 0, error="'iterations' must be positive.")),
        Optional('mininet_cli', default=False): bool,
        Optional('log_level', default='info'): And(
            str, Use(str.lower),
            Or('debug', 'info', 'warning', 'error', 'critical',
               error="'log_level' should be "
                     "one of the following: "
                     "'debug', 'info', 'warning', "
                     "'error' or 'critical'.")),
        Optional('simulator', default='pdd'): And(
            str, Use(str.lower), Or('pdd', 'dd'),
            error=
            "'simulator' should be one of the following: 'pdd' or 'dd'."),
        Optional('attacks'): {
            Optional('device_attacks'): [SchemaParser.device_attacks],
            Optional('network_attacks'): [SchemaParser.network_attacks],
        },
        Optional('batch_simulations'): And(
            int,
            Schema(lambda i: i > 0,
                   error="'batch_simulations' must be positive.")),
        Optional('saving_interval'): And(
            int,
            Schema(lambda i: i > 0,
                   error="'saving_interval' must be positive.")),
        Optional('initial_tank_data'): Path,
        Optional('demand_patterns'): Path,
        Optional('network_loss_data'): Path,
        Optional('network_delay_data'): Path,
    })

    return config_schema.validate(data)
# @Vendors
from schema import Schema, And, Use, Optional

# @Constants
from nlp_model_gen.constants.constants import TRAIN_MANAGER_SCHEMAS

# A custom entity tag: both fields are required non-empty strings.
custom_entity_tag_schema = Schema({
    'name': And(str, len),
    'description': And(str, len)
})

# One training example; optional 'tags' entries mark entity spans by
# start/end position.
train_example_data_schema = Schema({
    'sentence': And(str, len),
    'type': And(str, len),
    Optional('tags'): [{
        'entity': And(str, len),
        'i_pos': And(Use(int)),
        'e_pos': And(Use(int))
    }]
})

# Registry mapping schema keys to their validators.
schemas = dict({})
schemas[TRAIN_MANAGER_SCHEMAS['CUSTOM_ENTITY']] = custom_entity_tag_schema
schemas[TRAIN_MANAGER_SCHEMAS['TRAIN_DATA']] = train_example_data_schema

def validate_data(schema_key, data):
    """Return True if `data` is valid under the schema registered for
    `schema_key`; unknown keys return False instead of raising.
    """
    # PEP 8 idiom: `not in` on the dict itself — membership on a dict tests
    # its keys directly, no need to build the keys() view.
    if schema_key not in schemas:
        return False
    return schemas[schema_key].is_valid(data)
import json from flask import jsonify from schema import And, Optional, Schema from pysite.base_route import APIView from pysite.constants import ValidationTypes from pysite.decorators import api_key, api_params from pysite.mixins import DBMixin GET_SCHEMA = Schema({ # This is passed as a GET parameter, so it has to be a string Optional('user_id'): And(str, str.isnumeric, error="`user_id` must be a numeric string") }) POST_SCHEMA = Schema({ 'user_id': And(str, str.isnumeric, error="`user_id` must be a numeric string"), 'channel_id': And(str, str.isnumeric, error="`channel_id` must be a numeric string") }) DELETE_SCHEMA = Schema({ 'user_id': And(str, str.isnumeric, error="`user_id` must be a numeric string") }) NOT_A_NUMBER_JSON = json.dumps( {'error_message': "The given `user_id` parameter is not a valid number"}) NOT_FOUND_JSON = json.dumps(
from models import * import json from base64 import b64encode from schema import Schema, And, Use, Optional vul_schema = Schema( { 'cwe': And(Use(int)), 'name': And(str, len), 'tool': And(str, len), 'severity': And(Use(int), lambda n: 0 <= n <= 3), 'description': And(str, len), 'target_name': And(str, len), Optional('observation'): And(str, len), Optional('remediation'): And(str, len), }, ignore_extra_keys=False) def parse_zap_json_file(zap_file, target, session): with open(zap_file, 'r') as zapfile: zap_data = json.loads(zapfile.read()) alerts = zap_data['Report']['Sites']['Alerts']['AlertItem'] if alerts: for alert in alerts: vul = Vulnerability() vul.tool = 'zap' vul.target = target vul.name = alert['Alert'] if alert['RiskDesc'] == 'High':
# The strings in the schema are constants and should get extracted, but not translated until # runtime. translate = _ _ = lambda s: s YAML_SCHEMA = Schema( Or( { UseOptional(COMMAND): Or( { Or(str, int): Or( { Or(int, "default"): And(bool, error=_( "Rules must be either `true` or `false`.")) }, {}, error= _("Keys under command names must be IDs (numbers) or `default`." ), ) }, {}, error=_( "Keys under `COMMAND` must be command names (strings)."), ), UseOptional(COG): Or( {
class Stage(object):
    """A pipeline stage: a command with declared dependencies and outputs,
    serialized to a 'Dvcfile'/'*.dvc' YAML file.
    """

    STAGE_FILE = "Dvcfile"
    STAGE_FILE_SUFFIX = ".dvc"
    PARAM_MD5 = "md5"
    PARAM_CMD = "cmd"
    PARAM_DEPS = "deps"
    PARAM_OUTS = "outs"
    PARAM_LOCKED = "locked"

    # Validation schema for the on-disk stage file contents.
    SCHEMA = {
        Optional(PARAM_MD5): Or(str, None),
        Optional(PARAM_CMD): Or(str, None),
        Optional(PARAM_DEPS): Or(And(list, Schema([dependency.SCHEMA])), None),
        Optional(PARAM_OUTS): Or(And(list, Schema([output.SCHEMA])), None),
        Optional(PARAM_LOCKED): bool,
    }

    def __init__(
        self,
        project,
        path=None,
        cmd=None,
        cwd=os.curdir,
        deps=None,
        outs=None,
        md5=None,
        locked=False,
    ):
        # Avoid mutable default arguments: fresh lists per instance.
        if deps is None:
            deps = []
        if outs is None:
            outs = []

        self.project = project
        self.path = path
        self.cmd = cmd
        self.cwd = cwd
        self.outs = outs
        self.deps = deps
        self.md5 = md5
        self.locked = locked

    def __repr__(self):
        return "Stage: '{path}'".format(
            path=self.relpath if self.path else "No path"
        )

    @property
    def relpath(self):
        # Stage-file path relative to the current working directory.
        return os.path.relpath(self.path)

    @property
    def is_data_source(self):
        """Whether the stage file was created with `dvc add` or `dvc import`"""
        return self.cmd is None

    @staticmethod
    def is_valid_filename(path):
        # Either '*.dvc' or a file literally named 'Dvcfile'.
        return (
            path.endswith(Stage.STAGE_FILE_SUFFIX)
            or os.path.basename(path) == Stage.STAGE_FILE
        )

    @staticmethod
    def is_stage_file(path):
        return os.path.isfile(path) and Stage.is_valid_filename(path)

    def changed_md5(self):
        # True when the stored md5 no longer matches the recomputed one.
        return self.md5 != self._compute_md5()

    @property
    def is_callback(self):
        """
        A callback stage is always considered as changed, so it runs on every
        `dvc repro` call: it has a command but no dependencies.
        """
        return not self.is_data_source and len(self.deps) == 0

    @property
    def is_import(self):
        """Whether the stage file was created with `dvc import`."""
        return not self.cmd and len(self.deps) == 1 and len(self.outs) == 1

    def _changed_deps(self):
        """Return True if any dependency changed (locked stages never do)."""
        if self.locked:
            return False

        if self.is_callback:
            logger.warning(
                "Dvc file '{fname}' is a 'callback' stage (has a command and"
                " no dependencies) and thus always considered as changed.".format(
                    fname=self.relpath
                )
            )
            return True

        for dep in self.deps:
            if dep.changed():
                logger.warning(
                    "Dependency '{dep}' of '{stage}' changed.".format(
                        dep=dep, stage=self.relpath
                    )
                )
                return True

        return False

    def _changed_outs(self):
        """Return True if any output changed, warning about the first one."""
        for out in self.outs:
            if out.changed():
                logger.warning(
                    "Output '{out}' of '{stage}' changed.".format(
                        out=out, stage=self.relpath
                    )
                )
                return True

        return False

    def _changed_md5(self):
        if self.changed_md5():
            logger.warning("Dvc file '{}' changed.".format(self.relpath))
            return True
        return False

    def changed(self):
        """Return True if deps, outs or the stage file itself changed.

        NOTE: all three checks are run eagerly (list, not generator) so that
        each one gets a chance to log its warning.
        """
        ret = any(
            [self._changed_deps(), self._changed_outs(), self._changed_md5()]
        )

        if ret:
            msg = "Stage '{}' changed.".format(self.relpath)
            color = "yellow"
        else:
            msg = "Stage '{}' didn't change.".format(self.relpath)
            color = "green"

        logger.info(logger.colorize(msg, color))

        return ret

    def remove_outs(self, ignore_remove=False):
        """
        Used mainly for `dvc remove --outs`
        """
        for out in self.outs:
            out.remove(ignore_remove=ignore_remove)

    def unprotect_outs(self):
        # Only local, existing outputs can be unprotected.
        for out in self.outs:
            if out.scheme != "local" or not out.exists:
                continue
            self.project.unprotect(out.path)

    def remove(self):
        """Remove all outputs and the stage file itself."""
        self.remove_outs(ignore_remove=True)
        os.unlink(self.path)

    def reproduce(self, force=False, dry=False, interactive=False):
        """Re-run this stage if it changed (or `force`); return self, or
        None when nothing needed to be done.

        :raises DvcException: if the user declines the interactive prompt.
        """
        if not self.changed() and not force:
            return None

        if (self.cmd or self.is_import) and not self.locked and not dry:
            # Removing outputs only if we actually have command to reproduce
            self.remove_outs(ignore_remove=False)

        msg = "Going to reproduce '{stage}'. Are you sure you want to continue?".format(
            stage=self.relpath
        )

        if interactive and not prompt.confirm(msg):
            raise DvcException("reproduction aborted by the user")

        logger.info("Reproducing '{stage}'".format(stage=self.relpath))

        self.run(dry=dry)

        logger.debug("'{stage}' was reproduced".format(stage=self.relpath))

        return self

    @staticmethod
    def validate(d, fname=None):
        """Validate a loaded stage-file dict against Stage.SCHEMA.

        :raises StageFileFormatError: wrapping the underlying SchemaError.
        """
        from dvc.utils import convert_to_unicode

        try:
            Schema(Stage.SCHEMA).validate(convert_to_unicode(d))
        except SchemaError as exc:
            raise StageFileFormatError(fname, exc)

    @classmethod
    def _stage_fname_cwd(cls, fname, cwd, outs, add):
        """Derive the stage filename and working directory from the first
        output when either was not given explicitly.
        """
        if fname and cwd:
            return (fname, cwd)

        if not outs:
            return (cls.STAGE_FILE, cwd if cwd else os.getcwd())

        out = outs[0]
        # Remote (non-local) outputs always use POSIX path semantics.
        if out.scheme == "local":
            path = os.path
        else:
            path = posixpath

        if not fname:
            fname = path.basename(out.path) + cls.STAGE_FILE_SUFFIX

        if not cwd or (add and out.is_local):
            cwd = path.dirname(out.path)

        return (fname, cwd)

    @staticmethod
    def _check_inside_project(project, cwd):
        # Stage files must live inside the project root.
        assert project is not None
        proj_dir = os.path.realpath(project.root_dir)
        if not os.path.realpath(cwd).startswith(proj_dir):
            raise StageBadCwdError(cwd)

    @property
    def is_cached(self):
        """
        Checks if this stage has already been run and stored
        """
        from dvc.remote.local import RemoteLOCAL
        from dvc.remote.s3 import RemoteS3

        old = Stage.load(self.project, self.path)
        if old._changed_outs():
            return False

        # NOTE: need to save checksums for deps in order to compare them
        # with what is written in the old stage.
        for dep in self.deps:
            dep.save()

        old_d = old.dumpd()
        new_d = self.dumpd()

        # NOTE: need to remove checksums from old dict in order to compare
        # it to the new one, since the new one doesn't have checksums yet.
        old_d.pop(self.PARAM_MD5, None)
        new_d.pop(self.PARAM_MD5, None)

        outs = old_d.get(self.PARAM_OUTS, [])
        for out in outs:
            out.pop(RemoteLOCAL.PARAM_CHECKSUM, None)
            out.pop(RemoteS3.PARAM_CHECKSUM, None)

        return old_d == new_d

    @staticmethod
    def create(
        project=None,
        cmd=None,
        deps=None,
        outs=None,
        outs_no_cache=None,
        metrics=None,
        metrics_no_cache=None,
        fname=None,
        cwd=os.curdir,
        locked=False,
        add=False,
        overwrite=True,
        ignore_build_cache=False,
        remove_outs=False,
    ):
        """Build a Stage from command-line style arguments; returns the new
        Stage, or None when an identical cached stage already exists.
        """
        if outs is None:
            outs = []
        if deps is None:
            deps = []
        if outs_no_cache is None:
            outs_no_cache = []
        if metrics is None:
            metrics = []
        if metrics_no_cache is None:
            metrics_no_cache = []

        stage = Stage(project=project, cwd=cwd, cmd=cmd, locked=locked)

        # Collect outputs of all four flavors (cached/uncached x plain/metric).
        stage.outs = output.loads_from(stage, outs, use_cache=True)
        stage.outs += output.loads_from(
            stage, metrics, use_cache=True, metric=True
        )
        stage.outs += output.loads_from(stage, outs_no_cache, use_cache=False)
        stage.outs += output.loads_from(
            stage, metrics_no_cache, use_cache=False, metric=True
        )
        stage.deps = dependency.loads_from(stage, deps)

        stage._check_circular_dependency()
        stage._check_duplicated_arguments()

        if fname is not None and os.path.basename(fname) != fname:
            raise StageFileBadNameError(
                "stage file name '{fname}' should not contain subdirectories."
                " Use '-c|--cwd' to change location of the stage file.".format(
                    fname=fname
                )
            )

        fname, cwd = Stage._stage_fname_cwd(fname, cwd, stage.outs, add=add)

        Stage._check_inside_project(project, cwd)

        cwd = os.path.abspath(cwd)
        path = os.path.join(cwd, fname)

        stage.cwd = cwd
        stage.path = path

        # NOTE: remove outs before we check build cache
        if remove_outs:
            stage.remove_outs(ignore_remove=False)
            logger.warning("Build cache is ignored when using --remove-outs.")
            ignore_build_cache = True
        else:
            stage.unprotect_outs()

        if os.path.exists(path):
            if not ignore_build_cache and stage.is_cached:
                logger.info("Stage is cached, skipping.")
                return None

            msg = (
                "'{}' already exists. Do you wish to run the command and "
                "overwrite it?".format(stage.relpath)
            )

            if not overwrite and not prompt.confirm(msg):
                raise StageFileAlreadyExistsError(stage.relpath)

            os.unlink(path)

        return stage

    @staticmethod
    def _check_dvc_filename(fname):
        if not Stage.is_valid_filename(fname):
            raise StageFileBadNameError(
                "bad stage filename '{}'. Stage files should be named"
                " 'Dvcfile' or have a '.dvc' suffix (e.g. '{}.dvc').".format(
                    os.path.relpath(fname), os.path.basename(fname)
                )
            )

    @staticmethod
    def _check_file_exists(fname):
        if not os.path.exists(fname):
            raise StageFileDoesNotExistError(fname)

    @staticmethod
    def load(project, fname):
        """Load, validate, and deserialize a stage from its YAML file."""
        Stage._check_file_exists(fname)
        Stage._check_dvc_filename(fname)

        if not Stage.is_stage_file(fname):
            raise StageFileIsNotDvcFileError(fname)

        with open(fname, "r") as fd:
            d = yaml.safe_load(fd) or {}

        Stage.validate(d, fname=os.path.relpath(fname))

        stage = Stage(
            project=project,
            path=os.path.abspath(fname),
            cwd=os.path.dirname(os.path.abspath(fname)),
            cmd=d.get(Stage.PARAM_CMD),
            md5=d.get(Stage.PARAM_MD5),
            locked=d.get(Stage.PARAM_LOCKED, False),
        )

        stage.deps = dependency.loadd_from(stage, d.get(Stage.PARAM_DEPS, []))
        stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS, []))

        return stage

    def dumpd(self):
        """Serialize to a dict, dropping falsy values (None, [], False)."""
        return {
            key: value
            for key, value in {
                Stage.PARAM_MD5: self.md5,
                Stage.PARAM_CMD: self.cmd,
                Stage.PARAM_LOCKED: self.locked,
                Stage.PARAM_DEPS: [d.dumpd() for d in self.deps],
                Stage.PARAM_OUTS: [o.dumpd() for o in self.outs],
            }.items()
            if value
        }

    def dump(self, fname=None):
        """Write the serialized stage to `fname` (default: self.path) and
        register the file for git add.
        """
        fname = fname or self.path

        self._check_dvc_filename(fname)

        logger.info(
            "Saving information to '{file}'.".format(
                file=os.path.relpath(fname)
            )
        )

        with open(fname, "w") as fd:
            yaml.safe_dump(self.dumpd(), fd, default_flow_style=False)

        self.project.files_to_git_add.append(os.path.relpath(fname))

    def _compute_md5(self):
        from dvc.output.local import OutputLOCAL

        d = self.dumpd()

        # NOTE: removing md5 manually in order to not affect md5s in deps/outs
        if self.PARAM_MD5 in d.keys():
            del d[self.PARAM_MD5]

        # NOTE: excluding parameters that don't affect the state of the
        # pipeline. Not excluding `OutputLOCAL.PARAM_CACHE`, because if
        # it has changed, we might not have that output in our cache.
        return dict_md5(
            d, exclude=[self.PARAM_LOCKED, OutputLOCAL.PARAM_METRIC]
        )

    def save(self):
        """Record checksums for deps and outs, then refresh the stage md5."""
        for dep in self.deps:
            dep.save()

        for out in self.outs:
            out.save()

        self.md5 = self._compute_md5()

    def _check_missing_deps(self):
        missing = [dep for dep in self.deps if not dep.exists]

        if any(missing):
            raise MissingDep(missing)

    @staticmethod
    def _warn_if_fish(executable):  # pragma: no cover
        if (
            executable is None
            or os.path.basename(os.path.realpath(executable)) != "fish"
        ):
            return

        logger.warning(
            "DVC detected that you are using fish as your default "
            "shell. Be aware that it might cause problems by overwriting "
            "your current environment variables with values defined "
            "in '.fishrc', which might affect your command. See "
            "https://github.com/iterative/dvc/issues/1307. "
        )

    def _check_circular_dependency(self):
        from dvc.exceptions import CircularDependencyError

        # A path that is both a dependency and an output is circular.
        circular_dependencies = set(d.path for d in self.deps) & set(
            o.path for o in self.outs
        )

        if circular_dependencies:
            raise CircularDependencyError(circular_dependencies.pop())

    def _check_duplicated_arguments(self):
        from dvc.exceptions import ArgumentDuplicationError
        from collections import Counter

        path_counts = Counter(edge.path for edge in self.deps + self.outs)

        for path, occurrence in path_counts.items():
            if occurrence > 1:
                raise ArgumentDuplicationError(path)

    def _run(self):
        """Execute self.cmd via the shell; raise StageCmdFailedError on a
        non-zero exit code.
        """
        self._check_missing_deps()
        executable = os.getenv("SHELL") if os.name != "nt" else None
        self._warn_if_fish(executable)

        p = subprocess.Popen(
            self.cmd,
            cwd=self.cwd,
            shell=True,
            env=fix_env(os.environ),
            executable=executable,
        )
        p.communicate()

        if p.returncode != 0:
            raise StageCmdFailedError(self)

    def run(self, dry=False, resume=False):
        """Run the stage: verify (locked/data-source), download (import), or
        execute the command — skipping execution when already cached.
        """
        if self.locked:
            logger.info(
                "Verifying outputs in locked stage '{stage}'".format(
                    stage=self.relpath
                )
            )
            if not dry:
                self.check_missing_outputs()

        elif self.is_import:
            logger.info(
                "Importing '{dep}' -> '{out}'".format(
                    dep=self.deps[0].path, out=self.outs[0].path
                )
            )
            if not dry:
                if self._already_cached():
                    self.outs[0].checkout()
                else:
                    self.deps[0].download(
                        self.outs[0].path_info, resume=resume
                    )

        elif self.is_data_source:
            msg = "Verifying data sources in '{}'".format(self.relpath)
            logger.info(msg)
            if not dry:
                self.check_missing_outputs()

        else:
            logger.info("Running command:\n\t{}".format(self.cmd))
            if not dry:
                if self._already_cached():
                    self.checkout()
                else:
                    self._run()

        if not dry:
            self.save()

    def check_missing_outputs(self):
        paths = [
            out.path if out.scheme != "local" else out.rel_path
            for out in self.outs
            if not out.exists
        ]

        if paths:
            raise MissingDataSource(paths)

    def checkout(self, force=False):
        for out in self.outs:
            out.checkout(force=force)

    @staticmethod
    def _status(entries, name):
        # Merge the per-entry statuses; empty result means no changes.
        ret = {}

        for entry in entries:
            ret.update(entry.status())

        if ret:
            return {name: ret}

        return {}

    def status(self):
        ret = {}

        if not self.locked:
            ret.update(self._status(self.deps, "deps"))
            ret.update(self._status(self.outs, "outs"))

        if ret or self.changed_md5() or self.is_callback:
            return {self.relpath: ret}

        return {}

    def _already_cached(self):
        # Cached means: md5 matches, no dep changed, and every output is
        # unchanged (against the cache when cached, otherwise directly).
        return (
            not self.changed_md5()
            and all(not dep.changed() for dep in self.deps)
            and all(
                not out.changed_cache()
                if out.use_cache
                else not out.changed()
                for out in self.outs
            )
        )
from http import HTTPStatus

from flask import jsonify, Blueprint, current_app, request
from schema import Schema, SchemaError, And

from customer_service.model import commands
from customer_service.model.errors import CustomerNotFound

# All customer endpoints are mounted under /customers/.
customers = Blueprint('customers', __name__, url_prefix='/customers/')

# Payload contract for POST /customers/: both names are required,
# non-empty strings (`len` rejects the empty string).
CREATE_PAYLOAD_SCHEMA = Schema({
    "firstName": And(str, len),
    "surname": And(str, len)
})


@customers.route('/<string:customer_id>', methods=['GET'])
def get_customer(customer_id):
    """Return one customer as JSON, looked up by its numeric id.

    The URL captures the id as a string; it is converted with int() here,
    so a non-numeric id raises ValueError before the repository is hit.
    """
    customer_repository = current_app.customer_repository
    customer = commands.get_customer(customer_id=int(customer_id),
                                     customer_repository=customer_repository)
    return jsonify(customerId=str(customer.customer_id),
                   firstName=customer.first_name,
                   surname=customer.surname)


@customers.route('/', methods=['POST'])
def create_customer():
    # NOTE(review): this definition is truncated in the visible chunk;
    # the remainder of the handler body lies outside this view.
    customer_repository = current_app.customer_repository
from __future__ import annotations import typing from Options import Choice, OptionDict, OptionSet, ItemDict, Option, DefaultOnToggle, Range, DeathLink, Toggle from schema import Schema, Optional, And, Or # schema helpers FloatRange = lambda low, high: And(Or(int, float), lambda f: low <= f <= high) LuaBool = Or(bool, And(int, lambda n: n in (0, 1))) class MaxSciencePack(Choice): """Maximum level of science pack required to complete the game.""" display_name = "Maximum Required Science Pack" option_automation_science_pack = 0 option_logistic_science_pack = 1 option_military_science_pack = 2 option_chemical_science_pack = 3 option_production_science_pack = 4 option_utility_science_pack = 5 option_space_science_pack = 6 default = 6 def get_allowed_packs(self): return {option.replace("_", "-") for option, value in self.options.items() if value <= self.value} - \ {"space-science-pack"} # with rocket launch being the goal, post-launch techs don't make sense @classmethod def get_ordered_science_packs(cls): return [ option.replace("_", "-")
def query_articles():
    """Handle API request '/articles'.

    API Request Parameters
    ----------------------
        query : string
        sort_by : {'relevant', 'recent'}
        use_lucene_syntax : bool

    API Response Keys
    -----------------
        status : string
        num_of_entries : int
        total_hits : int
        articles : dict
            keys are:
                canonical_url : string
                date_published : string formatted datetime
                domain : string
                id : int
                number_of_tweets : int
                score : float
                site_type : {'claim', 'fact_checking'}
                title : string
    """
    # Lucene requires every handling thread to attach itself to the JVM.
    lucene.getVMEnv().attachCurrentThread()
    # Validate input of request.
    # NOTE(review): `unicode` implies this module runs under Python 2.
    q_articles_schema = Schema({
        'query':
        lambda s: len(s) > 0,
        Optional('sort_by', default='relevant'):
        And(unicode, lambda s: s in ('relevant', 'recent')),
        # Booleans arrive as query-string text; normalize case, then map
        # 'true'/'false' to the actual bool.
        Optional('use_lucene_syntax', default=True):
        And(unicode, Use(lambda s: s.lower()),
            lambda s: s in ('true', 'false'),
            Use(lambda s: True if s == 'true' else False)),
    })
    q_kwargs = copy_req_args(request.args)
    try:
        q_kwargs = q_articles_schema.validate(q_kwargs)
        # NOTE(review): STRAMING_START_AT looks like a misspelling of
        # "STREAMING"; the constant is defined elsewhere, so it is kept.
        n, df = searcher.search(
            n1=N1,
            n2=N2,
            min_score_of_recent_sorting=MIN_SCORE,
            min_date_published=STRAMING_START_AT,
            **q_kwargs)
        df = db_query_filter_disabled_site(engine, df)
        df = db_query_twitter_shares(engine, df)
        if len(df) == 0:
            raise APINoResultError('No article found!')
        # sort dataframe by 'number_of_tweets': most-shared articles first
        df = df.sort_values('number_of_tweets', ascending=False)
        response = dict(
            status='OK',
            num_of_entries=len(df),
            total_hits=n,
            articles=flask.json.loads(df.to_json(**TO_JSON_KWARGS)))
    except SchemaError as e:
        response = dict(status='Parameter error', error=str(e))
    except APIParseError as e:
        # FIX: status string was previously misspelled as 'Invalide query'.
        response = dict(status='Invalid query', error=str(e))
    except APINoResultError as e:
        response = dict(status='No result error', error=str(e))
    except Exception as e:
        # Never leak internals; log the traceback, return a generic error.
        logger.exception(e)
        response = dict(status='ERROR', error='Server error, query failed!')
    return flask.jsonify(response)
class FactorioWorldGen(OptionDict): """World Generation settings. Overview of options at https://wiki.factorio.com/Map_generator, with in-depth documentation at https://lua-api.factorio.com/latest/Concepts.html#MapGenSettings""" display_name = "World Generation" # FIXME: do we want default be a rando-optimized default or in-game DS? value: typing.Dict[str, typing.Dict[str, typing.Any]] default = { "terrain_segmentation": 0.5, "water": 1.5, "autoplace_controls": { "coal": { "frequency": 1, "size": 3, "richness": 6 }, "copper-ore": { "frequency": 1, "size": 3, "richness": 6 }, "crude-oil": { "frequency": 1, "size": 3, "richness": 6 }, "enemy-base": { "frequency": 1, "size": 1, "richness": 1 }, "iron-ore": { "frequency": 1, "size": 3, "richness": 6 }, "stone": { "frequency": 1, "size": 3, "richness": 6 }, "trees": { "frequency": 1, "size": 1, "richness": 1 }, "uranium-ore": { "frequency": 1, "size": 3, "richness": 6 } }, "seed": None, "starting_area": 1, "peaceful_mode": False, "cliff_settings": { "name": "cliff", "cliff_elevation_0": 10, "cliff_elevation_interval": 40, "richness": 1 }, "property_expression_names": { "control-setting:moisture:bias": 0, "control-setting:moisture:frequency:multiplier": 1, "control-setting:aux:bias": 0, "control-setting:aux:frequency:multiplier": 1 }, "pollution": { "enabled": True, "diffusion_ratio": 0.02, "ageing": 1, "enemy_attack_pollution_consumption_modifier": 1, "min_pollution_to_damage_trees": 60, "pollution_restored_per_tree_damage": 10 }, "enemy_evolution": { "enabled": True, "time_factor": 40.0e-7, "destroy_factor": 200.0e-5, "pollution_factor": 9.0e-7 }, "enemy_expansion": { "enabled": True, "max_expansion_distance": 7, "settler_group_min_size": 5, "settler_group_max_size": 20, "min_expansion_cooldown": 14400, "max_expansion_cooldown": 216000 } } schema = Schema({ "basic": { Optional("terrain_segmentation"): FloatRange(0.166, 6), Optional("water"): FloatRange(0.166, 6), Optional("autoplace_controls"): { str: { "frequency": 
FloatRange(0, 6), "size": FloatRange(0, 6), "richness": FloatRange(0.166, 6) } }, Optional("seed"): Or(None, And(int, lambda n: n >= 0)), Optional("width"): And(int, lambda n: n >= 0), Optional("height"): And(int, lambda n: n >= 0), Optional("starting_area"): FloatRange(0.166, 6), Optional("peaceful_mode"): LuaBool, Optional("cliff_settings"): { "name": str, "cliff_elevation_0": FloatRange(0, 99), "cliff_elevation_interval": FloatRange(0.066, 241), # 40/frequency "richness": FloatRange(0, 6) }, Optional("property_expression_names"): Schema({ Optional("control-setting:moisture:bias"): FloatRange(-0.5, 0.5), Optional("control-setting:moisture:frequency:multiplier"): FloatRange(0.166, 6), Optional("control-setting:aux:bias"): FloatRange(-0.5, 0.5), Optional("control-setting:aux:frequency:multiplier"): FloatRange(0.166, 6), Optional(str): object # allow overriding all properties }), }, "advanced": { Optional("pollution"): { Optional("enabled"): LuaBool, Optional("diffusion_ratio"): FloatRange(0, 0.25), Optional("ageing"): FloatRange(0.1, 4), Optional("enemy_attack_pollution_consumption_modifier"): FloatRange(0.1, 4), Optional("min_pollution_to_damage_trees"): FloatRange(0, 9999), Optional("pollution_restored_per_tree_damage"): FloatRange(0, 9999) }, Optional("enemy_evolution"): { Optional("enabled"): LuaBool, Optional("time_factor"): FloatRange(0, 1000e-7), Optional("destroy_factor"): FloatRange(0, 1000e-5), Optional("pollution_factor"): FloatRange(0, 1000e-7), }, Optional("enemy_expansion"): { Optional("enabled"): LuaBool, Optional("max_expansion_distance"): FloatRange(2, 20), Optional("settler_group_min_size"): FloatRange(1, 20), Optional("settler_group_max_size"): FloatRange(1, 50), Optional("min_expansion_cooldown"): FloatRange(3600, 216000), Optional("max_expansion_cooldown"): FloatRange(18000, 648000) } } }) def __init__(self, value: typing.Dict[str, typing.Any]): advanced = {"pollution", "enemy_evolution", "enemy_expansion"} self.value = { "basic": {key: 
value[key] for key in value.keys() - advanced}, "advanced": {key: value[key] for key in value.keys() & advanced} } # verify min_values <= max_values def optional_min_lte_max(container, min_key, max_key): min_val = container.get(min_key, None) max_val = container.get(max_key, None) if min_val is not None and max_val is not None and min_val > max_val: raise ValueError(f"{min_key} can't be bigger than {max_key}") enemy_expansion = self.value["advanced"].get("enemy_expansion", {}) optional_min_lte_max(enemy_expansion, "settler_group_min_size", "settler_group_max_size") optional_min_lte_max(enemy_expansion, "min_expansion_cooldown", "max_expansion_cooldown") @classmethod def from_any(cls, data: typing.Dict[str, typing.Any]) -> FactorioWorldGen: if type(data) == dict: return cls(data) else: raise NotImplementedError( f"Cannot Convert from non-dictionary, got {type(data)}")
def query_timeline():
    """Handle API '/timeline'.

    API Request Parameters
    ----------------------
        ids : list of int
        resolution : character in 'HDWM'

    API Response Keys
    -----------------
        status : string
        timeline : dict
            claim : dict
                timestamp : list of string formatted datetime
                volume : list of int
            fact_checking : dict
                timestamp : list of string formatted datetime
                volume : list of int
    """
    # Lucene requires every handling thread to attach itself to the JVM.
    lucene.getVMEnv().attachCurrentThread()
    q_tweets_schema = Schema({
        'ids':
        And(Use(flask.json.loads, error="Format error of `ids`"),
            lambda s: len(s) > 0,
            error='Empty of `ids`'),
        # NOTE(review): `s in 'HDWM'` is substring membership, so '' and
        # e.g. 'HD' also pass validation; `s in tuple('HDWM')` was likely
        # intended — confirm before tightening.
        Optional('resolution', default='D'):
        And(Use(lambda s: s.upper()), lambda s: s in 'HDWM'),
    })
    q_kwargs = copy_req_args(request.args)
    try:
        q_kwargs = q_tweets_schema.validate(q_kwargs)
        # Build a pandas resample rule, e.g. '1D' for daily buckets.
        rule = '1' + q_kwargs.pop('resolution')
        df = db_query_tweets(engine, q_kwargs['ids'])
        if len(df) == 0:
            raise APINoResultError('No tweet found!')
        df = df.set_index('tweet_created_at')
        # Fact-checking volume: unique tweet ids counted per time bucket.
        df1 = df.loc[df['site_type'] == N_FACT_CHECKING]
        s1 = df1['tweet_id'].drop_duplicates()
        s1 = s1.resample(rule).count()
        # Claim volume: same treatment for claim-site tweets.
        df2 = df.loc[df['site_type'] == N_CLAIM]
        s2 = df2['tweet_id'].drop_duplicates()
        s2 = s2.resample(rule).count()
        # Align both series on the union of timestamps (missing buckets
        # become 0), then accumulate into running totals.
        s1, s2 = s1.align(s2, join='outer', fill_value=0)
        s1 = s1.cumsum()
        s2 = s2.cumsum()
        response = dict(
            status='OK',
            timeline=dict(
                fact_checking=dict(
                    timestamp=s1.index.strftime(
                        '%Y-%m-%dT%H:%M:%SZ').tolist(),
                    volume=s1.tolist()),
                claim=dict(
                    timestamp=s2.index.strftime(
                        '%Y-%m-%dT%H:%M:%SZ').tolist(),
                    volume=s2.tolist())))
    except SchemaError as e:
        response = dict(status='ERROR', error=str(e))
    except APINoResultError as e:
        response = dict(status='No result error', error=str(e))
    except Exception as e:
        # Never leak internals; log the traceback, return a generic error.
        logger.exception(e)
        response = dict(status='ERROR', error='Server error, query failed')
    return flask.jsonify(response)
class Stage(object): STAGE_FILE = "Dvcfile" STAGE_FILE_SUFFIX = ".dvc" PARAM_MD5 = "md5" PARAM_CMD = "cmd" PARAM_WDIR = "wdir" PARAM_DEPS = "deps" PARAM_OUTS = "outs" PARAM_LOCKED = "locked" PARAM_META = "meta" SCHEMA = { Optional(PARAM_MD5): Or(str, None), Optional(PARAM_CMD): Or(str, None), Optional(PARAM_WDIR): Or(str, None), Optional(PARAM_DEPS): Or(And(list, Schema([dependency.SCHEMA])), None), Optional(PARAM_OUTS): Or(And(list, Schema([output.SCHEMA])), None), Optional(PARAM_LOCKED): bool, Optional(PARAM_META): object, } TAG_REGEX = r"^(?P<path>.*)@(?P<tag>[^\\/@:]*)$" def __init__( self, repo, path=None, cmd=None, wdir=os.curdir, deps=None, outs=None, md5=None, locked=False, tag=None, state=None, ): if deps is None: deps = [] if outs is None: outs = [] self.repo = repo self.path = path self.cmd = cmd self.wdir = wdir self.outs = outs self.deps = deps self.md5 = md5 self.locked = locked self.tag = tag self._state = state or {} def __repr__(self): return "Stage: '{path}'".format( path=self.relpath if self.path else "No path") @property def relpath(self): return relpath(self.path) @property def is_data_source(self): """Whether the DVC-file was created with `dvc add` or `dvc import`""" return self.cmd is None @staticmethod def is_valid_filename(path): return ( # path.endswith doesn't work for encoded unicode filenames on # Python 2 and since Stage.STAGE_FILE_SUFFIX is ascii then it is # not needed to decode the path from py2's str path[-len(Stage.STAGE_FILE_SUFFIX):] == Stage.STAGE_FILE_SUFFIX or os.path.basename(path) == Stage.STAGE_FILE) @staticmethod def is_stage_file(path): return os.path.isfile(path) and Stage.is_valid_filename(path) def changed_md5(self): return self.md5 != self._compute_md5() @property def is_callback(self): """ A callback stage is always considered as changed, so it runs on every `dvc repro` call. 
""" return not self.is_data_source and len(self.deps) == 0 @property def is_import(self): """Whether the DVC-file was created with `dvc import`.""" return not self.cmd and len(self.deps) == 1 and len(self.outs) == 1 @property def is_repo_import(self): if not self.is_import: return False return isinstance(self.deps[0], dependency.DependencyREPO) def _changed_deps(self): if self.locked: return False if self.is_callback: logger.warning( "DVC-file '{fname}' is a 'callback' stage " "(has a command and no dependencies) and thus always " "considered as changed.".format(fname=self.relpath)) return True for dep in self.deps: status = dep.status() if status: logger.warning( "Dependency '{dep}' of '{stage}' changed because it is " "'{status}'.".format(dep=dep, stage=self.relpath, status=status[str(dep)])) return True return False def _changed_outs(self): for out in self.outs: status = out.status() if status: logger.warning( "Output '{out}' of '{stage}' changed because it is " "'{status}'".format(out=out, stage=self.relpath, status=status[str(out)])) return True return False def _changed_md5(self): if self.changed_md5(): logger.warning("DVC-file '{}' changed.".format(self.relpath)) return True return False def changed(self): # Short-circuit order: stage md5 is fast, deps are expected to change ret = (self._changed_md5() or self._changed_deps() or self._changed_outs()) if ret: logger.warning("Stage '{}' changed.".format(self.relpath)) else: logger.info("Stage '{}' didn't change.".format(self.relpath)) return ret def remove_outs(self, ignore_remove=False, force=False): """Used mainly for `dvc remove --outs` and :func:`Stage.reproduce`.""" for out in self.outs: if out.persist and not force: out.unprotect() else: logger.debug("Removing output '{out}' of '{stage}'.".format( out=out, stage=self.relpath)) out.remove(ignore_remove=ignore_remove) def unprotect_outs(self): for out in self.outs: out.unprotect() def remove(self, force=False, remove_outs=True): if remove_outs: 
self.remove_outs(ignore_remove=True, force=force) else: self.unprotect_outs() os.unlink(self.path) def reproduce(self, interactive=False, **kwargs): if not kwargs.get("force", False) and not self.changed(): return None msg = ("Going to reproduce '{stage}'. " "Are you sure you want to continue?".format(stage=self.relpath)) if interactive and not prompt.confirm(msg): raise DvcException("reproduction aborted by the user") self.run(**kwargs) logger.debug("'{stage}' was reproduced".format(stage=self.relpath)) return self def update(self): if not self.is_repo_import and not self.is_import: raise StageUpdateError(self.relpath) self.deps[0].update() locked = self.locked self.locked = False try: self.reproduce() finally: self.locked = locked @staticmethod def validate(d, fname=None): from dvc.utils import convert_to_unicode try: Schema(Stage.SCHEMA).validate(convert_to_unicode(d)) except SchemaError as exc: raise StageFileFormatError(fname, exc) @classmethod def _stage_fname(cls, outs, add): if not outs: return cls.STAGE_FILE out = outs[0] fname = out.path_info.name + cls.STAGE_FILE_SUFFIX if (add and out.is_in_repo and not contains_symlink_up_to(out.fspath, out.repo.root_dir)): fname = out.path_info.with_name(fname).fspath return fname @staticmethod def _check_stage_path(repo, path): assert repo is not None real_path = os.path.realpath(path) if not os.path.exists(real_path): raise StagePathNotFoundError(path) if not os.path.isdir(real_path): raise StagePathNotDirectoryError(path) proj_dir = os.path.realpath(repo.root_dir) + os.path.sep if not (real_path + os.path.sep).startswith(proj_dir): raise StagePathOutsideError(path) @property def is_cached(self): """ Checks if this stage has been already ran and stored """ from dvc.remote.local import RemoteLOCAL from dvc.remote.s3 import RemoteS3 old = Stage.load(self.repo, self.path) if old._changed_outs(): return False # NOTE: need to save checksums for deps in order to compare them # with what is written in the old stage. 
for dep in self.deps: dep.save() old_d = old.dumpd() new_d = self.dumpd() # NOTE: need to remove checksums from old dict in order to compare # it to the new one, since the new one doesn't have checksums yet. old_d.pop(self.PARAM_MD5, None) new_d.pop(self.PARAM_MD5, None) outs = old_d.get(self.PARAM_OUTS, []) for out in outs: out.pop(RemoteLOCAL.PARAM_CHECKSUM, None) out.pop(RemoteS3.PARAM_CHECKSUM, None) if old_d != new_d: return False # NOTE: committing to prevent potential data duplication. For example # # $ dvc config cache.type hardlink # $ echo foo > foo # $ dvc add foo # $ rm -f foo # $ echo foo > foo # $ dvc add foo # should replace foo with a link to cache # old.commit() return True @staticmethod def create(repo, **kwargs): wdir = kwargs.get("wdir", None) cwd = kwargs.get("cwd", None) fname = kwargs.get("fname", None) add = kwargs.get("add", False) # Backward compatibility for `cwd` option if wdir is None and cwd is not None: if fname is not None and os.path.basename(fname) != fname: raise StageFileBadNameError( "DVC-file name '{fname}' may not contain subdirectories" " if '-c|--cwd' (deprecated) is specified. Use '-w|--wdir'" " along with '-f' to specify DVC-file path with working" " directory.".format(fname=fname)) wdir = cwd elif wdir is None: wdir = os.curdir stage = Stage( repo=repo, wdir=wdir, cmd=kwargs.get("cmd", None), locked=kwargs.get("locked", False), ) Stage._fill_stage_outputs(stage, **kwargs) stage.deps = dependency.loads_from(stage, kwargs.get("deps", []), erepo=kwargs.get("erepo", None)) stage._check_circular_dependency() stage._check_duplicated_arguments() if not fname: fname = Stage._stage_fname(stage.outs, add) stage._check_dvc_filename(fname) # Autodetecting wdir for add, we need to create outs first to do that, # so we start with wdir = . and remap out paths later. 
if add and kwargs.get("wdir") is None and cwd is None: wdir = os.path.dirname(fname) for out in chain(stage.outs, stage.deps): if out.is_in_repo: out.def_path = relpath(out.path_info, wdir) wdir = os.path.abspath(wdir) if cwd is not None: path = os.path.join(wdir, fname) else: path = os.path.abspath(fname) Stage._check_stage_path(repo, wdir) Stage._check_stage_path(repo, os.path.dirname(path)) stage.wdir = wdir stage.path = path ignore_build_cache = kwargs.get("ignore_build_cache", False) # NOTE: remove outs before we check build cache if kwargs.get("remove_outs", False): logger.warning("--remove-outs is deprecated." " It is now the default behavior," " so there's no need to use this option anymore.") stage.remove_outs(ignore_remove=False) logger.warning("Build cache is ignored when using --remove-outs.") ignore_build_cache = True if os.path.exists(path) and any(out.persist for out in stage.outs): logger.warning("Build cache is ignored when persisting outputs.") ignore_build_cache = True if os.path.exists(path): if (not ignore_build_cache and stage.is_cached and not stage.is_callback): logger.info("Stage is cached, skipping.") return None msg = ("'{}' already exists. 
Do you wish to run the command and " "overwrite it?".format(stage.relpath)) if not kwargs.get("overwrite", True) and not prompt.confirm(msg): raise StageFileAlreadyExistsError(stage.relpath) os.unlink(path) return stage @staticmethod def _fill_stage_outputs(stage, **kwargs): stage.outs = output.loads_from(stage, kwargs.get("outs", []), use_cache=True) stage.outs += output.loads_from(stage, kwargs.get("metrics", []), use_cache=True, metric=True) stage.outs += output.loads_from(stage, kwargs.get("outs_persist", []), use_cache=True, persist=True) stage.outs += output.loads_from(stage, kwargs.get("outs_no_cache", []), use_cache=False) stage.outs += output.loads_from( stage, kwargs.get("metrics_no_cache", []), use_cache=False, metric=True, ) stage.outs += output.loads_from( stage, kwargs.get("outs_persist_no_cache", []), use_cache=False, persist=True, ) @staticmethod def _check_dvc_filename(fname): if not Stage.is_valid_filename(fname): raise StageFileBadNameError( "bad DVC-file name '{}'. DVC-files should be named" " 'Dvcfile' or have a '.dvc' suffix (e.g. '{}.dvc').".format( relpath(fname), os.path.basename(fname))) @staticmethod def _check_file_exists(repo, fname): if not repo.tree.exists(fname): raise StageFileDoesNotExistError(fname) @staticmethod def _check_isfile(repo, fname): if not repo.tree.isfile(fname): raise StageFileIsNotDvcFileError(fname) @classmethod def _get_path_tag(cls, s): regex = re.compile(cls.TAG_REGEX) match = regex.match(s) if not match: return s, None return match.group("path"), match.group("tag") @staticmethod def load(repo, fname): fname, tag = Stage._get_path_tag(fname) # it raises the proper exceptions by priority: # 1. when the file doesn't exists # 2. filename is not a DVC-file # 3. 
path doesn't represent a regular file Stage._check_file_exists(repo, fname) Stage._check_dvc_filename(fname) Stage._check_isfile(repo, fname) with repo.tree.open(fname) as fd: d = load_stage_fd(fd, fname) # Making a deepcopy since the original structure # looses keys in deps and outs load state = copy.deepcopy(d) Stage.validate(d, fname=relpath(fname)) path = os.path.abspath(fname) stage = Stage( repo=repo, path=path, wdir=os.path.abspath( os.path.join(os.path.dirname(path), d.get(Stage.PARAM_WDIR, "."))), cmd=d.get(Stage.PARAM_CMD), md5=d.get(Stage.PARAM_MD5), locked=d.get(Stage.PARAM_LOCKED, False), tag=tag, state=state, ) stage.deps = dependency.loadd_from(stage, d.get(Stage.PARAM_DEPS, [])) stage.outs = output.loadd_from(stage, d.get(Stage.PARAM_OUTS, [])) return stage def dumpd(self): rel_wdir = relpath(self.wdir, os.path.dirname(self.path)) return { key: value for key, value in { Stage.PARAM_MD5: self.md5, Stage.PARAM_CMD: self.cmd, Stage.PARAM_WDIR: pathlib.PurePath(rel_wdir).as_posix(), Stage.PARAM_LOCKED: self.locked, Stage.PARAM_DEPS: [d.dumpd() for d in self.deps], Stage.PARAM_OUTS: [o.dumpd() for o in self.outs], Stage.PARAM_META: self._state.get("meta"), }.items() if value } def dump(self): fname = self.path self._check_dvc_filename(fname) logger.info( "Saving information to '{file}'.".format(file=relpath(fname))) d = self.dumpd() apply_diff(d, self._state) dump_stage_file(fname, self._state) self.repo.scm.track_file(relpath(fname)) def _compute_md5(self): from dvc.output.base import OutputBase d = self.dumpd() # Remove md5 and meta, these should not affect stage md5 d.pop(self.PARAM_MD5, None) d.pop(self.PARAM_META, None) # Ignore the wdir default value. In this case DVC-file w/o # wdir has the same md5 as a file with the default value specified. # It's important for backward compatibility with pipelines that # didn't have WDIR in their DVC-files. 
if d.get(self.PARAM_WDIR) == ".": del d[self.PARAM_WDIR] # NOTE: excluding parameters that don't affect the state of the # pipeline. Not excluding `OutputLOCAL.PARAM_CACHE`, because if # it has changed, we might not have that output in our cache. m = dict_md5( d, exclude=[ self.PARAM_LOCKED, OutputBase.PARAM_METRIC, OutputBase.PARAM_TAGS, OutputBase.PARAM_PERSIST, ], ) logger.debug("Computed stage '{}' md5: '{}'".format(self.relpath, m)) return m def save(self): for dep in self.deps: dep.save() for out in self.outs: out.save() self.md5 = self._compute_md5() @staticmethod def _changed_entries(entries): return [ str(entry) for entry in entries if entry.checksum and entry.changed_checksum() ] def check_can_commit(self, force): changed_deps = self._changed_entries(self.deps) changed_outs = self._changed_entries(self.outs) if changed_deps or changed_outs or self.changed_md5(): msg = ("dependencies {}".format(changed_deps) if changed_deps else "") msg += " and " if (changed_deps and changed_outs) else "" msg += "outputs {}".format(changed_outs) if changed_outs else "" msg += "md5" if not (changed_deps or changed_outs) else "" msg += " of '{}' changed. Are you sure you commit it?".format( self.relpath) if not force and not prompt.confirm(msg): raise StageCommitError( "unable to commit changed '{}'. Use `-f|--force` to " "force.`".format(self.relpath)) self.save() def commit(self): for out in self.outs: out.commit() def _check_missing_deps(self): missing = [dep for dep in self.deps if not dep.exists] if any(missing): raise MissingDep(missing) @staticmethod def _warn_if_fish(executable): # pragma: no cover if (executable is None or os.path.basename(os.path.realpath(executable)) != "fish"): return logger.warning( "DVC detected that you are using fish as your default " "shell. Be aware that it might cause problems by overwriting " "your current environment variables with values defined " "in '.fishrc', which might affect your command. 
See " "https://github.com/iterative/dvc/issues/1307. ") def _check_circular_dependency(self): from dvc.exceptions import CircularDependencyError circular_dependencies = set(d.path_info for d in self.deps) & set( o.path_info for o in self.outs) if circular_dependencies: raise CircularDependencyError(str(circular_dependencies.pop())) def _check_duplicated_arguments(self): from dvc.exceptions import ArgumentDuplicationError from collections import Counter path_counts = Counter(edge.path_info for edge in self.deps + self.outs) for path, occurrence in path_counts.items(): if occurrence > 1: raise ArgumentDuplicationError(str(path)) def _run(self): self._check_missing_deps() executable = os.getenv("SHELL") if os.name != "nt" else None self._warn_if_fish(executable) main_thread = isinstance(threading.current_thread(), threading._MainThread) old_handler = None p = None try: p = subprocess.Popen( self.cmd, cwd=self.wdir, shell=True, env=fix_env(os.environ), executable=executable, close_fds=True, ) if main_thread: old_handler = signal.signal(signal.SIGINT, signal.SIG_IGN) p.communicate() finally: if old_handler: signal.signal(signal.SIGINT, old_handler) if (p is None) or (p.returncode != 0): raise StageCmdFailedError(self) def run(self, dry=False, no_commit=False, force=False): if (self.cmd or self.is_import) and not self.locked and not dry: self.remove_outs(ignore_remove=False, force=False) if self.locked: logger.info("Verifying outputs in locked stage '{stage}'".format( stage=self.relpath)) if not dry: self.check_missing_outputs() elif self.is_import: logger.info("Importing '{dep}' -> '{out}'".format( dep=self.deps[0], out=self.outs[0])) if not dry: if not force and self._already_cached(): self.outs[0].checkout() else: self.deps[0].download(self.outs[0]) elif self.is_data_source: msg = "Verifying data sources in '{}'".format(self.relpath) logger.info(msg) if not dry: self.check_missing_outputs() else: logger.info("Running command:\n\t{}".format(self.cmd)) if not dry: if 
(not force and not self.is_callback and self._already_cached()): self.checkout() else: self._run() if not dry: self.save() if not no_commit: self.commit() def check_missing_outputs(self): paths = [str(out) for out in self.outs if not out.exists] if paths: raise MissingDataSource(paths) def checkout(self, force=False, progress_callback=None): for out in self.outs: out.checkout(force=force, tag=self.tag, progress_callback=progress_callback) @staticmethod def _status(entries): ret = {} for entry in entries: ret.update(entry.status()) return ret def status(self): ret = [] if not self.locked: deps_status = self._status(self.deps) if deps_status: ret.append({"changed deps": deps_status}) outs_status = self._status(self.outs) if outs_status: ret.append({"changed outs": outs_status}) if self.changed_md5(): ret.append("changed checksum") if self.is_callback: ret.append("always changed") if ret: return {self.relpath: ret} return {} def _already_cached(self): return (not self.changed_md5() and all(not dep.changed() for dep in self.deps) and all(not out.changed_cache() if out. use_cache else not out.changed() for out in self.outs)) def get_all_files_number(self): return sum(out.get_files_number() for out in self.outs)
from schema import Schema, And, SchemaError

from transaction_service.domain.account_repository import AccountRepository
from transaction_service.domain.commands import CreditAccount, DebitAccount

# Shape of an accepted transaction event: 8-char account number, a
# strictly positive integer amount, operation limited to credit/debit,
# and a literal status of 'accepted'.
TRANSACTION_SCHEMA = Schema(
    dict(id=And(str, len),
         accountNumber=And(str, lambda s: len(s) == 8),
         amount=And(int, lambda n: n > 0),
         operation=And(str, lambda s: s in ['credit', 'debit']),
         status='accepted',
         created=And(str, len)))


class Application:
    # Wires the transaction-event stream to validation and execution.
    # NOTE(review): the class is truncated in this chunk —
    # _schema_is_valid and _perform_transaction are defined beyond view.

    def __init__(self, transaction_events, balance_updates, accounts_client,
                 transaction_repository, logger):
        self.transaction_events = transaction_events
        self.balance_updates = balance_updates
        self.accounts_client = accounts_client
        self.transaction_repository = transaction_repository
        self.logger = logger

    def start(self):
        """Subscribe handle_event to the incoming transaction stream."""
        self.transaction_events.on_event(self.handle_event)

    def handle_event(self, event):
        """Validate an incoming event and perform it when well-formed."""
        self.logger.debug('Received transaction event', received_event=event)
        if self._schema_is_valid(event):
            self._perform_transaction(event)
class CoursesSchema(BaseSchema):
    # Validates a list of scraped course records; most descriptive fields
    # are nullable (Or(..., None)) because not every campus publishes them.
    SCHEMA = Schema([{
        'id': str,
        # course code should be a string and of length 8
        'code': Or(And(str, BaseSchema.COURSE_CODE_LAMBDA), None),
        'name': str,
        'description': Or(str, None),
        'division': str,
        'department': str,
        'prerequisites': Or(str, None),
        'corequisites': Or(str, None),
        'exclusions': Or(str, None),
        'recommended_preparation': Or(str, None),
        # e.g. '100', '300/A' — a level digit followed by two zeros and an
        # optional section letter suffix.
        'level': Regex(r'^\d00(/(A|B|C|D))?$'),
        'campus': Or(*BaseSchema.VALID_CAMPUSES),
        'term': str,
        'arts_and_science_breadth': Or(str, None),
        'arts_and_science_distribution': Or(str, None),
        'utm_distribution': Or(str, None),
        'utsc_breadth': Or(str, None),
        'apsc_electives': Or(str, None),
        # One entry per lecture/tutorial section, with its weekly meetings.
        'meeting_sections': [{
            'code': str,
            'instructors': Schema([str]),
            'times': [{
                'day': str,
                'start': int,
                'end': int,
                'duration': int,
                'location': Or(str, None)
            }],
            'size': int,
            'enrollment': Or(int, None),
            'waitlist_option': bool,
            'delivery': str
        }],
        'last_updated': str
    }])
def setType(key, type):
    """Build a validator ensuring the value for ``key`` has the given type.

    NOTE: the parameter name shadows the builtin ``type``; it is kept
    unchanged for backward compatibility with existing callers.
    """
    message = SCHEMA_TYPE_ERROR % (key, type.__name__)
    return And(type, error=message)
def test_schema_repr():
    # what about repr with `error`s?
    schema = Schema([Or(None, And(str, Use(float)))])
    expected = "Schema([Or(None, And(<type 'str'>, Use(<type 'float'>)))])"
    # Python 3 renders <class 'str'> where Python 2 rendered <type 'str'>;
    # normalize the actual repr before comparing.
    actual = repr(schema).replace("class", "type")
    assert actual == expected
def setNumberRange(key, keyType, start, end):
    """Build a validator: value has type ``keyType`` and lies in [start, end]."""
    type_rule = And(keyType,
                    error=SCHEMA_TYPE_ERROR % (key, keyType.__name__))
    range_rule = And(lambda n: start <= n <= end,
                     error=SCHEMA_RANGE_ERROR % (key,
                                                 '(%s,%s)' % (start, end)))
    return And(type_rule, range_rule)
def path(self, key):
    """Build a validator: value must be a string naming an existing path."""
    string_rule = And(str, error='%s should be a string!' % key)
    exists_rule = And(lambda p: Path(p).exists(),
                      error='%s path does not exist!' % (key))
    return And(string_rule, exists_rule)
def _select_option(label, available, allowed):
    """Pick one entry out of ``set(available) & set(allowed)``.

    Prompts interactively when several choices remain, auto-selects (and
    echoes) a lone choice, and raises NotImplementedError when nothing
    matches.  Shared by the date and flight selection steps below.
    """
    assert set(available) >= set(allowed)
    choices = list(sorted(set(available) & set(allowed)))
    prompt = "\033[34;1mSelect a {}\033[0;m ({}): ".format(
        label, ', '.join(choices))
    if len(choices) > 1:
        choice = input(prompt)
        assert choice in choices
    elif len(choices) == 1:
        choice = choices[0]
        print(prompt + choice)
    else:
        raise NotImplementedError
    return choice


def main():
    """Entry point: validate CLI args, pick a recording, render the animation.

    Parses arguments with docopt, validates them with `schema`, lets the user
    choose a date and flight from the known-good TEST combinations, predicts
    the phone attitude signals, and renders the G1000-vs-phone animation.
    """
    logging.basicConfig(
        stream=sys.stdout,
        level=logging.INFO,
        format='%(message)s',
        datefmt='%y-%m-%d %H:%M:%S')
    args = docopt.docopt(__doc__, version='SAGA PyAnimation 2.2')
    # Default the prediction model to the phone type when not given.
    if args['--model'] is None:
        args['--model'] = args['<phone>']
    requirements = {
        '<dir>': And(Use(str), lambda x: os.path.isdir(x),
                     error='Data directory must exist'),
        '<phone>': And(Use(str), lambda x: x in ('galaxy', 'pixel'),
                       error='Phone type only supports \'galaxy\' or \'pixel\''),
        '--model': And(Use(str), lambda x: x in ('galaxy', 'pixel'),
                       error='Phone type only supports \'galaxy\' or \'pixel\''),
        # BUGFIX: error message previously read "interger".
        '--start': And(Use(int), lambda x: x >= 0,
                       error='Starting second must be integer >= 0'),
        '--rate': And(Use(int), lambda x: x > 0,
                      error='Number of frames per second must be integer > 0'),
    }
    args = Schema(requirements).validate(args)
    data_dir = args['<dir>']
    phone = args['<phone>']
    # Known-good (date -> flight numbers) combinations used to filter data.
    TEST = {
        '021318': ['03'],
        '030318': ['04'],
        '040718': ['03'],
        '120117': ['02'],
    }

    date = _select_option(
        'Date', sorted(os.listdir(os.path.join(data_dir, phone))), TEST)

    sequence = process_data(data_dir, phone, date)
    flight_candidates = [idt.split('_')[-1] for idt in sequence.identifier]
    flight = _select_option('Flight', flight_candidates, TEST[date])

    # Keep only the chosen date/flight in the sequence.
    remain = ["{}_{}".format(date, flight)]
    sequence.prune_identifier(remain_identifier=remain)
    assert len(sequence) == 1

    def get_col(flight_seq, key):
        """Return column *key* of the single flight held by *flight_seq*."""
        assert len(flight_seq) == 1
        col_id = flight_seq.get_col_id(key)
        assert col_id is not None
        _, all_data = flight_seq[0]
        return all_data[:, col_id]

    # Ground-truth (G1000 avionics) channels.
    g1000_altitude = get_col(sequence.meta_target, 'alt')
    g1000_latitude = get_col(sequence.meta_target, 'lat')
    g1000_longitude = get_col(sequence.meta_target, 'long')
    g1000_pitch = get_col(sequence.meta_target, 'pitch')
    g1000_roll = get_col(sequence.meta_target, 'roll')
    g1000_heading = get_col(sequence.meta_target, 'heading')

    # Phone channels: raw position plus model-predicted attitude.
    mphone = args['--model']
    pitch_data = predict_trig(sequence, 'pitch',
                              "./model/{}.pitch".format(mphone))
    roll_data = predict_trig(sequence, 'roll',
                             "./model/{}.roll".format(mphone))
    heading_data = predict_trig(sequence, 'heading',
                                "./model/{}.heading".format(mphone))
    phone_altitude = get_col(sequence.meta_input, 'alt')
    phone_latitude = get_col(sequence.meta_input, 'lat')
    phone_longitude = get_col(sequence.meta_input, 'long')
    phone_pitch = get_col(pitch_data, '*pitch')
    phone_roll = get_col(roll_data, '*roll')
    phone_heading = get_col(heading_data, '*heading')

    anime = Animation(anime_data={
        'phone': phone[0].upper() + phone[1:].lower(),
        'g1000_altitude': g1000_altitude,
        'g1000_latitude': g1000_latitude,
        'g1000_longitude': g1000_longitude,
        'g1000_pitch': g1000_pitch,
        'g1000_roll': g1000_roll,
        'g1000_heading': g1000_heading,
        'phone_altitude': phone_altitude,
        'phone_latitude': phone_latitude,
        'phone_longitude': phone_longitude,
        'phone_pitch': phone_pitch,
        'phone_roll': phone_roll,
        'phone_heading': phone_heading,
    }, ptr=args['--start'])
    # rate frames/sec -> milliseconds per frame.
    anime.render(rate=1000 // args['--rate'])
class Config(object):  # pylint: disable=too-many-instance-attributes
    """Class that manages configuration files for a dvc repo.

    Args:
        dvc_dir (str): optional path to `.dvc` directory, that is used to
            access repo-specific configs like .dvc/config and
            .dvc/config.local.
        validate (bool): optional flag to tell dvc if it should validate the
            config or just load it as is. 'True' by default.

    Raises:
        ConfigError: thrown when config has an invalid format.
    """

    APPNAME = "dvc"
    APPAUTHOR = "iterative"

    # NOTE: used internally in RemoteLOCAL to know config
    # location, that url should resolved relative to.
    PRIVATE_CWD = "_cwd"

    CONFIG = "config"
    CONFIG_LOCAL = "config.local"

    # Accepts a boolean-like string and converts it to a real bool.
    BOOL_SCHEMA = And(str, is_bool, Use(to_bool))

    SECTION_CORE = "core"
    SECTION_CORE_LOGLEVEL = "loglevel"
    SECTION_CORE_LOGLEVEL_SCHEMA = And(Use(str.lower), supported_loglevel)
    SECTION_CORE_REMOTE = "remote"
    SECTION_CORE_INTERACTIVE_SCHEMA = BOOL_SCHEMA
    SECTION_CORE_INTERACTIVE = "interactive"
    SECTION_CORE_ANALYTICS = "analytics"
    SECTION_CORE_ANALYTICS_SCHEMA = BOOL_SCHEMA

    SECTION_CACHE = "cache"
    SECTION_CACHE_DIR = "dir"
    SECTION_CACHE_TYPE = "type"
    SECTION_CACHE_TYPE_SCHEMA = supported_cache_type
    SECTION_CACHE_PROTECTED = "protected"
    SECTION_CACHE_LOCAL = "local"
    SECTION_CACHE_S3 = "s3"
    SECTION_CACHE_GS = "gs"
    SECTION_CACHE_SSH = "ssh"
    SECTION_CACHE_HDFS = "hdfs"
    SECTION_CACHE_AZURE = "azure"
    SECTION_CACHE_SLOW_LINK_WARNING = "slow_link_warning"
    SECTION_CACHE_SCHEMA = {
        Optional(SECTION_CACHE_LOCAL): str,
        Optional(SECTION_CACHE_S3): str,
        Optional(SECTION_CACHE_GS): str,
        Optional(SECTION_CACHE_HDFS): str,
        Optional(SECTION_CACHE_SSH): str,
        Optional(SECTION_CACHE_AZURE): str,
        Optional(SECTION_CACHE_DIR): str,
        Optional(SECTION_CACHE_TYPE, default=None): SECTION_CACHE_TYPE_SCHEMA,
        Optional(SECTION_CACHE_PROTECTED, default=False): BOOL_SCHEMA,
        Optional(PRIVATE_CWD): str,
        Optional(SECTION_CACHE_SLOW_LINK_WARNING, default=True): BOOL_SCHEMA,
    }

    # backward compatibility
    SECTION_CORE_CLOUD = "cloud"
    SECTION_CORE_CLOUD_SCHEMA = And(Use(str.lower), supported_cloud)
    SECTION_CORE_STORAGEPATH = "storagepath"

    SECTION_CORE_SCHEMA = {
        Optional(SECTION_CORE_LOGLEVEL): And(str, Use(str.lower),
                                             SECTION_CORE_LOGLEVEL_SCHEMA),
        Optional(SECTION_CORE_REMOTE, default=""): And(str, Use(str.lower)),
        Optional(SECTION_CORE_INTERACTIVE,
                 default=False): SECTION_CORE_INTERACTIVE_SCHEMA,
        Optional(SECTION_CORE_ANALYTICS,
                 default=True): SECTION_CORE_ANALYTICS_SCHEMA,
        # backward compatibility
        Optional(SECTION_CORE_CLOUD, default=""): SECTION_CORE_CLOUD_SCHEMA,
        Optional(SECTION_CORE_STORAGEPATH, default=""): str,
    }

    # backward compatibility
    SECTION_AWS = "aws"
    SECTION_AWS_STORAGEPATH = "storagepath"
    SECTION_AWS_CREDENTIALPATH = "credentialpath"
    SECTION_AWS_ENDPOINT_URL = "endpointurl"
    SECTION_AWS_LIST_OBJECTS = "listobjects"
    SECTION_AWS_REGION = "region"
    SECTION_AWS_PROFILE = "profile"
    SECTION_AWS_USE_SSL = "use_ssl"
    SECTION_AWS_SSE = "sse"
    SECTION_AWS_SCHEMA = {
        SECTION_AWS_STORAGEPATH: str,
        Optional(SECTION_AWS_REGION): str,
        Optional(SECTION_AWS_PROFILE): str,
        Optional(SECTION_AWS_CREDENTIALPATH): str,
        Optional(SECTION_AWS_ENDPOINT_URL): str,
        Optional(SECTION_AWS_LIST_OBJECTS, default=False): BOOL_SCHEMA,
        Optional(SECTION_AWS_USE_SSL, default=True): BOOL_SCHEMA,
        Optional(SECTION_AWS_SSE): str,
    }

    # backward compatibility
    SECTION_GCP = "gcp"
    SECTION_GCP_STORAGEPATH = SECTION_AWS_STORAGEPATH
    SECTION_GCP_CREDENTIALPATH = SECTION_AWS_CREDENTIALPATH
    SECTION_GCP_PROJECTNAME = "projectname"
    SECTION_GCP_SCHEMA = {
        SECTION_GCP_STORAGEPATH: str,
        Optional(SECTION_GCP_PROJECTNAME): str,
    }

    # backward compatibility
    SECTION_LOCAL = "local"
    SECTION_LOCAL_STORAGEPATH = SECTION_AWS_STORAGEPATH
    SECTION_LOCAL_SCHEMA = {SECTION_LOCAL_STORAGEPATH: str}

    SECTION_AZURE_CONNECTION_STRING = "connection_string"
    # Alibabacloud oss options
    SECTION_OSS_ACCESS_KEY_ID = "oss_key_id"
    SECTION_OSS_ACCESS_KEY_SECRET = "oss_key_secret"
    SECTION_OSS_ENDPOINT = "oss_endpoint"

    # Matches section headers of the form: remote "<name>"
    SECTION_REMOTE_REGEX = r'^\s*remote\s*"(?P<name>.*)"\s*$'
    SECTION_REMOTE_FMT = 'remote "{}"'
    SECTION_REMOTE_URL = "url"
    # NOTE(review): the "******" values below appear redacted/masked in this
    # copy of the source — confirm the real option-name strings upstream.
    SECTION_REMOTE_USER = "******"
    SECTION_REMOTE_PORT = "port"
    SECTION_REMOTE_KEY_FILE = "keyfile"
    SECTION_REMOTE_TIMEOUT = "timeout"
    SECTION_REMOTE_PASSWORD = "******"
    SECTION_REMOTE_ASK_PASSWORD = "******"
    SECTION_REMOTE_SCHEMA = {
        SECTION_REMOTE_URL: str,
        Optional(SECTION_AWS_REGION): str,
        Optional(SECTION_AWS_PROFILE): str,
        Optional(SECTION_AWS_CREDENTIALPATH): str,
        Optional(SECTION_AWS_ENDPOINT_URL): str,
        Optional(SECTION_AWS_LIST_OBJECTS, default=False): BOOL_SCHEMA,
        Optional(SECTION_AWS_USE_SSL, default=True): BOOL_SCHEMA,
        Optional(SECTION_AWS_SSE): str,
        Optional(SECTION_GCP_PROJECTNAME): str,
        Optional(SECTION_CACHE_TYPE): SECTION_CACHE_TYPE_SCHEMA,
        Optional(SECTION_CACHE_PROTECTED, default=False): BOOL_SCHEMA,
        Optional(SECTION_REMOTE_USER): str,
        Optional(SECTION_REMOTE_PORT): Use(int),
        Optional(SECTION_REMOTE_KEY_FILE): str,
        Optional(SECTION_REMOTE_TIMEOUT): Use(int),
        Optional(SECTION_REMOTE_PASSWORD): str,
        Optional(SECTION_REMOTE_ASK_PASSWORD): BOOL_SCHEMA,
        Optional(SECTION_AZURE_CONNECTION_STRING): str,
        Optional(SECTION_OSS_ACCESS_KEY_ID): str,
        Optional(SECTION_OSS_ACCESS_KEY_SECRET): str,
        Optional(SECTION_OSS_ENDPOINT): str,
        Optional(PRIVATE_CWD): str,
    }

    SECTION_STATE = "state"
    SECTION_STATE_ROW_LIMIT = "row_limit"
    SECTION_STATE_ROW_CLEANUP_QUOTA = "row_cleanup_quota"
    SECTION_STATE_SCHEMA = {
        Optional(SECTION_STATE_ROW_LIMIT): And(Use(int), is_whole),
        Optional(SECTION_STATE_ROW_CLEANUP_QUOTA): And(Use(int), is_percent),
    }

    # Top-level schema tying all sections together.
    SCHEMA = {
        Optional(SECTION_CORE, default={}): SECTION_CORE_SCHEMA,
        Optional(Regex(SECTION_REMOTE_REGEX)): SECTION_REMOTE_SCHEMA,
        Optional(SECTION_CACHE, default={}): SECTION_CACHE_SCHEMA,
        Optional(SECTION_STATE, default={}): SECTION_STATE_SCHEMA,
        # backward compatibility
        Optional(SECTION_AWS, default={}): SECTION_AWS_SCHEMA,
        Optional(SECTION_GCP, default={}): SECTION_GCP_SCHEMA,
        Optional(SECTION_LOCAL, default={}): SECTION_LOCAL_SCHEMA,
    }

    def __init__(self, dvc_dir=None, validate=True):
        """Locate all config file paths and load the merged configuration."""
        self.system_config_file = os.path.join(self.get_system_config_dir(),
                                               self.CONFIG)
        self.global_config_file = os.path.join(self.get_global_config_dir(),
                                               self.CONFIG)

        if dvc_dir is not None:
            self.dvc_dir = os.path.abspath(os.path.realpath(dvc_dir))
            self.config_file = os.path.join(dvc_dir, self.CONFIG)
            self.config_local_file = os.path.join(dvc_dir, self.CONFIG_LOCAL)
        else:
            # No repo: only system/global configs will be available.
            self.dvc_dir = None
            self.config_file = None
            self.config_local_file = None

        self._system_config = None
        self._global_config = None
        self._repo_config = None
        self._local_config = None

        self.config = None

        self.load(validate=validate)

    @staticmethod
    def get_global_config_dir():
        """Returns global config location. E.g. ~/.config/dvc/config.

        Returns:
            str: path to the global config directory.
        """
        from appdirs import user_config_dir

        return user_config_dir(appname=Config.APPNAME,
                               appauthor=Config.APPAUTHOR)

    @staticmethod
    def get_system_config_dir():
        """Returns system config location. E.g. /etc/dvc.conf.

        Returns:
            str: path to the system config directory.
        """
        from appdirs import site_config_dir

        return site_config_dir(appname=Config.APPNAME,
                               appauthor=Config.APPAUTHOR)

    @staticmethod
    def init(dvc_dir):
        """Initializes dvc config.

        Args:
            dvc_dir (str): path to .dvc directory.

        Returns:
            dvc.config.Config: config object.
        """
        config_file = os.path.join(dvc_dir, Config.CONFIG)
        # Create an empty repo config file, then load everything.
        open(config_file, "w+").close()
        return Config(dvc_dir)

    def _load(self):
        """Parse each config file into its own ConfigObj (no merging yet)."""
        self._system_config = configobj.ConfigObj(self.system_config_file)
        self._global_config = configobj.ConfigObj(self.global_config_file)

        if self.config_file is not None:
            self._repo_config = configobj.ConfigObj(self.config_file)
        else:
            self._repo_config = configobj.ConfigObj()

        if self.config_local_file is not None:
            self._local_config = configobj.ConfigObj(self.config_local_file)
        else:
            self._local_config = configobj.ConfigObj()

        self.config = None

    def _load_config(self, path):
        """Parse one config file, lowercase its keys, and resolve its paths."""
        config = configobj.ConfigObj(path)
        config = self._lower(config)
        self._resolve_paths(config, path)
        return config

    @staticmethod
    def _resolve_path(path, config_file):
        """Resolve *path* relative to the directory of *config_file*."""
        assert os.path.isabs(config_file)
        config_dir = os.path.dirname(config_file)
        return os.path.abspath(os.path.join(config_dir, path))

    def _resolve_cache_path(self, config, fname):
        """Record the config file's directory so cache 'dir' can be resolved."""
        cache = config.get(self.SECTION_CACHE)
        if cache is None:
            return

        cache_dir = cache.get(self.SECTION_CACHE_DIR)
        if cache_dir is None:
            return

        cache[self.PRIVATE_CWD] = os.path.dirname(fname)

    def _resolve_paths(self, config, fname):
        """Stamp each remote section (and cache) with the config's directory."""
        if fname is None:
            return

        self._resolve_cache_path(config, fname)
        for section in config.values():
            if self.SECTION_REMOTE_URL not in section.keys():
                continue

            section[self.PRIVATE_CWD] = os.path.dirname(fname)

    def load(self, validate=True):
        """Loads config from all the config files.

        Args:
            validate (bool): optional flag to tell dvc if it should validate
                the config or just load it as is. 'True' by default.

        Raises:
            dvc.config.ConfigError: thrown if config has invalid format.
        """
        self._load()
        try:
            # Merge in precedence order: system < global < repo < local.
            self.config = self._load_config(self.system_config_file)
            user = self._load_config(self.global_config_file)
            config = self._load_config(self.config_file)
            local = self._load_config(self.config_local_file)

            # NOTE: schema doesn't support ConfigObj.Section validation, so we
            # need to convert our config to dict before passing it to the
            # schema validator (the _merge step below produces plain dicts).
            for conf in [user, config, local]:
                self.config = self._merge(self.config, conf)

            if validate:
                self.config = Schema(self.SCHEMA).validate(self.config)

            # NOTE: now converting back to ConfigObj
            self.config = configobj.ConfigObj(self.config,
                                              write_empty_values=True)
            self.config.filename = self.config_file
            self._resolve_paths(self.config, self.config_file)
        except Exception as ex:
            raise ConfigError(ex)

    @staticmethod
    def _get_key(conf, name, add=False):
        """Find *name* in *conf* case-insensitively; optionally create it."""
        for k in conf.keys():
            if k.lower() == name.lower():
                return k

        if add:
            conf[name] = {}
            return name

        return None

    def save(self, config=None):
        """Saves config to config files.

        Args:
            config (configobj.ConfigObj): optional config object to save.

        Raises:
            dvc.config.ConfigError: thrown if failed to write config file.
        """
        if config is not None:
            clist = [config]
        else:
            # No explicit target: write every loaded config back out.
            clist = [
                self._system_config,
                self._global_config,
                self._repo_config,
                self._local_config,
            ]

        for conf in clist:
            if conf.filename is None:
                continue

            try:
                logger.debug("Writing '{}'.".format(conf.filename))
                dname = os.path.dirname(os.path.abspath(conf.filename))
                try:
                    os.makedirs(dname)
                except OSError as exc:
                    # Directory already existing is fine; anything else isn't.
                    if exc.errno != errno.EEXIST:
                        raise
                conf.write()
            except Exception as exc:
                msg = "failed to write config '{}'".format(conf.filename)
                raise ConfigError(msg, exc)

    def get_remote_settings(self, name):
        """
        Args:
            name (str): The name of the remote that we want to retrieve

        Returns:
            dict: The content beneath the given remote name.

        Example:
            >>> config = {'remote "server"': {'url': 'ssh://localhost/'}}
            >>> get_remote_settings("server")
            {'url': 'ssh://localhost/'}
        """
        import posixpath

        settings = self.config[self.SECTION_REMOTE_FMT.format(name)]
        parsed = urlparse(settings["url"])

        # Support for cross referenced remotes.
        # This will merge the settings, giving priority to the outer
        # reference. For example, having:
        #
        #       dvc remote add server ssh://localhost
        #       dvc remote modify server user root
        #       dvc remote modify server ask_password true
        #
        #       dvc remote add images remote://server/tmp/pictures
        #       dvc remote modify images user alice
        #       dvc remote modify images ask_password false
        #       dvc remote modify images password asdf1234
        #
        # Results on a config dictionary like:
        #
        #       {
        #           "url": "ssh://localhost/tmp/pictures",
        #           "user": "******",
        #           "password": "******",
        #           "ask_password": False,
        #       }
        #
        if parsed.scheme == "remote":
            reference = self.get_remote_settings(parsed.netloc)
            url = posixpath.join(reference["url"], parsed.path.lstrip("/"))
            merged = reference.copy()
            merged.update(settings)
            merged["url"] = url
            return merged

        return settings

    @staticmethod
    def unset(config, section, opt=None):
        """Unsets specified option and/or section in the config.

        Args:
            config (configobj.ConfigObj): config to work on.
            section (str): section name.
            opt (str): optional option name.
        """
        if section not in config.keys():
            raise ConfigError("section '{}' doesn't exist".format(section))

        if opt is None:
            del config[section]
            return

        if opt not in config[section].keys():
            raise ConfigError("option '{}.{}' doesn't exist".format(
                section, opt))
        del config[section][opt]

        # Drop the section entirely once its last option is removed.
        if not config[section]:
            del config[section]

    @staticmethod
    def set(config, section, opt, value):
        """Sets specified option in the config.

        Args:
            config (configobj.ConfigObj): config to work on.
            section (str): section name.
            opt (str): option name.
            value: value to set option to.
        """
        if section not in config.keys():
            config[section] = {}

        config[section][opt] = value

    @staticmethod
    def show(config, section, opt):
        """Prints option value from the config.

        Args:
            config (configobj.ConfigObj): config to work on.
            section (str): section name.
            opt (str): option name.
        """
        if section not in config.keys():
            raise ConfigError("section '{}' doesn't exist".format(section))

        if opt not in config[section].keys():
            raise ConfigError("option '{}.{}' doesn't exist".format(
                section, opt))

        logger.info(config[section][opt])

    @staticmethod
    def _merge(first, second):
        """Merge two configs section-by-section; *second* wins on conflicts."""
        res = {}
        sections = list(first.keys()) + list(second.keys())
        for section in sections:
            first_copy = first.get(section, {}).copy()
            second_copy = second.get(section, {}).copy()
            first_copy.update(second_copy)
            res[section] = first_copy

        return res

    @staticmethod
    def _lower(config):
        """Return a plain dict copy with section/option names lowercased."""
        new_config = {}
        for s_key, s_value in config.items():
            new_s = {}
            for key, value in s_value.items():
                # Values are coerced to str; schema handles conversions later.
                new_s[key.lower()] = str(value)
            new_config[s_key.lower()] = new_s
        return new_config